Merge pull request #1314 from acelaya-forks/feature/paginated-tags-performance

Feature/paginated tags performance
This commit is contained in:
Alejandro Celaya 2022-01-08 17:37:01 +01:00 committed by GitHub
commit a46d510e2b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 107 additions and 36 deletions

View file

@ -12,11 +12,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com), and this
The `short-urls:list` command now accepts a `-i`/`--including-all-tags` flag which behaves the same.
* [#1273](https://github.com/shlinkio/shlink/issues/1273) Added support for pagination in tags lists.
* [#1273](https://github.com/shlinkio/shlink/issues/1273) Added support for pagination in tags lists, allowing to improve performance by loading subsets of tags.
For backwards compatibility, lists continue returning all items by default, but the `GET /tags` endpoint now supports `page` and `itemsPerPage` query params, to make sure only a subset of the tags is returned.
This is supported both when invoking the endpoint with and without `withStats=true`.
This is supported both when invoking the endpoint with and without `withStats=true` query param.
Additionally, the endpoint also supports filtering by `searchTerm` query param. When provided, only tags matching it will be returned.

View file

@ -4,19 +4,31 @@ declare(strict_types=1);
namespace Shlinkio\Shlink\Core\Repository;
use Doctrine\ORM\Query\ResultSetMappingBuilder;
use Happyr\DoctrineSpecification\Repository\EntitySpecificationRepository;
use Happyr\DoctrineSpecification\Spec;
use Shlinkio\Shlink\Core\Entity\Tag;
use Shlinkio\Shlink\Core\Tag\Model\TagInfo;
use Shlinkio\Shlink\Core\Tag\Model\TagsListFiltering;
use Shlinkio\Shlink\Core\Tag\Spec\CountTagsWithName;
use Shlinkio\Shlink\Rest\ApiKey\Role;
use Shlinkio\Shlink\Rest\ApiKey\Spec\WithApiKeySpecsEnsuringJoin;
use Shlinkio\Shlink\Rest\Entity\ApiKey;
use function Functional\map;
use function is_object;
use function method_exists;
use function sprintf;
use function strlen;
use function strpos;
use function substr_replace;
use const PHP_INT_MAX;
class TagRepository extends EntitySpecificationRepository implements TagRepositoryInterface
{
private const PARAM_PLACEHOLDER = '?';
public function deleteByName(array $names): int
{
if (empty($names)) {
@ -35,29 +47,76 @@ class TagRepository extends EntitySpecificationRepository implements TagReposito
*/
public function findTagsWithInfo(?TagsListFiltering $filtering = null): array
{
$qb = $this->createQueryBuilder('t');
$qb->select('t AS tag', 'COUNT(DISTINCT s.id) AS shortUrlsCount', 'COUNT(DISTINCT v.id) AS visitsCount')
->leftJoin('t.shortUrls', 's')
->leftJoin('s.visits', 'v')
->groupBy('t')
->orderBy('t.name', 'ASC')
->setMaxResults($filtering?->limit())
->setFirstResult($filtering?->offset());
$conn = $this->getEntityManager()->getConnection();
$subQb = $this->createQueryBuilder('t');
$subQb->select('t.id', 't.name')
->orderBy('t.name', 'ASC') // TODO Make dynamic
->setMaxResults($filtering?->limit() ?? PHP_INT_MAX)
->setFirstResult($filtering?->offset() ?? 0);
$searchTerm = $filtering?->searchTerm();
if ($searchTerm !== null) {
$qb->andWhere($qb->expr()->like('t.name', ':searchPattern'))
->setParameter('searchPattern', '%' . $searchTerm . '%');
$subQb->andWhere($subQb->expr()->like('t.name', $conn->quote('%' . $searchTerm . '%')));
}
$apiKey = $filtering?->apiKey();
if ($apiKey !== null) {
$this->applySpecification($qb, $apiKey->spec(false, 'shortUrls'), 't');
$this->applySpecification($subQb, $apiKey->spec(false, 'shortUrls'), 't');
}
$subQuery = $subQb->getQuery();
$subQuerySql = $subQuery->getSQL();
// Sadly, we need to manually interpolate the params in the query replacing the placeholders, as this is going
// to be used as a sub-query in a native query. There's no need to sanitize, though.
foreach ($subQuery->getParameters() as $param) {
$value = $param->getValue();
$pos = strpos($subQuerySql, self::PARAM_PLACEHOLDER);
$subQuerySql = substr_replace(
$subQuerySql,
sprintf('\'%s\'', is_object($value) && method_exists($value, 'getId') ? $value->getId() : $value),
$pos === false ? -1 : $pos,
strlen(self::PARAM_PLACEHOLDER),
);
}
// A native query builder needs to be used here, because DQL and ORM query builders do not support
// sub-queries at "from" and "join" level.
// If no sub-query is used, the whole list is loaded even with pagination, making it very inefficient.
$nativeQb = $conn->createQueryBuilder();
$nativeQb
->select(
't.id_0 AS id',
't.name_1 AS name',
'COUNT(DISTINCT s.id) AS short_urls_count',
'COUNT(DISTINCT v.id) AS visits_count',
)
->from('(' . $subQuerySql . ')', 't')
->leftJoin('t', 'short_urls_in_tags', 'st', $nativeQb->expr()->eq('t.id_0', 'st.tag_id'))
->leftJoin('st', 'short_urls', 's', $nativeQb->expr()->eq('s.id', 'st.short_url_id'))
->leftJoin('st', 'visits', 'v', $nativeQb->expr()->eq('s.id', 'v.short_url_id'))
->groupBy('t.id_0', 't.name_1')
->orderBy('t.name_1', 'ASC'); // TODO Make dynamic
// Apply API key role conditions to the native query too, as they will affect the amounts on the aggregates
$apiKey?->mapRoles(fn (string $roleName, array $meta) => match ($roleName) {
Role::DOMAIN_SPECIFIC => $nativeQb->andWhere(
$nativeQb->expr()->eq('s.domain_id', $conn->quote(Role::domainIdFromMeta($meta))),
),
Role::AUTHORED_SHORT_URLS => $nativeQb->andWhere(
$nativeQb->expr()->eq('s.author_api_key_id', $conn->quote($apiKey->getId())),
),
default => $nativeQb,
});
$rsm = new ResultSetMappingBuilder($this->getEntityManager());
$rsm->addRootEntityFromClassMetadata(Tag::class, 't');
$rsm->addScalarResult('short_urls_count', 'shortUrlsCount');
$rsm->addScalarResult('visits_count', 'visitsCount');
return map(
$qb->getQuery()->getResult(),
static fn (array $row) => new TagInfo($row['tag'], (int) $row['shortUrlsCount'], (int) $row['visitsCount']),
$this->getEntityManager()->createNativeQuery($nativeQb->getSQL(), $rsm)->getResult(),
static fn (array $row) => new TagInfo($row[0], (int) $row['shortUrlsCount'], (int) $row['visitsCount']),
);
}

View file

@ -204,13 +204,13 @@ class VisitRepository extends EntitySpecificationRepository implements VisitRepo
$qb->select('v.id')
->orderBy('v.id', 'DESC')
// Falling back to values that will behave as no limit/offset, but will workaround MS SQL not allowing
// Falling back to values that will behave as no limit/offset, but will work around MS SQL not allowing
// order on sub-queries without offset
->setMaxResults($limit ?? PHP_INT_MAX)
->setFirstResult($offset ?? 0);
$subQuery = $qb->getQuery()->getSQL();
// A native query builder needs to be used here because DQL and ORM query builders do not accept
// A native query builder needs to be used here, because DQL and ORM query builders do not support
// sub-queries at "from" and "join" level.
// If no sub-query is used, then performance drops dramatically while the "offset" grows.
$nativeQb = $this->getEntityManager()->getConnection()->createQueryBuilder();

View file

@ -23,11 +23,18 @@ abstract class AbstractTagsPaginatorAdapter implements AdapterInterface
public function getNbResults(): int
{
return (int) $this->repo->matchSingleScalarResult(Spec::andX(
// FIXME I don't think using Spec::selectNew is the correct thing here, ideally it should be Spec::select,
// but seems to be the only way to use Spec::COUNT(...)
$conditions = [
// FIXME I don't think using Spec::selectNew is the correct thing in this context.
// Ideally it should be Spec::select, but seems to be the only way to use Spec::COUNT(...).
Spec::selectNew(Tag::class, Spec::COUNT('id', true)),
new WithApiKeySpecsEnsuringJoin($this->apiKey),
));
];
$searchTerm = $this->params->searchTerm();
if ($searchTerm !== null) {
$conditions[] = Spec::like('name', $searchTerm);
}
return (int) $this->repo->matchSingleScalarResult(Spec::andX(...$conditions));
}
}

View file

@ -54,7 +54,7 @@ class TagRepositoryTest extends DatabaseTestCase
/**
* @test
* @dataProvider provideFilterings
* @dataProvider provideFilters
*/
public function properTagsInfoIsReturned(?TagsListFiltering $filtering, callable $asserts): void
{
@ -84,7 +84,7 @@ class TagRepositoryTest extends DatabaseTestCase
$asserts($result, $names);
}
public function provideFilterings(): iterable
public function provideFilters(): iterable
{
$noFiltersAsserts = static function (array $result, array $tagNames): void {
/** @var TagInfo[] $result */

View file

@ -23,8 +23,13 @@ class TagsPaginatorAdapterTest extends DatabaseTestCase
* @test
* @dataProvider provideFilters
*/
public function expectedListOfTagsIsReturned(?string $searchTerm, int $offset, int $length, int $expected): void
{
public function expectedListOfTagsIsReturned(
?string $searchTerm,
int $offset,
int $length,
int $expectedSliceSize,
int $expectedTotalCount,
): void {
$names = ['foo', 'bar', 'baz', 'another'];
foreach ($names as $name) {
$this->getEntityManager()->persist(new Tag($name));
@ -33,20 +38,20 @@ class TagsPaginatorAdapterTest extends DatabaseTestCase
$adapter = new TagsPaginatorAdapter($this->repo, TagsParams::fromRawData(['searchTerm' => $searchTerm]), null);
self::assertCount($expected, $adapter->getSlice($offset, $length));
self::assertEquals(4, $adapter->getNbResults());
self::assertCount($expectedSliceSize, $adapter->getSlice($offset, $length));
self::assertEquals($expectedTotalCount, $adapter->getNbResults());
}
public function provideFilters(): iterable
{
yield [null, 0, 10, 4];
yield [null, 2, 10, 2];
yield [null, 1, 3, 3];
yield [null, 3, 3, 1];
yield [null, 0, 2, 2];
yield ['ba', 0, 10, 2];
yield ['ba', 0, 1, 1];
yield ['foo', 0, 10, 1];
yield ['a', 0, 10, 3];
yield [null, 0, 10, 4, 4];
yield [null, 2, 10, 2, 4];
yield [null, 1, 3, 3, 4];
yield [null, 3, 3, 1, 4];
yield [null, 0, 2, 2, 4];
yield ['ba', 0, 10, 2, 2];
yield ['ba', 0, 1, 1, 2];
yield ['foo', 0, 10, 1, 1];
yield ['a', 0, 10, 3, 3];
}
}