Added logic to resolve crawlable short codes

This commit is contained in:
Alejandro Celaya 2021-05-22 09:34:42 +02:00
parent e6ce84aa14
commit 3ef02d46c0
6 changed files with 88 additions and 4 deletions

View file

@ -59,7 +59,7 @@ return [
Importer\ImportedLinksProcessor::class => ConfigAbstractFactory::class,
Crawling\CrawlingHelper::class => InvokableFactory::class,
Crawling\CrawlingHelper::class => ConfigAbstractFactory::class,
],
'aliases' => [
@ -150,6 +150,8 @@ return [
Service\ShortUrl\ShortCodeHelper::class,
Util\DoctrineBatchHelper::class,
],
Crawling\CrawlingHelper::class => ['em'],
],
];

View file

@ -4,10 +4,23 @@ declare(strict_types=1);
namespace Shlinkio\Shlink\Core\Crawling;
use Doctrine\ORM\EntityManagerInterface;
use Shlinkio\Shlink\Core\Entity\ShortUrl;
use Shlinkio\Shlink\Core\Repository\ShortUrlRepositoryInterface;
/**
 * Resolves the short codes that may be crawled by delegating the lookup to the ShortUrl repository.
 */
class CrawlingHelper implements CrawlingHelperInterface
{
    private EntityManagerInterface $em;

    public function __construct(EntityManagerInterface $em)
    {
        $this->em = $em;
    }

    /**
     * Yields every value produced by the ShortUrl repository's findCrawlableShortCodes().
     *
     * This method is a generator: the repository is neither resolved nor queried until the result is
     * iterated. It must not return early, since a `return` in a generator body ends it without yielding.
     */
    public function listCrawlableShortCodes(): iterable
    {
        /** @var ShortUrlRepositoryInterface $repo */
        $repo = $this->em->getRepository(ShortUrl::class);
        yield from $repo->findCrawlableShortCodes();
    }
}

View file

@ -288,4 +288,28 @@ class ShortUrlRepository extends EntitySpecificationRepository implements ShortU
$qb->andWhere($qb->expr()->isNull('s.domain'));
}
}
/**
 * Lazily yields the distinct short codes of all short URLs whose `crawlable` flag is true.
 *
 * Rows are loaded in blocks of $blockSize using offset-based pagination, so the whole result set is
 * never requested in a single query. This is a generator: no query runs until it is iterated.
 *
 * @return iterable One `shortCode` value per yielded item
 */
public function findCrawlableShortCodes(): iterable
{
    $blockSize = 1000;
    $qb = $this->getEntityManager()->createQueryBuilder();
    // A stable ordering is required for offset pagination: without ORDER BY the database may return
    // rows in a different order for each page query, duplicating or skipping short codes.
    $qb->select('DISTINCT s.shortCode')
       ->from(ShortUrl::class, 's')
       ->where($qb->expr()->eq('s.crawlable', ':crawlable'))
       ->setParameter('crawlable', true)
       ->setMaxResults($blockSize)
       ->orderBy('s.shortCode');

    $page = 0;
    do {
        // Clone so the base query builder stays untouched and only the offset differs per page
        $qbClone = (clone $qb)->setFirstResult($blockSize * $page);
        $page++;
        $found = 0;

        foreach ($qbClone->getQuery()->toIterable() as ['shortCode' => $shortCode]) {
            $found++;
            yield $shortCode;
        }

        // A page shorter than the block size is necessarily the last one, so no extra query is needed
    } while ($found === $blockSize);
}
}

View file

@ -41,4 +41,6 @@ interface ShortUrlRepositoryInterface extends ObjectRepository, EntitySpecificat
public function findOneMatching(ShortUrlMeta $meta): ?ShortUrl;
public function findOneByImportedUrl(ImportedShlinkUrl $url): ?ShortUrl;
/**
 * Finds the short codes of the short URLs that are allowed to be crawled.
 *
 * The result is only guaranteed to be iterable, so implementations may return it lazily (for example
 * as a generator). Callers should iterate it rather than assume an array.
 * NOTE(review): items are presumably the short codes as strings - confirm against the entity mapping.
 */
public function findCrawlableShortCodes(): iterable;
}

View file

@ -66,11 +66,11 @@ class VisitRepository extends EntitySpecificationRepository implements VisitRepo
do {
$qb = (clone $originalQueryBuilder)->andWhere($qb->expr()->gt('v.id', $lastId));
$iterator = $qb->getQuery()->iterate();
$iterator = $qb->getQuery()->toIterable();
$resultsFound = false;
/** @var Visit $visit */
foreach ($iterator as $key => [$visit]) {
foreach ($iterator as $key => $visit) {
$resultsFound = true;
yield $key => $visit;
}

View file

@ -0,0 +1,43 @@
<?php
declare(strict_types=1);
namespace ShlinkioTest\Shlink\Core\Crawling;
use Doctrine\ORM\EntityManagerInterface;
use PHPUnit\Framework\TestCase;
use Prophecy\PhpUnit\ProphecyTrait;
use Prophecy\Prophecy\ObjectProphecy;
use Shlinkio\Shlink\Core\Crawling\CrawlingHelper;
use Shlinkio\Shlink\Core\Entity\ShortUrl;
use Shlinkio\Shlink\Core\Repository\ShortUrlRepositoryInterface;
/**
 * Unit test for CrawlingHelper, using a prophesized entity manager and ShortUrl repository.
 */
class CrawlingHelperTest extends TestCase
{
    use ProphecyTrait;

    private CrawlingHelper $helper;
    private ObjectProphecy $em;

    protected function setUp(): void
    {
        $this->em = $this->prophesize(EntityManagerInterface::class);
        $this->helper = new CrawlingHelper($this->em->reveal());
    }

    /** @test */
    public function listCrawlableShortCodesDelegatesIntoRepository(): void
    {
        $shortCodes = ['foo', 'bar', 'baz'];
        $repo = $this->prophesize(ShortUrlRepositoryInterface::class);
        $findCrawlableShortCodes = $repo->findCrawlableShortCodes()->willReturn($shortCodes);
        $getRepo = $this->em->getRepository(ShortUrl::class)->willReturn($repo->reveal());

        // The result is a generator, so it has to be consumed for the delegation to actually happen
        $result = [...$this->helper->listCrawlableShortCodes()];

        // Besides verifying the calls, make sure the repository values are passed through unchanged
        self::assertSame($shortCodes, $result);
        $findCrawlableShortCodes->shouldHaveBeenCalledOnce();
        $getRepo->shouldHaveBeenCalledOnce();
    }
}