Fix memory leak when importing big amounts of visits

This commit is contained in:
Alejandro Celaya 2023-03-31 09:46:05 +02:00
parent 703965915d
commit 26f4a969c9
7 changed files with 52 additions and 11 deletions

View file

@ -1,4 +1,4 @@
log_errors_max_len=0 log_errors_max_len=0
zend.assertions=1 zend.assertions=1
assert.exception=1 assert.exception=1
memory_limit=256M memory_limit=512M

View file

@ -18,7 +18,6 @@ use Shlinkio\Shlink\Importer\Model\ImportedShlinkOrphanVisit;
use Shlinkio\Shlink\Importer\Model\ImportedShlinkUrl; use Shlinkio\Shlink\Importer\Model\ImportedShlinkUrl;
use Shlinkio\Shlink\Importer\Model\ImportResult; use Shlinkio\Shlink\Importer\Model\ImportResult;
use Shlinkio\Shlink\Importer\Params\ImportParams; use Shlinkio\Shlink\Importer\Params\ImportParams;
use Shlinkio\Shlink\Importer\Sources\ImportSource;
use Symfony\Component\Console\Style\OutputStyle; use Symfony\Component\Console\Style\OutputStyle;
use Symfony\Component\Console\Style\StyleInterface; use Symfony\Component\Console\Style\StyleInterface;
use Throwable; use Throwable;
@ -55,8 +54,7 @@ class ImportedLinksProcessor implements ImportedLinksProcessorInterface
private function importShortUrls(StyleInterface $io, iterable $shlinkUrls, ImportParams $params): void private function importShortUrls(StyleInterface $io, iterable $shlinkUrls, ImportParams $params): void
{ {
$importShortCodes = $params->importShortCodes; $importShortCodes = $params->importShortCodes;
$source = $params->source; $iterable = $this->batchHelper->wrapIterable($shlinkUrls, $params->importVisits ? 10 : 100);
$iterable = $this->batchHelper->wrapIterable($shlinkUrls, $source === ImportSource::SHLINK ? 10 : 100);
foreach ($iterable as $importedUrl) { foreach ($iterable as $importedUrl) {
$skipOnShortCodeConflict = static fn (): bool => $io->choice(sprintf( $skipOnShortCodeConflict = static fn (): bool => $io->choice(sprintf(
@ -82,7 +80,10 @@ class ImportedLinksProcessor implements ImportedLinksProcessorInterface
continue; continue;
} }
$resultMessage = $shortUrlImporting->importVisits($importedUrl->visits, $this->em); $resultMessage = $shortUrlImporting->importVisits(
$this->batchHelper->wrapIterable($importedUrl->visits, 100),
$this->em,
);
$io->text(sprintf('%s: %s', $longUrl, $resultMessage)); $io->text(sprintf('%s: %s', $longUrl, $resultMessage));
} }
} }

View file

@ -33,7 +33,7 @@ final class ShortUrlImporting
*/ */
public function importVisits(iterable $visits, EntityManagerInterface $em): string public function importVisits(iterable $visits, EntityManagerInterface $em): string
{ {
$mostRecentImportedDate = $this->shortUrl->mostRecentImportedVisitDate(); $mostRecentImportedDate = $this->resolveShortUrl($em)->mostRecentImportedVisitDate();
$importedVisits = 0; $importedVisits = 0;
foreach ($visits as $importedVisit) { foreach ($visits as $importedVisit) {
@ -42,7 +42,7 @@ final class ShortUrlImporting
continue; continue;
} }
$em->persist(Visit::fromImport($this->shortUrl, $importedVisit)); $em->persist(Visit::fromImport($this->resolveShortUrl($em), $importedVisit));
$importedVisits++; $importedVisits++;
} }
@ -54,4 +54,14 @@ final class ShortUrlImporting
? sprintf('<info>Imported</info> with <info>%s</info> visits', $importedVisits) ? sprintf('<info>Imported</info> with <info>%s</info> visits', $importedVisits)
: sprintf('<comment>Skipped</comment>. Imported <info>%s</info> visits', $importedVisits); : sprintf('<comment>Skipped</comment>. Imported <info>%s</info> visits', $importedVisits);
} }
private function resolveShortUrl(EntityManagerInterface $em): ShortUrl
{
// Instead of directly accessing wrapped ShortUrl entity, try to get it from the EM.
// With this, we will get the same entity from memory if it is known by the EM, but if it was cleared, the EM
// will fetch it again from the database, preventing errors at runtime.
// However, if the EM was not flushed yet, the entity will not be found by ID, but it is known by the EM.
// In that case, we fall back to wrapped ShortUrl entity directly.
return $em->find(ShortUrl::class, $this->shortUrl->getId()) ?? $this->shortUrl;
}
} }

View file

@ -20,7 +20,8 @@ class CrawlableShortCodesQuery extends EntitySpecificationRepository implements
->from(ShortUrl::class, 's') ->from(ShortUrl::class, 's')
->where($qb->expr()->eq('s.crawlable', ':crawlable')) ->where($qb->expr()->eq('s.crawlable', ':crawlable'))
->setParameter('crawlable', true) ->setParameter('crawlable', true)
->setMaxResults($blockSize); ->setMaxResults($blockSize)
->orderBy('s.shortCode');
$page = 0; $page = 0;
do { do {

View file

@ -17,12 +17,14 @@ class DoctrineBatchHelper implements DoctrineBatchHelperInterface
} }
/** /**
* @template T
* @param iterable<T> $resultSet
* @return iterable<T>
* @throws Throwable * @throws Throwable
*/ */
public function wrapIterable(iterable $resultSet, int $batchSize): iterable public function wrapIterable(iterable $resultSet, int $batchSize): iterable
{ {
$iteration = 0; $iteration = 0;
$this->em->beginTransaction(); $this->em->beginTransaction();
try { try {
@ -33,7 +35,6 @@ class DoctrineBatchHelper implements DoctrineBatchHelperInterface
} }
} catch (Throwable $e) { } catch (Throwable $e) {
$this->em->rollback(); $this->em->rollback();
throw $e; throw $e;
} }

View file

@ -22,8 +22,8 @@ class RobotsTest extends ApiTestCase
# https://www.robotstxt.org/orig.html # https://www.robotstxt.org/orig.html
User-agent: * User-agent: *
Allow: /custom
Allow: /abc123 Allow: /abc123
Allow: /custom
Disallow: / Disallow: /
ROBOTS, ROBOTS,
$body, $body,

View file

@ -7,6 +7,7 @@ namespace ShlinkioTest\Shlink\Core\Importer;
use Cake\Chronos\Chronos; use Cake\Chronos\Chronos;
use Doctrine\Common\Collections\ArrayCollection; use Doctrine\Common\Collections\ArrayCollection;
use Doctrine\ORM\EntityManagerInterface; use Doctrine\ORM\EntityManagerInterface;
use PHPUnit\Framework\Assert;
use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\Attributes\Test; use PHPUnit\Framework\Attributes\Test;
use PHPUnit\Framework\MockObject\MockObject; use PHPUnit\Framework\MockObject\MockObject;
@ -184,6 +185,7 @@ class ImportedLinksProcessorTest extends TestCase
$this->em->expects($this->exactly($amountOfPersistedVisits + ($foundShortUrl === null ? 1 : 0)))->method( $this->em->expects($this->exactly($amountOfPersistedVisits + ($foundShortUrl === null ? 1 : 0)))->method(
'persist', 'persist',
)->with($this->callback(fn (object $arg) => $arg instanceof ShortUrl || $arg instanceof Visit)); )->with($this->callback(fn (object $arg) => $arg instanceof ShortUrl || $arg instanceof Visit));
$this->em->expects($this->any())->method('find')->willReturn(null);
$this->io->expects($this->once())->method('text')->with($this->stringContains($expectedOutput)); $this->io->expects($this->once())->method('text')->with($this->stringContains($expectedOutput));
$this->processor->process($this->io, ImportResult::withShortUrls([$importedUrl]), $this->buildParams()); $this->processor->process($this->io, ImportResult::withShortUrls([$importedUrl]), $this->buildParams());
@ -229,6 +231,32 @@ class ImportedLinksProcessorTest extends TestCase
]; ];
} }
#[Test, DataProvider('provideFoundShortUrls')]
public function visitsArePersistedWithProperShortUrl(?ShortUrl $foundShortUrl): void
{
$originalShortUrl = ShortUrl::withLongUrl('https://foo');
$this->em->method('getRepository')->with(ShortUrl::class)->willReturn($this->repo);
$this->repo->expects($this->once())->method('findOneByImportedUrl')->willReturn($originalShortUrl);
$this->em->expects($this->exactly(2))->method('find')->willReturn($foundShortUrl);
$this->em->expects($this->once())->method('persist')->willReturnCallback(
static fn (Visit $visit) => Assert::assertSame($foundShortUrl ?? $originalShortUrl, $visit->getShortUrl()),
);
$now = Chronos::now();
$this->processor->process($this->io, ImportResult::withShortUrls([
new ImportedShlinkUrl(ImportSource::SHLINK, 'https://s', [], $now, null, 's', null, [
new ImportedShlinkVisit('', '', $now, null),
]),
]), $this->buildParams());
}
public static function provideFoundShortUrls(): iterable
{
yield [null];
yield [ShortUrl::withLongUrl('https://bar')];
}
/** /**
* @param iterable<ImportedShlinkOrphanVisit> $visits * @param iterable<ImportedShlinkOrphanVisit> $visits
*/ */