Fix memory leak when importing big amounts of visits

This commit is contained in:
Alejandro Celaya 2023-03-31 09:46:05 +02:00
parent 703965915d
commit 26f4a969c9
7 changed files with 52 additions and 11 deletions

View file

@ -1,4 +1,4 @@
log_errors_max_len=0
zend.assertions=1
assert.exception=1
memory_limit=256M
memory_limit=512M

View file

@ -18,7 +18,6 @@ use Shlinkio\Shlink\Importer\Model\ImportedShlinkOrphanVisit;
use Shlinkio\Shlink\Importer\Model\ImportedShlinkUrl;
use Shlinkio\Shlink\Importer\Model\ImportResult;
use Shlinkio\Shlink\Importer\Params\ImportParams;
use Shlinkio\Shlink\Importer\Sources\ImportSource;
use Symfony\Component\Console\Style\OutputStyle;
use Symfony\Component\Console\Style\StyleInterface;
use Throwable;
@ -55,8 +54,7 @@ class ImportedLinksProcessor implements ImportedLinksProcessorInterface
private function importShortUrls(StyleInterface $io, iterable $shlinkUrls, ImportParams $params): void
{
$importShortCodes = $params->importShortCodes;
$source = $params->source;
$iterable = $this->batchHelper->wrapIterable($shlinkUrls, $source === ImportSource::SHLINK ? 10 : 100);
$iterable = $this->batchHelper->wrapIterable($shlinkUrls, $params->importVisits ? 10 : 100);
foreach ($iterable as $importedUrl) {
$skipOnShortCodeConflict = static fn (): bool => $io->choice(sprintf(
@ -82,7 +80,10 @@ class ImportedLinksProcessor implements ImportedLinksProcessorInterface
continue;
}
$resultMessage = $shortUrlImporting->importVisits($importedUrl->visits, $this->em);
$resultMessage = $shortUrlImporting->importVisits(
$this->batchHelper->wrapIterable($importedUrl->visits, 100),
$this->em,
);
$io->text(sprintf('%s: %s', $longUrl, $resultMessage));
}
}

View file

@ -33,7 +33,7 @@ final class ShortUrlImporting
*/
public function importVisits(iterable $visits, EntityManagerInterface $em): string
{
$mostRecentImportedDate = $this->shortUrl->mostRecentImportedVisitDate();
$mostRecentImportedDate = $this->resolveShortUrl($em)->mostRecentImportedVisitDate();
$importedVisits = 0;
foreach ($visits as $importedVisit) {
@ -42,7 +42,7 @@ final class ShortUrlImporting
continue;
}
$em->persist(Visit::fromImport($this->shortUrl, $importedVisit));
$em->persist(Visit::fromImport($this->resolveShortUrl($em), $importedVisit));
$importedVisits++;
}
@ -54,4 +54,14 @@ final class ShortUrlImporting
? sprintf('<info>Imported</info> with <info>%s</info> visits', $importedVisits)
: sprintf('<comment>Skipped</comment>. Imported <info>%s</info> visits', $importedVisits);
}
private function resolveShortUrl(EntityManagerInterface $em): ShortUrl
{
// Instead of directly accessing wrapped ShortUrl entity, try to get it from the EM.
// With this, we will get the same entity from memory if it is known by the EM, but if it was cleared, the EM
// will fetch it again from the database, preventing errors at runtime.
// However, if the EM was not flushed yet, the entity will not be found by ID, but it is known by the EM.
// In that case, we fall back to wrapped ShortUrl entity directly.
return $em->find(ShortUrl::class, $this->shortUrl->getId()) ?? $this->shortUrl;
}
}

View file

@ -20,7 +20,8 @@ class CrawlableShortCodesQuery extends EntitySpecificationRepository implements
->from(ShortUrl::class, 's')
->where($qb->expr()->eq('s.crawlable', ':crawlable'))
->setParameter('crawlable', true)
->setMaxResults($blockSize);
->setMaxResults($blockSize)
->orderBy('s.shortCode');
$page = 0;
do {

View file

@ -17,12 +17,14 @@ class DoctrineBatchHelper implements DoctrineBatchHelperInterface
}
/**
* @template T
* @param iterable<T> $resultSet
* @return iterable<T>
* @throws Throwable
*/
public function wrapIterable(iterable $resultSet, int $batchSize): iterable
{
$iteration = 0;
$this->em->beginTransaction();
try {
@ -33,7 +35,6 @@ class DoctrineBatchHelper implements DoctrineBatchHelperInterface
}
} catch (Throwable $e) {
$this->em->rollback();
throw $e;
}

View file

@ -22,8 +22,8 @@ class RobotsTest extends ApiTestCase
# https://www.robotstxt.org/orig.html
User-agent: *
Allow: /custom
Allow: /abc123
Allow: /custom
Disallow: /
ROBOTS,
$body,

View file

@ -7,6 +7,7 @@ namespace ShlinkioTest\Shlink\Core\Importer;
use Cake\Chronos\Chronos;
use Doctrine\Common\Collections\ArrayCollection;
use Doctrine\ORM\EntityManagerInterface;
use PHPUnit\Framework\Assert;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\Attributes\Test;
use PHPUnit\Framework\MockObject\MockObject;
@ -184,6 +185,7 @@ class ImportedLinksProcessorTest extends TestCase
$this->em->expects($this->exactly($amountOfPersistedVisits + ($foundShortUrl === null ? 1 : 0)))->method(
'persist',
)->with($this->callback(fn (object $arg) => $arg instanceof ShortUrl || $arg instanceof Visit));
$this->em->expects($this->any())->method('find')->willReturn(null);
$this->io->expects($this->once())->method('text')->with($this->stringContains($expectedOutput));
$this->processor->process($this->io, ImportResult::withShortUrls([$importedUrl]), $this->buildParams());
@ -229,6 +231,32 @@ class ImportedLinksProcessorTest extends TestCase
];
}
#[Test, DataProvider('provideFoundShortUrls')]
public function visitsArePersistedWithProperShortUrl(?ShortUrl $foundShortUrl): void
{
$originalShortUrl = ShortUrl::withLongUrl('https://foo');
$this->em->method('getRepository')->with(ShortUrl::class)->willReturn($this->repo);
$this->repo->expects($this->once())->method('findOneByImportedUrl')->willReturn($originalShortUrl);
$this->em->expects($this->exactly(2))->method('find')->willReturn($foundShortUrl);
$this->em->expects($this->once())->method('persist')->willReturnCallback(
static fn (Visit $visit) => Assert::assertSame($foundShortUrl ?? $originalShortUrl, $visit->getShortUrl()),
);
$now = Chronos::now();
$this->processor->process($this->io, ImportResult::withShortUrls([
new ImportedShlinkUrl(ImportSource::SHLINK, 'https://s', [], $now, null, 's', null, [
new ImportedShlinkVisit('', '', $now, null),
]),
]), $this->buildParams());
}
public static function provideFoundShortUrls(): iterable
{
yield [null];
yield [ShortUrl::withLongUrl('https://bar')];
}
/**
* @param iterable<ImportedShlinkOrphanVisit> $visits
*/