diff --git a/composer.json b/composer.json index 597cc4b1..922057d8 100644 --- a/composer.json +++ b/composer.json @@ -53,7 +53,7 @@ "shlinkio/shlink-common": "^3.2.0", "shlinkio/shlink-config": "^1.0", "shlinkio/shlink-event-dispatcher": "^1.4", - "shlinkio/shlink-importer": "^1.0", + "shlinkio/shlink-importer": "^1.0.1", "shlinkio/shlink-installer": "^5.1.0", "shlinkio/shlink-ip-geolocation": "^1.5", "symfony/console": "^5.1", diff --git a/module/Core/src/Entity/ShortUrl.php b/module/Core/src/Entity/ShortUrl.php index 34883127..ec752ba9 100644 --- a/module/Core/src/Entity/ShortUrl.php +++ b/module/Core/src/Entity/ShortUrl.php @@ -15,7 +15,7 @@ use Shlinkio\Shlink\Core\Exception\ShortCodeCannotBeRegeneratedException; use Shlinkio\Shlink\Core\Model\ShortUrlEdit; use Shlinkio\Shlink\Core\Model\ShortUrlMeta; use Shlinkio\Shlink\Core\Validation\ShortUrlMetaInputFilter; -use Shlinkio\Shlink\Importer\Model\ShlinkUrl; +use Shlinkio\Shlink\Importer\Model\ImportedShlinkUrl; use function count; use function Shlinkio\Shlink\Core\generateRandomShortCode; @@ -58,7 +58,7 @@ class ShortUrl extends AbstractEntity } public static function fromImport( - ShlinkUrl $url, + ImportedShlinkUrl $url, string $source, bool $importShortCode, ?DomainResolverInterface $domainResolver = null diff --git a/module/Core/src/Importer/ImportedLinksProcessor.php b/module/Core/src/Importer/ImportedLinksProcessor.php index cb3ca57b..cb10ad15 100644 --- a/module/Core/src/Importer/ImportedLinksProcessor.php +++ b/module/Core/src/Importer/ImportedLinksProcessor.php @@ -7,9 +7,11 @@ namespace Shlinkio\Shlink\Core\Importer; use Doctrine\ORM\EntityManagerInterface; use Shlinkio\Shlink\Core\Domain\Resolver\DomainResolverInterface; use Shlinkio\Shlink\Core\Entity\ShortUrl; +use Shlinkio\Shlink\Core\Repository\ShortUrlRepositoryInterface; +use Shlinkio\Shlink\Core\Util\DoctrineBatchIterator; use Shlinkio\Shlink\Core\Util\TagManagerTrait; use Shlinkio\Shlink\Importer\ImportedLinksProcessorInterface; -use Shlinkio\Shlink\Importer\Model\ShlinkUrl; +use Shlinkio\Shlink\Importer\Model\ImportedShlinkUrl; class ImportedLinksProcessor implements ImportedLinksProcessorInterface { @@ -25,31 +27,28 @@ class ImportedLinksProcessor implements ImportedLinksProcessorInterface } /** - * @param ShlinkUrl[] $shlinkUrls + * @param iterable|ImportedShlinkUrl[] $shlinkUrls */ public function process(iterable $shlinkUrls, string $source, array $params): void { + /** @var ShortUrlRepositoryInterface $shortUrlRepo */ + $shortUrlRepo = $this->em->getRepository(ShortUrl::class); $importShortCodes = $params['import_short_codes']; - $count = 0; - $persistBlock = 100; + $iterable = new DoctrineBatchIterator($shlinkUrls, $this->em, 100); - foreach ($shlinkUrls as $url) { - $count++; + /** @var ImportedShlinkUrl $url */ + foreach ($iterable as $url) { + // Skip already imported URLs + if ($shortUrlRepo->importedUrlExists($url, $source, $importShortCodes)) { + continue; + } $shortUrl = ShortUrl::fromImport($url, $source, $importShortCodes, $this->domainResolver); $shortUrl->setTags($this->tagNamesToEntities($this->em, $url->tags())); // TODO Handle errors while creating short URLs, to avoid making the whole process fail + // * Duplicated short code $this->em->persist($shortUrl); - - // Flush and clear after X iterations - if ($count % $persistBlock === 0) { - $this->em->flush(); - $this->em->clear(); - } } - - $this->em->flush(); - $this->em->clear(); } } diff --git a/module/Core/src/Repository/ShortUrlRepository.php b/module/Core/src/Repository/ShortUrlRepository.php index e2cf578f..5cf7d997 100644 --- a/module/Core/src/Repository/ShortUrlRepository.php +++ b/module/Core/src/Repository/ShortUrlRepository.php @@ -11,6 +11,7 @@ use Shlinkio\Shlink\Common\Util\DateRange; use Shlinkio\Shlink\Core\Entity\ShortUrl; use Shlinkio\Shlink\Core\Model\ShortUrlMeta; use Shlinkio\Shlink\Core\Model\ShortUrlsOrdering; +use Shlinkio\Shlink\Importer\Model\ImportedShlinkUrl; use function array_column; use function array_key_exists; @@ -254,4 +255,16 @@ DQL; return $qb->getQuery()->getOneOrNullResult(); } + + public function importedUrlExists(ImportedShlinkUrl $url, string $source, bool $importShortCodes): bool + { + $findConditions = ['importSource' => $source]; + if ($importShortCodes) { + $findConditions['shortCode'] = $url->shortCode(); + } else { + $findConditions['longUrl'] = $url->longUrl(); + } + + return $this->count($findConditions) > 0; + } } diff --git a/module/Core/src/Repository/ShortUrlRepositoryInterface.php b/module/Core/src/Repository/ShortUrlRepositoryInterface.php index 65278a85..fac50980 100644 --- a/module/Core/src/Repository/ShortUrlRepositoryInterface.php +++ b/module/Core/src/Repository/ShortUrlRepositoryInterface.php @@ -9,6 +9,7 @@ use Shlinkio\Shlink\Common\Util\DateRange; use Shlinkio\Shlink\Core\Entity\ShortUrl; use Shlinkio\Shlink\Core\Model\ShortUrlMeta; use Shlinkio\Shlink\Core\Model\ShortUrlsOrdering; +use Shlinkio\Shlink\Importer\Model\ImportedShlinkUrl; interface ShortUrlRepositoryInterface extends ObjectRepository { @@ -30,4 +31,6 @@ interface ShortUrlRepositoryInterface extends ObjectRepository public function shortCodeIsInUse(string $slug, ?string $domain): bool; public function findOneMatching(string $url, array $tags, ShortUrlMeta $meta): ?ShortUrl; + + public function importedUrlExists(ImportedShlinkUrl $url, string $source, bool $importShortCodes): bool; } diff --git a/module/Core/src/Util/DoctrineBatchIterator.php b/module/Core/src/Util/DoctrineBatchIterator.php new file mode 100644 index 00000000..05311b09 --- /dev/null +++ b/module/Core/src/Util/DoctrineBatchIterator.php @@ -0,0 +1,67 @@ +resultSet = $resultSet; + $this->em = $em; + $this->batchSize = $batchSize; + } + + /** + * @throws Throwable + */ + public function getIterator(): iterable + { + $iteration = 0; + $resultSet = $this->resultSet; + + $this->em->beginTransaction(); + + try { + foreach ($resultSet as $key => $value) { + $iteration++; + yield $key => $value; + $this->flushAndClearBatch($iteration); + } + } catch (Throwable $e) { + $this->em->rollback(); + + throw $e; + } + + $this->flushAndClearEntityManager(); + $this->em->commit(); + } + + private function flushAndClearBatch(int $iteration): void + { + if ($iteration % $this->batchSize) { + return; + } + + $this->flushAndClearEntityManager(); + } + + private function flushAndClearEntityManager(): void + { + $this->em->flush(); + $this->em->clear(); + } +}