feat(feedmerge): remove duplicates based off of title too (#4392)

This commit is contained in:
Dag 2025-01-03 08:17:47 +01:00 committed by GitHub
parent db3899f2e6
commit 97f7df0d06
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -6,8 +6,10 @@ class FeedMergeBridge extends FeedExpander
const NAME = 'FeedMerge';
const URI = 'https://github.com/RSS-Bridge/rss-bridge';
const DESCRIPTION = <<<'TEXT'
This bridge merges two or more feeds into a single feed. Max 10 items are fetched from each feed.
TEXT;
This bridge merges two or more feeds into a single feed. <br>
Max 10 latest items are fetched from each individual feed. <br>
Items with identical url or title are considered duplicates (and are removed). <br>
TEXT;
const PARAMETERS = [
[
@ -36,11 +38,11 @@ TEXT;
];
/**
* todo: Consider a strategy which produces a shorter feed url
* TODO: Consider a strategy which produces a shorter feed url
*/
public function collectData()
{
$limit = (int)($this->getInput('limit') ?: 10);
$limit = (int)($this->getInput('limit') ?: 99);
$feeds = [
$this->getInput('feed_1'),
$this->getInput('feed_2'),
@ -61,7 +63,7 @@ TEXT;
if (count($feeds) > 1) {
// Allow one or more feeds to fail
try {
$this->collectExpandableDatas($feed);
$this->collectExpandableDatas($feed, 10);
} catch (HttpException $e) {
$this->logger->warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e)));
// This feed item might be spammy. Considering dropping it.
@ -80,31 +82,48 @@ TEXT;
throw $e;
}
} else {
$this->collectExpandableDatas($feed);
$this->collectExpandableDatas($feed, 10);
}
}
// If $this->items is empty we should consider throw exception here
// Sort by timestamp descending
// Sort by timestamp, uri, title in descending order
usort($this->items, function ($a, $b) {
$t1 = $a['timestamp'] ?? $a['uri'] ?? $a['title'];
$t2 = $b['timestamp'] ?? $b['uri'] ?? $b['title'];
return $t2 <=> $t1;
});
// Remove duplicates by using url as unique key
// Remove duplicates by url
$items = [];
foreach ($this->items as $item) {
$index = $item['uri'] ?? null;
if ($index) {
// Overwrite duplicates
$items[$index] = $item;
$uri = $item['uri'] ?? null;
if ($uri) {
// Insert or override the existing duplicate
$items[$uri] = $item;
} else {
// The item doesn't have a uri!
$items[] = $item;
}
}
$this->items = array_slice(array_values($items), 0, $limit);
$this->items = array_values($items);
// Remove duplicates by title
$items = [];
foreach ($this->items as $item) {
$title = $item['title'] ?? null;
if ($title) {
// Insert or override the existing duplicate
$items[$title] = $item;
} else {
// The item doesn't have a title!
$items[] = $item;
}
}
$this->items = array_values($items);
$this->items = array_slice($this->items, 0, $limit);
}
public function getIcon()