From 5a733b3d821ec857711a65bc451f303a846b5faa Mon Sep 17 00:00:00 2001
From: dag
Date: Sun, 10 Apr 2022 18:56:24 +0200
Subject: [PATCH] feat: add limit options to the slowest bridges

Add a shared `LIMIT` parameter definition to BridgeAbstract and expose it
as an optional `limit` input on the bridges listed below, so the number
of collected items can be capped.

Fallbacks when `limit` is not supplied: 10 items, except UnogsBridge (30)
and WiredBridge (-1). DarkReadingBridge previously hard-coded 20.
FolhaDeSaoPauloBridge keeps using its existing `amount` input, and
OpenlyBridge slices to a fixed 5 items without a new input.

In InternetArchiveBridge the fallback is parenthesised,
`count($items) >= ($this->getInput('limit') ?? 10)`, because `>=` binds
tighter than `??`; without the parentheses the fallback would never be
used and a missing limit would stop the loop after the first item.

---
 bridges/DarkReadingBridge.php     |  6 ++++--
 bridges/FindACrewBridge.php       |  6 ++++--
 bridges/FolhaDeSaoPauloBridge.php |  3 ++-
 bridges/GQMagazineBridge.php      |  6 ++++++
 bridges/InternetArchiveBridge.php | 13 ++++++++++---
 bridges/NextInpactBridge.php      |  7 +++++--
 bridges/OpenlyBridge.php          |  4 +++-
 bridges/PcGamerBridge.php         |  9 ++++++++-
 bridges/UnogsBridge.php           | 16 +++++++++++++---
 bridges/WeLiveSecurityBridge.php  |  8 +++++++-
 bridges/WiredBridge.php           |  6 ++++--
 bridges/ZDNetBridge.php           |  6 ++++--
 lib/BridgeAbstract.php            | 10 ++++++++++
 13 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/bridges/DarkReadingBridge.php b/bridges/DarkReadingBridge.php
index 93deb9ce..8fe242dd 100644
--- a/bridges/DarkReadingBridge.php
+++ b/bridges/DarkReadingBridge.php
@@ -33,7 +33,8 @@ class DarkReadingBridge extends FeedExpander {
 				'Insider Threats' => '663_Insider%20Threats',
 				'Vulnerability Management' => '664_Vulnerability%20Management',
 			)
-		)
+		),
+		'limit' => self::LIMIT,
 	));
 
 	public function collectData(){
@@ -48,7 +49,8 @@ class DarkReadingBridge extends FeedExpander {
 		if ($feed_id != '000') {
 			$feed_url .= '?f_n=' . $feed_id . '&f_ln=' . $feed_name;
 		}
-		$this->collectExpandableDatas($feed_url, 20);
+		$limit = $this->getInput('limit') ?? 10;
+		$this->collectExpandableDatas($feed_url, $limit);
 	}
 
 	protected function parseItem($newsItem){
diff --git a/bridges/FindACrewBridge.php b/bridges/FindACrewBridge.php
index 07912680..8282ead1 100644
--- a/bridges/FindACrewBridge.php
+++ b/bridges/FindACrewBridge.php
@@ -26,7 +26,8 @@ class FindACrewBridge extends BridgeAbstract {
 			'distance' => array(
 				'name' => 'Limit boundary of search in KM',
 				'title' => 'Boundary of the search in kilometers when using longitude and latitude'
-			)
+			),
+			'limit' => self::LIMIT,
 		)
 	);
 
@@ -59,7 +60,8 @@ class FindACrewBridge extends BridgeAbstract {
 		$html = getSimpleHTMLDOM($url, $header, $opts) or returnClientError('No results for this query.');
 
 		$annonces = $html->find('.css_SrhRst');
-		foreach ($annonces as $annonce) {
+		$limit = $this->getInput('limit') ?? 10;
+		foreach (array_slice($annonces, 0, $limit) as $annonce) {
 			$item = array();
 
 			$link = parent::getURI() . $annonce->find('.lstsum-btn-con a', 0)->href;
diff --git a/bridges/FolhaDeSaoPauloBridge.php b/bridges/FolhaDeSaoPauloBridge.php
index 181bd93f..6506fdba 100644
--- a/bridges/FolhaDeSaoPauloBridge.php
+++ b/bridges/FolhaDeSaoPauloBridge.php
@@ -63,6 +63,7 @@ class FolhaDeSaoPauloBridge extends FeedExpander {
 			$feed_url = self::URI . '/' . $this->getInput('feed');
 		}
 		Debug::log('URL: ' . $feed_url);
-		$this->collectExpandableDatas($feed_url, $this->getInput('amount'));
+		$limit = $this->getInput('amount');
+		$this->collectExpandableDatas($feed_url, $limit);
 	}
 }
diff --git a/bridges/GQMagazineBridge.php b/bridges/GQMagazineBridge.php
index 8fb71dd7..cacd6159 100644
--- a/bridges/GQMagazineBridge.php
+++ b/bridges/GQMagazineBridge.php
@@ -32,6 +32,7 @@ class GQMagazineBridge extends BridgeAbstract
 			'required' => true,
 			'exampleValue' => 'sexe/news'
 		),
+		'limit' => self::LIMIT,
 	));
 
 	const REPLACED_ATTRIBUTES = array(
@@ -76,7 +77,12 @@ class GQMagazineBridge extends BridgeAbstract
 
 		// Since GQ don't want simple class scrapping, let's do it the hard way and ... discover content !
 		$main = $html->find('main', 0);
+		$limit = $this->getInput('limit') ?? 10;
 		foreach ($main->find('a') as $link) {
+			if (count($this->items) >= $limit) {
+				break;
+			}
+
 			$uri = $link->href;
 			$date = $link->parent()->find('time', 0);
 
diff --git a/bridges/InternetArchiveBridge.php b/bridges/InternetArchiveBridge.php
index 969776fe..b9f9d274 100644
--- a/bridges/InternetArchiveBridge.php
+++ b/bridges/InternetArchiveBridge.php
@@ -23,7 +23,8 @@ class InternetArchiveBridge extends BridgeAbstract {
 					'Web Archives' => 'web-archive',
 				),
 				'defaultValue' => 'uploads',
-			)
+			),
+			'limit' => self::LIMIT,
 		)
 	);
 
@@ -72,7 +73,8 @@ class InternetArchiveBridge extends BridgeAbstract {
 		if ($this->getInput('content') !== 'posts') {
 			$detailsDivNumber = 0;
 
-			foreach ($html->find('div.results > div[data-id]') as $index => $result) {
+			$results = $html->find('div.results > div[data-id]');
+			foreach ($results as $index => $result) {
 				$item = array();
 
 				if (in_array($result->class, $this->skipClasses)) {
@@ -110,6 +112,11 @@ class InternetArchiveBridge extends BridgeAbstract {
 				}
 
 				$detailsDivNumber++;
+
+				$limit = $this->getInput('limit') ?? 10;
+				if (count($this->items) >= $limit) {
+					break;
+				}
 			}
 		}
 
@@ -302,7 +309,7 @@ EOD;
 
 			$items[] = $item;
 
-			if (count($items) >= 10) {
+			if (count($items) >= ($this->getInput('limit') ?? 10)) {
 				break;
 			}
 		}
diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php
index c3cca30d..4cac7769 100644
--- a/bridges/NextInpactBridge.php
+++ b/bridges/NextInpactBridge.php
@@ -55,7 +55,8 @@ class NextInpactBridge extends FeedExpander {
 				'Hide Brief' => '1',
 				'Only Brief' => '2'
 			)
-		)
+		),
+		'limit' => self::LIMIT,
 	));
 
 	public function collectData(){
@@ -80,7 +81,9 @@ class NextInpactBridge extends FeedExpander {
 			$feed = 'params';
 		}
 
-		$this->collectExpandableDatas($base_uri . 'rss/' . $feed . '.xml' . $args);
+		$url = sprintf('%srss/%s.xml%s', $base_uri, $feed, $args);
+		$limit = $this->getInput('limit') ?? 10;
+		$this->collectExpandableDatas($url, $limit);
 	}
 
 	protected function parseItem($newsItem){
diff --git a/bridges/OpenlyBridge.php b/bridges/OpenlyBridge.php
index 8aa036ea..3395905d 100644
--- a/bridges/OpenlyBridge.php
+++ b/bridges/OpenlyBridge.php
@@ -130,7 +130,9 @@ class OpenlyBridge extends BridgeAbstract {
 			$this->feedTitle = $html->find('a.tooltipitem', 0)->plaintext;
 		}
 
-		foreach($html->find('div.item') as $div) {
+		$items = $html->find('div.item');
+		$limit = 5;
+		foreach(array_slice($items, 0, $limit) as $div) {
 			$this->items[] = $this->getArticle($div->find('a', 0)->href);
 
 			if (count($this->items) >= $this->itemLimit) {
diff --git a/bridges/PcGamerBridge.php b/bridges/PcGamerBridge.php
index db5a9ded..95261d9c 100644
--- a/bridges/PcGamerBridge.php
+++ b/bridges/PcGamerBridge.php
@@ -7,11 +7,18 @@ class PcGamerBridge extends BridgeAbstract
 		updates and news on all your favorite PC gaming franchises.';
 	const MAINTAINER = 'IceWreck, mdemoss';
 
+	const PARAMETERS = [
+		[
+			'limit' => self::LIMIT,
+		]
+	];
+
 	public function collectData()
 	{
 		$html = getSimpleHTMLDOMCached($this->getURI(), 300);
 		$stories = $html->find('a.article-link');
-		foreach ($stories as $element) {
+		$limit = $this->getInput('limit') ?? 10;
+		foreach (array_slice($stories, 0, $limit) as $element) {
 			$item = array();
 			$item['uri'] = $element->href;
 			$articleHtml = getSimpleHTMLDOMCached($item['uri']);
diff --git a/bridges/UnogsBridge.php b/bridges/UnogsBridge.php
index cac18752..f03555b4 100644
--- a/bridges/UnogsBridge.php
+++ b/bridges/UnogsBridge.php
@@ -17,7 +17,8 @@ class UnogsBridge extends BridgeAbstract {
 					'What\'s New' => 'new last 7 days',
 					'Expiring' => 'expiring'
 				)
-			)
+			),
+			'limit' => self::LIMIT,
 		),
 		'Global' => array(),
 		'Country' => array(
@@ -160,8 +161,17 @@ EOD;
 				break;
 		}
 
-		$api_url = self::URI . '/api/search?query=' . urlencode($feed)
-		. ($country_code ? '&countrylist=' . $country_code : '') . '&limit=30';
+		$limit = $this->getInput('limit') ?? 30;
+
+		// https://rapidapi.com/unogs/api/unogsng/details
+		$api_url = sprintf(
+			'%s/api/search?query=%s%s&limit=%s',
+			self::URI,
+			urlencode($feed),
+			$country_code ? '&countrylist=' . $country_code : '',
+			$limit
+		);
+
 		$json_data = $this->getJSON($api_url);
 		$movies = $json_data['results'];
 
diff --git a/bridges/WeLiveSecurityBridge.php b/bridges/WeLiveSecurityBridge.php
index 59a094a7..14af1ab3 100644
--- a/bridges/WeLiveSecurityBridge.php
+++ b/bridges/WeLiveSecurityBridge.php
@@ -5,6 +5,11 @@ class WeLiveSecurityBridge extends FeedExpander {
 	const NAME = 'We Live Security';
 	const URI = 'https://www.welivesecurity.com/';
 	const DESCRIPTION = 'Returns the newest articles.';
+	const PARAMETERS = [
+		[
+			'limit' => self::LIMIT,
+		],
+	];
 
 	protected function parseItem($item){
 		$item = parent::parseItem($item);
@@ -27,6 +32,7 @@ class WeLiveSecurityBridge extends FeedExpander {
 
 	public function collectData(){
 		$feed = static::URI . 'feed/';
-		$this->collectExpandableDatas($feed);
+		$limit = $this->getInput('limit') ?? 10;
+		$this->collectExpandableDatas($feed, $limit);
 	}
 }
diff --git a/bridges/WiredBridge.php b/bridges/WiredBridge.php
index d66bf7ff..b15f781f 100644
--- a/bridges/WiredBridge.php
+++ b/bridges/WiredBridge.php
@@ -22,7 +22,8 @@ class WiredBridge extends FeedExpander {
 				'WIRED Guides' => 'wired-guide', // /feed/tag/wired-guide/latest/rss
 				'Photo' => 'photo' // /feed/category/photo/latest/rss
 			)
-		)
+		),
+		'limit' => self::LIMIT,
 	));
 
 	public function collectData(){
@@ -42,7 +43,8 @@ class WiredBridge extends FeedExpander {
 		}
 		$feed_url .= 'rss';
 
-		$this->collectExpandableDatas($feed_url);
+		$limit = $this->getInput('limit') ?? -1;
+		$this->collectExpandableDatas($feed_url, $limit);
 	}
 
 	protected function parseItem($newsItem){
diff --git a/bridges/ZDNetBridge.php b/bridges/ZDNetBridge.php
index 13478029..927e37ae 100644
--- a/bridges/ZDNetBridge.php
+++ b/bridges/ZDNetBridge.php
@@ -156,7 +156,8 @@ class ZDNetBridge extends FeedExpander {
 					'ZDNet Government' => 'blog/government'
 				)
 			)
-		)
+		),
+		'limit' => self::LIMIT,
 	));
 
 	public function collectData(){
@@ -167,7 +168,8 @@ class ZDNetBridge extends FeedExpander {
 			$baseUri = str_replace('www.', 'downloads.', $baseUri);
 		}
 		$url = $baseUri . trim($feed, '/') . '/rss.xml';
-		$this->collectExpandableDatas($url);
+		$limit = $this->getInput('limit') ?? 10;
+		$this->collectExpandableDatas($url, $limit);
 	}
 
 	protected function parseItem($item){
diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php
index 82f866aa..0971b13c 100644
--- a/lib/BridgeAbstract.php
+++ b/lib/BridgeAbstract.php
@@ -87,6 +87,16 @@ abstract class BridgeAbstract implements BridgeInterface {
 	 */
 	const TEST_DETECT_PARAMETERS = array();
 
+	/**
+	 * This is a convenient const for the limit option in bridge contexts.
+	 * Can be inlined and modified if necessary.
+	 */
+	protected const LIMIT = [
+		'name' => 'Limit',
+		'type' => 'number',
+		'title' => 'Maximum number of items to return',
+	];
+
 	/**
 	 * Holds the list of items collected by the bridge
 	 *