From 437afd67e0bc339bf328488f4c7411d71a872647 Mon Sep 17 00:00:00 2001 From: Dag Date: Sun, 24 Sep 2023 18:15:14 +0200 Subject: [PATCH] fix: various fixes (#3702) * fix: symfonycasts * various fixes --- bridges/AtmoNouvelleAquitaineBridge.php | 3 + bridges/BrutBridge.php | 77 +++++-------------------- bridges/SitemapBridge.php | 2 +- bridges/SymfonyCastsBridge.php | 21 ++++--- composer.json | 4 +- 5 files changed, 32 insertions(+), 75 deletions(-) diff --git a/bridges/AtmoNouvelleAquitaineBridge.php b/bridges/AtmoNouvelleAquitaineBridge.php index d4244fa9..d2621b9a 100644 --- a/bridges/AtmoNouvelleAquitaineBridge.php +++ b/bridges/AtmoNouvelleAquitaineBridge.php @@ -30,6 +30,9 @@ class AtmoNouvelleAquitaineBridge extends BridgeAbstract public function collectData() { + // this bridge is broken and unmaintained + return; + $uri = self::URI . '/monair/commune/' . $this->getInput('cities'); $html = getSimpleHTMLDOM($uri); diff --git a/bridges/BrutBridge.php b/bridges/BrutBridge.php index aa2a1b4d..0db0851a 100644 --- a/bridges/BrutBridge.php +++ b/bridges/BrutBridge.php @@ -38,50 +38,20 @@ class BrutBridge extends BridgeAbstract ] ]; - const CACHE_TIMEOUT = 1800; // 30 mins - - private $jsonRegex = '/window\.__PRELOADED_STATE__ = ((?:.*)});/'; - public function collectData() { - $html = getSimpleHTMLDOM($this->getURI()); - - $results = $html->find('div.results', 0); - - foreach ($results->find('li.col-6.col-sm-4.col-md-3.col-lg-2.px-2.pb-4') as $li) { - $item = []; - - $videoPath = self::URI . $li->children(0)->href; - $videoPageHtml = getSimpleHTMLDOMCached($videoPath, 3600); - - $json = $this->extractJson($videoPageHtml); - $id = array_keys((array) $json->media->index)[0]; - - $item['uri'] = $videoPath; - $item['title'] = $json->media->index->$id->title; - $item['timestamp'] = $json->media->index->$id->published_at; - $item['enclosures'][] = $json->media->index->$id->media->thumbnail; - - $description = $json->media->index->$id->description; - $article = ''; - - if (is_null($json->media->index->$id->media->seo_article) === false) { - $article = markdownToHtml($json->media->index->$id->media->seo_article); - } - - $item['content'] = << - - -

{$description}

- {$article} -EOD; - - $this->items[] = $item; - - if (count($this->items) >= 10) { - break; - } + $url = $this->getURI(); + $html = getSimpleHTMLDOM($url); + $regex = '/window.__PRELOADED_STATE__ = (.*);/'; + preg_match($regex, $html, $parts); + $data = Json::decode($parts[1], false); + foreach ($data->medias->index as $uid => $media) { + $this->items[] = [ + 'uid' => $uid, + 'title' => $media->metadata->slug, + 'uri' => $media->share_url, + 'timestamp' => $media->published_at, + ]; } } @@ -90,35 +60,14 @@ EOD; if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) { return self::URI . '/' . $this->getInput('edition') . '/' . $this->getInput('category'); } - return parent::getURI(); } public function getName() { if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) { - return $this->getKey('category') . ' - ' . - $this->getKey('edition') . ' - Brut.'; + return $this->getKey('category') . ' - ' . $this->getKey('edition') . ' - Brut.'; } - return parent::getName(); } - - /** - * Extract JSON from page - */ - private function extractJson($html) - { - if (!preg_match($this->jsonRegex, $html, $parts)) { - returnServerError('Failed to extract data from page'); - } - - $data = json_decode($parts[1]); - - if ($data === false) { - returnServerError('Failed to decode extracted data'); - } - - return $data; - } } diff --git a/bridges/SitemapBridge.php b/bridges/SitemapBridge.php index eec9d658..660504ba 100644 --- a/bridges/SitemapBridge.php +++ b/bridges/SitemapBridge.php @@ -72,7 +72,7 @@ class SitemapBridge extends CssSelectorBridge $sitemap_xml = $this->getSitemapXml($sitemap_url, !empty($site_map)); $links = $this->sitemapXmlToList($sitemap_xml, $url_pattern, empty($limit) ? 10 : $limit); - if (empty($links) && empty(sitemapXmlToList($sitemap_xml))) { + if (empty($links) && empty($this->sitemapXmlToList($sitemap_xml))) { returnClientError('Could not retrieve URLs with Timestamps from Sitemap: ' . $sitemap_url); } diff --git a/bridges/SymfonyCastsBridge.php b/bridges/SymfonyCastsBridge.php index 29ba87cd..e3261d98 100644 --- a/bridges/SymfonyCastsBridge.php +++ b/bridges/SymfonyCastsBridge.php @@ -10,22 +10,27 @@ class SymfonyCastsBridge extends BridgeAbstract public function collectData() { - $html = getSimpleHTMLDOM('https://symfonycasts.com/updates/find'); - $dives = $html->find('div'); + $url = 'https://symfonycasts.com/updates/find'; + $html = getSimpleHTMLDOM($url); + + /** @var simple_html_dom_node[] $dives */ + $dives = $html->find('div.user-notification-not-viewed'); - /* @var simple_html_dom $div */ foreach ($dives as $div) { - $id = $div->getAttribute('data-mark-update-id-value'); $type = $div->find('h5', 0); - $title = $div->find('span', 0); + $title = $div->find('a', 0); $dateString = $div->find('h5.font-gray', 0); $href = $div->find('a', 0); - $url = 'https://symfonycasts.com' . $href->getAttribute('href'); + $hrefAttribute = $href->getAttribute('href'); + $url = 'https://symfonycasts.com' . $hrefAttribute; - $item = []; // Create an empty item - $item['uid'] = $id; + $item = []; + $item['uid'] = $div->getAttribute('data-mark-update-update-url-value'); $item['title'] = $title->innertext; + + // this natural language date string does not work $item['timestamp'] = $dateString->innertext; + $item['content'] = $type->plaintext . '' . $title . ''; $item['uri'] = $url; $this->items[] = $item; // Add item to the list diff --git a/composer.json b/composer.json index a08c9666..31e31d74 100644 --- a/composer.json +++ b/composer.json @@ -28,8 +28,7 @@ "ext-openssl": "*", "ext-libxml": "*", "ext-simplexml": "*", - "ext-json": "*", - "ext-intl": "*" + "ext-json": "*" }, "require-dev": { "phpunit/phpunit": "^9", @@ -39,6 +38,7 @@ "ext-memcached": "Allows to use memcached as cache type", "ext-sqlite3": "Allows to use an SQLite database for caching", "ext-zip": "Required for FDroidRepoBridge", + "ext-intl": "Required for OLXBridge", "ext-dom": "Allows to use some bridges based on XPath expressions" }, "autoload-dev": {