diff --git a/bridges/CommonDreamsBridge.php b/bridges/CommonDreamsBridge.php
index ea21b436..99580499 100644
--- a/bridges/CommonDreamsBridge.php
+++ b/bridges/CommonDreamsBridge.php
@@ -21,10 +21,12 @@ class CommonDreamsBridge extends FeedExpander
 
     private function extractContent($url)
     {
-        $html3 = getSimpleHTMLDOMCached($url);
-        $text = $html3->find('div[class=field--type-text-with-summary]', 0)->innertext;
-        $html3->clear();
-        unset($html3);
+        $dom = getSimpleHTMLDOMCached($url);
+        $summary = $dom->find('div.node__body', 0);
+        // The body container may be missing; use empty content rather than reading a property on null.
+        $text = $summary ? $summary->innertext : '';
+        $dom->clear();
+        unset($dom);
         return $text;
     }
 }
diff --git a/bridges/TheHackerNewsBridge.php b/bridges/TheHackerNewsBridge.php
index 98b8f840..0636bb46 100644
--- a/bridges/TheHackerNewsBridge.php
+++ b/bridges/TheHackerNewsBridge.php
@@ -13,72 +13,82 @@ class TheHackerNewsBridge extends BridgeAbstract
         $limit = 0;
 
         foreach ($html->find('div.body-post') as $element) {
-            if ($limit < 5) {
-                $article_url = $element->find('a.story-link', 0)->href;
-                $article_author = trim($element->find('i.icon-user', 0)->parent()->plaintext);
-                $article_author = str_replace('', '', $article_author);
-                $article_title = $element->find('h2.home-title', 0)->plaintext;
+            if ($limit >= 5) {
+                break;
+            }
 
-                $article_timestamp = time();
-                //Date without time
-                $calendar = $element->find('i.icon-calendar', 0);
-                if ($calendar) {
+            $article_author = null;
+            $icon_user = $element->find('i.icon-user', 0);
+            if ($icon_user) {
+                $article_author = trim($icon_user->parent()->plaintext);
+                $article_author = str_replace('', '', $article_author);
+            }
+            $article_title = $element->find('h2.home-title', 0)->plaintext;
+
+            $article_timestamp = time();
+            //Date without time
+            $calendar = $element->find('i.icon-calendar', 0);
+            if ($calendar) {
+                $article_timestamp = strtotime(
+                    extractFromDelimiters(
+                        $calendar->parent()->outertext,
+                        '</i>',
+                        '<span>'
+                    )
+                );
+            }
+
+            //Article thumbnail in lazy-loading image
+            if (is_object($element->find('img[data-echo]', 0))) {
+                $article_thumbnail = [
+                    extractFromDelimiters(
+                        $element->find('img[data-echo]', 0)->outertext,
+                        "data-echo='",
+                        "'"
+                    )
+                ];
+            } else {
+                $article_thumbnail = [];
+            }
+
+            $article_url = $element->find('a.story-link', 0)->href;
+            // Reset per item so a page without div.articlebody does not reuse the previous item's content.
+            $contents = '';
+            $article = getSimpleHTMLDOMCached($article_url);
+            if ($article) {
+                //Article body
+                $var = $article->find('div.articlebody', 0);
+                if ($var) {
+                    $contents = $var->innertext;
+                    $contents = stripRecursiveHtmlSection($contents, 'div', '<div class="ad_');
+                    $contents = stripWithDelimiters($contents, 'id="google_ads', '</iframe>');
+                    $contents = stripWithDelimiters($contents, '<script', '</script>');
+                }
+                //Date with time
+                if (is_object($article->find('meta[itemprop=dateModified]', 0))) {
                     $article_timestamp = strtotime(
                         extractFromDelimiters(
-                            $calendar->parent()->outertext,
-                            '</i>',
-                            '<span>'
+                            $article->find('meta[itemprop=dateModified]', 0)->outertext,
+                            "content='",
+                            "'"
                         )
                     );
                 }
-
-                //Article thumbnail in lazy-loading image
-                if (is_object($element->find('img[data-echo]', 0))) {
-                    $article_thumbnail = [
-                        extractFromDelimiters(
-                            $element->find('img[data-echo]', 0)->outertext,
-                            "data-echo='",
-                            "'"
-                        )
-                    ];
-                } else {
-                    $article_thumbnail = [];
-                }
-
-                $article = getSimpleHTMLDOMCached($article_url);
-                if ($article) {
-                    //Article body
-                    $var = $article->find('div.articlebody', 0);
-                    if ($var) {
-                        $contents = $var->innertext;
-                        $contents = stripRecursiveHtmlSection($contents, 'div', '<div class="ad_');
-                        $contents = stripWithDelimiters($contents, 'id="google_ads', '</iframe>');
-                        $contents = stripWithDelimiters($contents, '<script', '</script>');
-                    }
-                    //Date with time
-                    if (is_object($article->find('meta[itemprop=dateModified]', 0))) {
-                        $article_timestamp = strtotime(
-                            extractFromDelimiters(
-                                $article->find('meta[itemprop=dateModified]', 0)->outertext,
-                                "content='",
-                                "'"
-                            )
-                        );
-                    }
-                } else {
-                    $contents = 'Could not request TheHackerNews: ' . $article_url;
-                }
-
-                $item = [];
-                $item['uri'] = $article_url;
-                $item['title'] = $article_title;
-                $item['author'] = $article_author;
-                $item['enclosures'] = $article_thumbnail;
-                $item['timestamp'] = $article_timestamp;
-                $item['content'] = trim($contents ?? '');
-                $this->items[] = $item;
-                $limit++;
+            } else {
+                $contents = 'Could not request TheHackerNews: ' . $article_url;
             }
+
+            $item = [];
+            $item['uri'] = $article_url;
+            $item['title'] = $article_title;
+            if ($article_author) {
+                $item['author'] = $article_author;
+            }
+            $item['enclosures'] = $article_thumbnail;
+            $item['timestamp'] = $article_timestamp;
+            $item['content'] = trim($contents ?? '');
+            $this->items[] = $item;
+            $limit++;
         }
     }
 }
diff --git a/bridges/WikiLeaksBridge.php b/bridges/WikiLeaksBridge.php
index 512b1c30..08144688 100644
--- a/bridges/WikiLeaksBridge.php
+++ b/bridges/WikiLeaksBridge.php
@@ -93,8 +93,10 @@ class WikiLeaksBridge extends BridgeAbstract
             $item['title'] = $article->find('h3', 0)->plaintext;
             $item['uri'] = static::URI . $article->find('h3 a', 0)->href;
             $item['content'] = $article->find('div.introduction', 0)->plaintext;
-            $item['timestamp'] = strtotime($article->find('div.timestamp', 0)->plaintext);
-
+            $timestamp = $article->find('div.timestamp', 0);
+            if ($timestamp) {
+                $item['timestamp'] = strtotime($timestamp->plaintext);
+            }
             $this->items[] = $item;
         }
     }