diff --git a/bridges/TheHackerNewsBridge.php b/bridges/TheHackerNewsBridge.php index 0636bb46..1f9c34c0 100644 --- a/bridges/TheHackerNewsBridge.php +++ b/bridges/TheHackerNewsBridge.php @@ -10,6 +10,8 @@ class TheHackerNewsBridge extends BridgeAbstract public function collectData() { $html = getSimpleHTMLDOM($this->getURI()); + $html = convertLazyLoading($html); + $html = defaultLinkTo($html, $this->getURI()); $limit = 0; foreach ($html->find('div.body-post') as $element) { @@ -17,74 +19,68 @@ class TheHackerNewsBridge extends BridgeAbstract break; } + // Author (not present on home page) $article_author = null; - $icon_user = $element->find('i.icon-user', 0); - if ($icon_user) { - $article_author = trim($icon_user->parent()->plaintext); - $article_author = str_replace('', '', $article_author); - } + + // Title $article_title = $element->find('h2.home-title', 0)->plaintext; + // Date $article_timestamp = time(); - //Date without time $calendar = $element->find('i.icon-calendar', 0); if ($calendar) { $article_timestamp = strtotime( extractFromDelimiters( $calendar->parent()->outertext, '', - '' + '' ) ); } - //Article thumbnail in lazy-loading image - if (is_object($element->find('img[data-echo]', 0))) { - $article_thumbnail = [ - extractFromDelimiters( - $element->find('img[data-echo]', 0)->outertext, - "data-echo='", - "'" - ) - ]; - } else { - $article_thumbnail = []; + // Thumbnail + $article_thumbnail = []; + if (is_object($element->find('img', 0))) { + $article_thumbnail = [ $element->find('img', 0)->src ]; } + // Content (truncated) + $article_content = $element->find('div.home-desc', 0)->plaintext; + + // Now try expanding article $article_url = $element->find('a.story-link', 0)->href; - $article = getSimpleHTMLDOMCached($article_url); - if ($article) { - //Article body - $var = $article->find('div.articlebody', 0); - if ($var) { - $contents = $var->innertext; - $contents = stripRecursiveHtmlSection($contents, 'div', '