mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-22 09:35:28 +03:00
[DarkReadingBridge] Fix content extraction (#2315)
Also:
- Add article limit (main feed was broken due to too many articles)
- Add support for article thumbnail
This commit is contained in:
parent
b86ed70376
commit
970bdd45f9
1 changed file with 7 additions and 6 deletions
|
@@ -48,22 +48,25 @@ class DarkReadingBridge extends FeedExpander {
|
|||
if ($feed_id != '000') {
|
||||
$feed_url .= '?f_n=' . $feed_id . '&f_ln=' . $feed_name;
|
||||
}
|
||||
$this->collectExpandableDatas($feed_url);
|
||||
$this->collectExpandableDatas($feed_url, 20);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem){
|
||||
$item = parent::parseItem($newsItem);
|
||||
if (empty($item['content']))
|
||||
return null; //ignore dummy articles
|
||||
$article = getSimpleHTMLDOMCached($item['uri'])
|
||||
or returnServerError('Could not request Dark Reading: ' . $item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($article);
|
||||
$item['enclosures'] = array(); //remove author profile picture
|
||||
$image = $article->find('meta[property="og:image"]', 0);
|
||||
if (is_object($image)) {
|
||||
$image = $image->content;
|
||||
$item['enclosures'] = array($image);
|
||||
}
|
||||
return $item;
|
||||
}
|
||||
|
||||
private function extractArticleContent($article){
|
||||
$content = $article->find('div#article-main', 0)->innertext;
|
||||
$content = $article->find('div.article-content', 0)->innertext;
|
||||
|
||||
foreach (array(
|
||||
'<div class="divsplitter',
|
||||
|
@@ -74,8 +77,6 @@ class DarkReadingBridge extends FeedExpander {
|
|||
$content = stripRecursiveHTMLSection($content, 'div', $div_start);
|
||||
}
|
||||
|
||||
$content = stripWithDelimiters($content, '<h1 ', '</h1>');
|
||||
|
||||
return $content;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue