mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-02-16 15:19:55 +03:00
[WeLiveSecurity] Fix content extraction (#3734)
This commit is contained in:
parent
47f52b5912
commit
143f90da60
1 changed file with 26 additions and 9 deletions
|
@@ -16,19 +16,36 @@ class WeLiveSecurityBridge extends FeedExpander
|
|||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$article_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
if (!$article_html) {
|
||||
$item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
|
||||
$html = getSimpleHTMLDOMCached($item['uri']);
|
||||
if (!$html) {
|
||||
$item['content'] .= '<br /><p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
|
||||
return $item;
|
||||
}
|
||||
|
||||
$article_content = $article_html->find('div.formatted', 0)->innertext;
|
||||
$article_content = stripWithDelimiters($article_content, '<script', '</script>');
|
||||
$article_content = stripRecursiveHTMLSection($article_content, 'div', '<div class="comments');
|
||||
$article_content = stripRecursiveHTMLSection($article_content, 'div', '<div class="similar-articles');
|
||||
$article_content = stripRecursiveHTMLSection($article_content, 'span', '<span class="meta');
|
||||
$item['content'] = trim($article_content);
|
||||
$html = $html->find('.article-page', 0);
|
||||
$content_html = $html->find('.article-body', 0);
|
||||
|
||||
// Remove social media footer
|
||||
foreach ($content_html->find('blockquote') as $blockquote) {
|
||||
if (str_starts_with(trim($blockquote->plaintext), 'Connect with us on')) {
|
||||
$blockquote->outertext = '';
|
||||
}
|
||||
}
|
||||
|
||||
// Headline subtitle
|
||||
$content = $content_html->innertext;
|
||||
$subtitle = $html->find('.sub-title', 0);
|
||||
if ($subtitle) {
|
||||
$content = '<p><b>' . $subtitle->plaintext . '</b></p>' . $content;
|
||||
}
|
||||
|
||||
// Author
|
||||
$author = $html->find('.article-author', 0);
|
||||
if ($author && !isset($item['author'])) {
|
||||
$item['author'] = trim($author->plaintext);
|
||||
}
|
||||
|
||||
$item['content'] = trim($content);
|
||||
return $item;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue