mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-26 19:36:52 +03:00
[FolhaDeSaoPauloBridge]: Small improvements (#1724)
This commit is contained in:
parent
5c69577253
commit
cb4bc57c72
1 changed file with 31 additions and 15 deletions
|
@ -12,13 +12,25 @@ class FolhaDeSaoPauloBridge extends FeedExpander {
|
||||||
'required' => true,
|
'required' => true,
|
||||||
'title' => 'Select the sub-feed (see https://www1.folha.uol.com.br/feed/)',
|
'title' => 'Select the sub-feed (see https://www1.folha.uol.com.br/feed/)',
|
||||||
'exampleValue' => 'emcimadahora/rss091.xml',
|
'exampleValue' => 'emcimadahora/rss091.xml',
|
||||||
)
|
),
|
||||||
|
'amount' => array(
|
||||||
|
'name' => 'Amount of items to fetch',
|
||||||
|
'type' => 'number',
|
||||||
|
'defaultValue' => 15,
|
||||||
|
),
|
||||||
|
'deep_crawl' => array(
|
||||||
|
'name' => 'Deep Crawl',
|
||||||
|
'description' => 'Crawl each item "deeply", that is, return the article contents',
|
||||||
|
'type' => 'checkbox',
|
||||||
|
'defaultValue' => true,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
protected function parseItem($item){
|
protected function parseItem($item){
|
||||||
$item = parent::parseItem($item);
|
$item = parent::parseItem($item);
|
||||||
|
|
||||||
|
if ($this->getInput('deep_crawl')) {
|
||||||
$articleHTMLContent = getSimpleHTMLDOMCached($item['uri']);
|
$articleHTMLContent = getSimpleHTMLDOMCached($item['uri']);
|
||||||
if($articleHTMLContent) {
|
if($articleHTMLContent) {
|
||||||
foreach ($articleHTMLContent->find('div.c-news__body .is-hidden') as $toRemove) {
|
foreach ($articleHTMLContent->find('div.c-news__body .is-hidden') as $toRemove) {
|
||||||
|
@ -27,13 +39,17 @@ class FolhaDeSaoPauloBridge extends FeedExpander {
|
||||||
$item_content = $articleHTMLContent->find('div.c-news__body', 0);
|
$item_content = $articleHTMLContent->find('div.c-news__body', 0);
|
||||||
if ($item_content) {
|
if ($item_content) {
|
||||||
$text = $item_content->innertext;
|
$text = $item_content->innertext;
|
||||||
$text = strip_tags($text, '<p><b><a><blockquote><figure><figcaption><img><strong><em>');
|
$text = strip_tags($text, '<p><b><a><blockquote><figure><figcaption><img><strong><em><ul><li>');
|
||||||
$item['content'] = $text;
|
$item['content'] = $text;
|
||||||
$item['uri'] = explode('*', $item['uri'])[1];
|
$item['uri'] = explode('*', $item['uri'])[1];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Debug::log('???: ' . $item['uri']);
|
Debug::log('???: ' . $item['uri']);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
$item['uri'] = explode('*', $item['uri'])[1];
|
||||||
|
}
|
||||||
|
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
@ -48,6 +64,6 @@ class FolhaDeSaoPauloBridge extends FeedExpander {
|
||||||
$feed_url = self::URI . '/' . $this->getInput('feed');
|
$feed_url = self::URI . '/' . $this->getInput('feed');
|
||||||
}
|
}
|
||||||
Debug::log('URL: ' . $feed_url);
|
Debug::log('URL: ' . $feed_url);
|
||||||
$this->collectExpandableDatas($feed_url);
|
$this->collectExpandableDatas($feed_url, $this->getInput('amount'));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue