mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-22 17:45:40 +03:00
[HeiseBridge] Handle heise+ articles better (#3358)
- Stop parsing paywalled heise+ articles, as they yielded garbage content and never the full article anyway. - Link to archive.today so the full article can be accessed without an account. (Automatically fetching the full article from archive.ph was not feasible because of CAPTCHAs and problems extracting the actual content.)
This commit is contained in:
parent
00e716d84d
commit
212c56fde5
1 changed file with 11 additions and 1 deletion
|
@@ -118,12 +118,22 @@ class HeiseBridge extends FeedExpander
|
|||
protected function parseItem($feedItem)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$item['uri'] = explode('?', $item['uri'])[0] . '?seite=all';
|
||||
|
||||
// strip rss parameter
|
||||
$item['uri'] = explode('?', $item['uri'])[0];
|
||||
|
||||
// ignore TechStage articles
|
||||
if (strpos($item['uri'], 'https://www.heise.de') !== 0) {
|
||||
return $item;
|
||||
}
|
||||
|
||||
// abort on heise+ articles and link to archive.ph for full-text content
|
||||
if (str_starts_with($item['title'], 'heise+ |')) {
|
||||
$item['uri'] = 'https://archive.ph/?run=1&url=' . urlencode($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
||||
$item['uri'] .= '?seite=all';
|
||||
$article = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
if ($article) {
|
||||
|
|
Loading…
Reference in a new issue