mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-01 14:24:39 +03:00
[HeiseBridge] Handle heise+ articles better (#3358)
- Stop parsing paywalled heise+ articles: the parsed content was garbage and never contained the full article anyway. - Link to archive.today instead, so the full article can be read without an account. (Automatically fetching the full article from archive.ph was not feasible because of captchas and problems extracting the actual content.)
This commit is contained in:
parent
00e716d84d
commit
212c56fde5
1 changed files with 11 additions and 1 deletions
|
@ -118,12 +118,22 @@ class HeiseBridge extends FeedExpander
|
||||||
protected function parseItem($feedItem)
|
protected function parseItem($feedItem)
|
||||||
{
|
{
|
||||||
$item = parent::parseItem($feedItem);
|
$item = parent::parseItem($feedItem);
|
||||||
$item['uri'] = explode('?', $item['uri'])[0] . '?seite=all';
|
|
||||||
|
|
||||||
|
// strip rss parameter
|
||||||
|
$item['uri'] = explode('?', $item['uri'])[0];
|
||||||
|
|
||||||
|
// ignore TechStage articles
|
||||||
if (strpos($item['uri'], 'https://www.heise.de') !== 0) {
|
if (strpos($item['uri'], 'https://www.heise.de') !== 0) {
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// abort on heise+ articles and link to archive.ph for full-text content
|
||||||
|
if (str_starts_with($item['title'], 'heise+ |')) {
|
||||||
|
$item['uri'] = 'https://archive.ph/?run=1&url=' . urlencode($item['uri']);
|
||||||
|
return $item;
|
||||||
|
}
|
||||||
|
|
||||||
|
$item['uri'] .= '?seite=all';
|
||||||
$article = getSimpleHTMLDOMCached($item['uri']);
|
$article = getSimpleHTMLDOMCached($item['uri']);
|
||||||
|
|
||||||
if ($article) {
|
if ($article) {
|
||||||
|
|
Loading…
Add table
Reference in a new issue