mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-02-16 15:19:55 +03:00
[ZeitBridge] Remove annoyances, add content
Remove navigational elements and podcast images. Add many more header images, article content in <ul> (and, for good measure, in <ol>), and quotes with their content rather than only their author. Extreme example: https://www.zeit.de/campus/2024-05/protest-palaestina-universitaet-europa-uebersicht
This commit is contained in:
parent
a7ed3d56f9
commit
4d12aa2a9e
1 changed file with 3 additions and 3 deletions
|
@ -87,7 +87,7 @@ class ZeitBridge extends FeedExpander
|
|||
// remove known bad elements
|
||||
foreach (
|
||||
$article->find(
|
||||
'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, .article-heading__container--podcast, div[data-paywall], .js-embed-consent'
|
||||
'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, .article-heading__container--podcast, .podcast-player__image, div[data-paywall], .js-embed-consent, script, nav, .article-flexible-toc__subheading-link, .faq-link'
|
||||
) as $bad
|
||||
) {
|
||||
$bad->remove();
|
||||
|
@ -114,7 +114,7 @@ class ZeitBridge extends FeedExpander
|
|||
}
|
||||
|
||||
// header image
|
||||
$headerimg = $article->find('*[data-ct-row="headerimage"]', 0) ?? $article->find('header', 0);
|
||||
$headerimg = $article->find('*[data-ct-row="headerimage"]', 0) ?? $article->find('.article-header', 0) ?? $article->find('header', 0);
|
||||
if ($headerimg) {
|
||||
$item['content'] .= implode('', $headerimg->find('img[src], figcaption'));
|
||||
}
|
||||
|
@ -124,7 +124,7 @@ class ZeitBridge extends FeedExpander
|
|||
|
||||
if ($pages) {
|
||||
foreach ($pages as $page) {
|
||||
$elements = $page->find('p, h2, figcaption, img[src]');
|
||||
$elements = $page->find('p, ul, ol, h2, figure.article__media img[src], figure.article__media figcaption, figure.quote');
|
||||
$item['content'] .= implode('', $elements);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue