fix: various fixes (#3741)

This commit is contained in:
Dag 2023-10-12 19:49:04 +02:00 committed by GitHub
parent d21f8cebf6
commit 6a72c56cdd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 19 deletions

View file

@ -25,20 +25,28 @@ class CarThrottleBridge extends BridgeAbstract
$articlePage = getSimpleHTMLDOMCached($item['uri'])
or returnServerError('could not retrieve page');
$item['author'] = $articlePage->find('div.author div')[1]->innertext;
$dinges = $articlePage->find('div.main-body')[0];
//remove ads
foreach ($dinges->find('aside') as $ad) {
$ad->outertext = '';
$dinges->save();
$authorDiv = $articlePage->find('div.author div');
if ($authorDiv) {
$item['author'] = $authorDiv[1]->innertext;
}
$item['content'] = $articlePage->find('div.summary')[0] .
$articlePage->find('figure.main-image')[0] .
$dinges;
$dinges = $articlePage->find('div.main-body')[0] ?? null;
//remove ads
if ($dinges) {
foreach ($dinges->find('aside') as $ad) {
$ad->outertext = '';
$dinges->save();
}
}
$var = $articlePage->find('div.summary')[0] ?? '';
$var1 = $articlePage->find('figure.main-image')[0] ?? '';
$dinges1 = $dinges ?? '';
$item['content'] = $var .
$var1 .
$dinges1;
//add the item to the list
array_push($this->items, $item);
}
}

View file

@ -23,9 +23,11 @@ class GatesNotesBridge extends BridgeAbstract
$cleanedContent = str_replace([
'<string xmlns="http://schemas.microsoft.com/2003/10/Serialization/">',
'</string>',
'\r\n',
], '', $rawContent);
$cleanedContent = str_replace('\"', '"', $cleanedContent);
$cleanedContent = trim($cleanedContent, '"');
// The content is actually JSON wrapped in double quotes, with literal (escaped) \r\n sequences inserted
$json = Json::decode($cleanedContent, false);
foreach ($json as $article) {
@ -98,7 +100,7 @@ class GatesNotesBridge extends BridgeAbstract
}
$article_body = sanitize($article_body->innertext);
$content = $top_description . $hero_image . $article_body;
$content = $top_description . ($hero_image ?? '') . $article_body;
return $content;
}

View file

@ -158,13 +158,12 @@ function getSimpleHTMLDOM(
$defaultBRText = DEFAULT_BR_TEXT,
$defaultSpanText = DEFAULT_SPAN_TEXT
) {
$content = getContents(
$url,
$header ?? [],
$opts ?? []
);
$html = getContents($url, $header ?? [], $opts ?? []);
if ($html === '') {
throw new \Exception('Unable to parse dom because the http response was the empty string');
}
return str_get_html(
$content,
$html,
$lowercase,
$forceTagsClosed,
$target_charset,