<?php

/**
 * Bridge that expands the Ars Technica feeds into full-article items.
 *
 * The feed for the selected site section is read through FeedExpander, and
 * each item is then enriched in parseItem() with the article body scraped
 * from the item's own page (including follow-up pages of multi-page articles).
 */
class ArsTechnicaBridge extends FeedExpander
{
    const MAINTAINER = 'phantop';
    const NAME = 'Ars Technica';
    const URI = 'https://arstechnica.com/';
    const DESCRIPTION = 'Returns the latest articles from Ars Technica';
    const PARAMETERS = [[
        'section' => [
            'name' => 'Site section',
            'type' => 'list',
            'defaultValue' => 'index',
            'values' => [
                'All' => 'index',
                'Apple' => 'apple',
                'Board Games' => 'cardboard',
                'Cars' => 'cars',
                'Features' => 'features',
                'Gaming' => 'gaming',
                'Information Technology' => 'technology-lab',
                'Science' => 'science',
                'Staff Blogs' => 'staff-blogs',
                'Tech Policy' => 'tech-policy',
                'Tech' => 'gadgets',
            ]
        ]
    ]];

    /**
     * Builds the feed URL for the selected section and hands it to the
     * feed expander, asking for at most 10 items.
     */
    public function collectData()
    {
        $url = 'https://feeds.arstechnica.com/arstechnica/' . $this->getInput('section');
        $this->collectExpandableDatas($url, 10);
    }

    /**
     * Enriches one feed item with data scraped from the article page.
     *
     * Sets 'categories' from the page's "parsely-page" metadata, 'uid' from
     * the article URI and 'content' from the '.article-content' element(s),
     * with advertising blocks removed. Every lookup on the page is optional:
     * when an element is missing the corresponding field is left as received
     * instead of raising a fatal error.
     *
     * @param array $item Feed item; its 'uri' entry is read to fetch the article.
     * @return array The same item with the scraped fields filled in.
     */
    protected function parseItem(array $item)
    {
        $item_html = getSimpleHTMLDOMCached($item['uri']);
        $item_html = defaultLinkTo($item_html, self::URI);

        // Tags come from a JSON blob stored in the "content" attribute of the
        // parsely-page element; the element, the JSON and the key may all be absent.
        $parsely = $item_html->find('[name="parsely-page"]', 0);
        if (null !== $parsely) {
            $parsely_json = json_decode(html_entity_decode((string) $parsely->content), true);
            if (is_array($parsely_json) && isset($parsely_json['tags'])) {
                $item['categories'] = $parsely_json['tags'];
            }
        }

        // The uid is the segment following the first '=' of the URI; only set
        // it when the URI actually contains one.
        $uri_parts = explode('=', $item['uri']);
        if (isset($uri_parts[1])) {
            $item['uid'] = $uri_parts[1];
        }

        $content = $item_html->find('.article-content', 0);
        if (null === $content) {
            // No article body on the page: keep the content the item already has.
            return $item;
        }

        // The second-to-last pagination link carries the number of the last page.
        $pages = $item_html->find('nav.page-numbers > .numbers > a', -2);
        if (null !== $pages) {
            // Evaluate the bound once, as an integer, so that a non-numeric
            // link text can never keep the loop running.
            $last_page = (int) $pages->innertext;
            $html = (string) $content;
            for ($i = 2; $i <= $last_page; $i++) {
                $page_url = $item['uri'] . '&page=' . $i;
                $page_html = getSimpleHTMLDOMCached($page_url);
                $page_html = defaultLinkTo($page_html, self::URI);
                // A page without article content contributes an empty string.
                $html .= $page_html->find('.article-content', 0);
            }

            // Re-parse the concatenated markup so it can be queried as one
            // document; if parsing fails, fall back to the first page only.
            $merged = str_get_html($html);
            if ($merged) {
                $content = $merged;
            }
        }

        // remove various ars advertising
        $social = $content->find('#social-left', 0);
        if (null !== $social) {
            $social->remove();
        }
        foreach (['.ars-component-buy-box', '.ad_wrapper', '.sidebar'] as $selector) {
            foreach ($content->find($selector) as $ad) {
                $ad->remove();
            }
        }

        $item['content'] = backgroundToImg($content);

        return $item;
    }
}
|