<?php

/**
 * Bridge for the New York Times home page RSS feed.
 *
 * Each feed entry is expanded by fetching the linked article page and
 * rebuilding the item content from the article's subtitle, lead figure and
 * body paragraphs.
 */
class NYTBridge extends FeedExpander
{
    const MAINTAINER = 'IceWreck';
    const NAME = 'New York Times Bridge';
    const URI = 'https://www.nytimes.com/';
    const CACHE_TIMEOUT = 900; // 15 minutes
    const DESCRIPTION = 'RSS feed for the New York Times';

    /**
     * Collects the items of the NYT home page feed.
     */
    public function collectData()
    {
        $url = 'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml';

        // NOTE(review): the second argument is presumably the maximum number
        // of feed items to expand - confirm against FeedExpander.
        $this->collectExpandableDatas($url, 40);
    }

    /**
     * Expands a single feed entry with content scraped from the article page.
     *
     * @param mixed $item Raw feed entry, passed through to the parent parser.
     *
     * @return array The parsed item. Its 'content' is replaced by the scraped
     *               article only when something was actually extracted;
     *               otherwise the item produced by the parent parser is
     *               returned untouched.
     *
     * @throws HttpException When fetching the article fails with any status
     *                       other than 403.
     */
    protected function parseItem($item)
    {
        $item = parent::parseItem($item);

        try {
            $articlePage = getSimpleHTMLDOM($item['uri']);
        } catch (HttpException $e) {
            // 403 Forbidden means we got an anti-bot response: keep the plain
            // feed item instead of failing the whole feed.
            if ($e->getCode() === 403) {
                return $item;
            }
            throw $e;
        }

        $article = '';

        // Subtitle, rendered in bold above the rest of the article.
        $subtitle = $articlePage->find('p.css-w6ymp8', 0);
        if ($subtitle !== null) {
            $article .= '<strong>' . $subtitle->plaintext . '</strong>';
        }

        // The first <figure> contains the main article image. Only emit it
        // (and its trailing line break) when the page actually has one, so a
        // missing figure does not leave a stray <br /> behind.
        $figure = $articlePage->find('figure', 0);
        if ($figure !== null) {
            $article .= $figure . '<br />';
        }

        // section.meteredContent holds the actual article paragraphs.
        foreach ($articlePage->find('section.meteredContent p') as $paragraph) {
            $article .= $paragraph;
        }

        // Do not clobber the feed-provided content when nothing could be
        // extracted (e.g. the page markup changed or the body is withheld).
        if ($article !== '') {
            $item['content'] = $article;
        }

        return $item;
    }
}