2022-04-05 15:03:25 +03:00
|
|
|
<?php
|
|
|
|
|
|
|
|
/**
 * Bridge that turns an Associated Press topic (tag) into a feed.
 *
 * The topic is requested from the tag endpoint as JSON; every usable "card"
 * in the response becomes one feed item (title, uri, timestamp, author,
 * content, categories and, for videos, a preview enclosure).
 */
class AssociatedPressNewsBridge extends BridgeAbstract
{
    const NAME = 'Associated Press News Bridge';
    const URI = 'https://apnews.com/';
    const DESCRIPTION = 'Returns newest articles by topic';
    const MAINTAINER = 'VerifiedJoseph';
    const PARAMETERS = [
        'Standard Topics' => [
            'topic' => [
                'name' => 'Topic',
                'type' => 'list',
                'values' => [
                    'AP Top News' => 'apf-topnews',
                    'Sports' => 'apf-sports',
                    'Entertainment' => 'apf-entertainment',
                    'Oddities' => 'apf-oddities',
                    'Travel' => 'apf-Travel',
                    'Technology' => 'apf-technology',
                    'Lifestyle' => 'apf-lifestyle',
                    'Business' => 'apf-business',
                    'U.S. News' => 'apf-usnews',
                    'Health' => 'apf-Health',
                    'Science' => 'apf-science',
                    'World News' => 'apf-WorldNews',
                    'Politics' => 'apf-politics',
                    'Religion' => 'apf-religion',
                    'Photo Galleries' => 'PhotoGalleries',
                    'Fact Checks' => 'APFactCheck',
                    'Videos' => 'apf-videos',
                ],
                'defaultValue' => 'apf-topnews',
            ],
        ],
        'Custom Topic' => [
            'topic' => [
                'name' => 'Topic',
                'type' => 'text',
                'required' => true,
                'exampleValue' => 'europe'
            ],
        ]
    ];

    const CACHE_TIMEOUT = 900; // 15 mins

    /**
     * Matches an apnews.com topic URL and captures the topic slug.
     *
     * The optional path prefix is the literal word "tag" or "hub". It is written
     * as an alternation on purpose: a character class such as [tag|hub]+ would
     * accept any mix of those letters (and "|") instead of the two words.
     */
    private $detectParamRegex = '/^https?:\/\/(?:www\.)?apnews\.com\/(?:(?:tag|hub)\/)?([\w-]+)$/';

    /** Tag feed endpoint; the topic is appended as the "tags" query value. */
    private $tagEndpoint = 'https://afs-prod.appspot.com/api/v2/feed/tag?tags=';

    /** Prefix of media preview images; followed by "<media id>/800.jpeg". */
    private $mediaEndpoint = 'https://storage.googleapis.com/afs-prod/media/';

    /** Maximum number of items collected per feed. */
    private $itemLimit = 15;

    /** Topic display name taken from the tag response; empty until data is collected. */
    private $feedName = '';

    /**
     * Derive bridge parameters from an apnews.com topic URL.
     *
     * @param string $url URL to inspect.
     * @return array|null 'topic' and 'context' parameters, or null when the URL does not match.
     */
    public function detectParameters($url)
    {
        if (preg_match($this->detectParamRegex, $url, $matches) !== 1) {
            return null;
        }

        return [
            'topic' => $matches[1],
            'context' => 'Custom Topic',
        ];
    }

    /**
     * Collect the feed items for the selected topic.
     *
     * The 'Podcasts' and 'PressReleases' topics are rejected with a client error.
     */
    public function collectData()
    {
        $topic = $this->getInput('topic');

        if (in_array($topic, ['Podcasts', 'PressReleases'], true)) {
            returnClientError($topic . ' topic feed is not supported');
            return;
        }

        $this->collectCardData();
    }

    /**
     * @return string The topic page on apnews.com, or the parent's URI when no topic is set.
     */
    public function getURI()
    {
        $topic = $this->getInput('topic');

        if ($topic === null) {
            return parent::getURI();
        }

        return self::URI . $topic;
    }

    /**
     * @return string "<topic name> - Associated Press" once a feed name is known,
     *                otherwise the parent's name.
     */
    public function getName()
    {
        if (empty($this->feedName)) {
            return parent::getName();
        }

        return $this->feedName . ' - Associated Press';
    }

    /**
     * Build the tag endpoint URL for the selected topic.
     *
     * @return string Endpoint URL, or the parent's URI when no topic is set.
     */
    private function getTagURI()
    {
        $topic = $this->getInput('topic');

        if ($topic === null) {
            return parent::getURI();
        }

        // The topic ends up in a query string and may be free text ("Custom Topic"),
        // so encode it. Slugs made of letters, digits, "-" and "_" are unchanged.
        return $this->tagEndpoint . urlencode($topic);
    }

    /**
     * Build the preview image URL of a media entry.
     *
     * @param string $mediaId Media identifier.
     * @return string URL of the 800px JPEG rendition.
     */
    private function getMediaPreviewUrl($mediaId)
    {
        return $this->mediaEndpoint . $mediaId . '/800.jpeg';
    }

    /**
     * Request the tag feed, store the topic name and append up to
     * $itemLimit items to $this->items.
     */
    private function collectCardData()
    {
        $tagUri = $this->getTagURI();

        $json = getContents($tagUri)
            or returnServerError('Could not request: ' . $tagUri);

        $tagContents = json_decode($json, true);

        if (empty($tagContents['tagObjs'])) {
            returnClientError('Topic not found: ' . $this->getInput('topic'));
            return;
        }

        $this->feedName = $tagContents['tagObjs'][0]['name'];

        // A response without cards simply yields an empty feed.
        $cards = empty($tagContents['cards']) ? [] : $tagContents['cards'];

        foreach ($cards as $card) {
            $item = $this->buildItem($card);

            if ($item === null) {
                continue;
            }

            $this->items[] = $item;

            if (count($this->items) >= $this->itemLimit) {
                break;
            }
        }
    }

    /**
     * Convert one card of the tag response into a feed item.
     *
     * @param array $card Decoded card; only its first 'contents' entry is used.
     * @return array|null The item, or null when the card must be skipped
     *                    (hub peeks, notifications, link-only content, cards
     *                    without content, media or story HTML).
     */
    private function buildItem($card)
    {
        // skip hub peeks & Notifications
        $cardType = isset($card['cardType']) ? $card['cardType'] : null;
        if (in_array($cardType, ['Hub Peek', 'Notification'], true)) {
            return null;
        }

        if (empty($card['contents'][0])) {
            return null;
        }

        $storyContent = $card['contents'][0];
        $contentType = isset($storyContent['contentType']) ? $storyContent['contentType'] : null;
        $item = [];

        switch ($contentType) {
            case 'web': // Skip link only content
                return null;

            case 'video':
                if (empty($storyContent['media'][0])) { // Nothing to embed
                    return null;
                }

                $html = $this->processVideo($storyContent);
                $item['enclosures'][] = $this->getMediaPreviewUrl($storyContent['media'][0]['id']);
                break;

            default:
                if (empty($storyContent['storyHTML'])) { // Skip if no storyHTML
                    return null;
                }

                $html = $this->buildStoryHtml($storyContent);
        }

        $item['title'] = isset($storyContent['headline']) ? $storyContent['headline'] : '';

        // Prefer the story's own link; fall back to the short id on the site root.
        if (!empty($storyContent['localLinkUrl'])) {
            $item['uri'] = $storyContent['localLinkUrl'];
        } else {
            $item['uri'] = self::URI . (isset($card['shortId']) ? $card['shortId'] : '');
        }

        if (isset($storyContent['published'])) {
            $item['timestamp'] = $storyContent['published'];
        }

        if (isset($storyContent['bylines'])) {
            $item['author'] = $this->extractAuthor($storyContent['bylines']);
        }

        $item['content'] = $html;

        if (!empty($storyContent['tagObjs'])) {
            foreach ($storyContent['tagObjs'] as $tag) {
                $item['categories'][] = $tag['name'];
            }
        }

        return $item;
    }

    /**
     * Turn a story's HTML into item content: resolve media placeholders and
     * hub links, drop datawrapper iframes and prepend the lead photo.
     *
     * @param array $storyContent Decoded story; 'storyHTML' must be non-empty.
     * @return string Item content as HTML.
     */
    private function buildStoryHtml($storyContent)
    {
        $linked = defaultLinkTo($storyContent['storyHTML'], self::URI);
        $dom = str_get_html($linked);

        // NOTE(review): assumes str_get_html() yields a falsy value when it cannot
        // parse the markup; in that case keep the unprocessed HTML.
        if ($dom) {
            $this->processMediaPlaceholders($dom, isset($storyContent['id']) ? $storyContent['id'] : '');
            $this->processHubLinks($dom, $storyContent);
            $this->processIframes($dom);
            $linked = (string) $dom;
        }

        if (empty($storyContent['leadPhotoId'])) {
            return $linked;
        }

        // Move the image to the beginning of the content
        // Explicitly not adding it to the item's enclosures!
        $leadPhotoImageTag = sprintf('<img src="%s">', $this->getMediaPreviewUrl($storyContent['leadPhotoId']));

        return $leadPhotoImageTag . $linked;
    }

    /**
     * Remove a leading "By" from a byline.
     *
     * Only the word itself plus the whitespace after it is removed. ltrim() with
     * 'By ' would act as a character mask and also strip leading "B"/"y"
     * characters of the name ("By BOB" would become "OB").
     *
     * @param string $bylines Raw byline.
     * @return string Byline without the "By" prefix.
     */
    private function extractAuthor($bylines)
    {
        return preg_replace('/^By\s+/', '', $bylines);
    }

    /**
     * Replace div.media-placeholder elements with the photo or YouTube embed
     * they stand for.
     *
     * The story is requested from the content endpoint only when the HTML
     * contains at least one placeholder. A placeholder is matched to a media
     * entry by looking its id up in 'mediumIds' and using the resulting
     * position in 'media'.
     *
     * @param object $html Parsed story HTML (modified in place).
     * @param string $id   Story id used for the content request.
     */
    private function processMediaPlaceholders($html, $id)
    {
        $placeholders = $html->find('div.media-placeholder');

        if (empty($placeholders)) {
            return;
        }

        // Fetch page content
        $json = getContents('https://afs-prod.appspot.com/api/v2/content/' . $id);
        $storyContent = json_decode($json, true);

        if (
            empty($storyContent['media'])
            || empty($storyContent['mediumIds'])
            || !is_array($storyContent['mediumIds'])
        ) {
            return;
        }

        // Compare ids as strings so that numeric-looking ids cannot match loosely.
        $mediumIds = array_map('strval', $storyContent['mediumIds']);

        foreach ($placeholders as $div) {
            $key = array_search((string) $div->id, $mediumIds, true);

            // array_search() returns false on a miss; used as an array key that
            // would silently become 0 and pick the first media entry.
            if ($key === false || !isset($storyContent['media'][$key])) {
                continue;
            }

            $media = $storyContent['media'][$key];
            $type = isset($media['type']) ? $media['type'] : null;

            if ($type === 'Photo') {
                $mediaUrl = $media['gcsBaseUrl'] . $media['imageRenderedSizes'][0] . $media['imageFileExtension'];
                $mediaCaption = $media['caption'];

                $div->outertext = <<<EOD
<figure><img loading="lazy" src="{$mediaUrl}"/><figcaption>{$mediaCaption}</figcaption></figure>
EOD;
            } elseif ($type === 'YouTube') {
                $div->outertext = <<<EOD
<iframe src="https://www.youtube.com/embed/{$media['externalId']}" width="560" height="315">
</iframe>
EOD;
            }
        }
    }

    /**
     * Create full coverage links (HubLinks).
     *
     * Every 'Hub Link' rich embed whose div is present in the HTML is replaced
     * with a paragraph linking to the embed's tag on apnews.com.
     *
     * @param object $html         Parsed story HTML (modified in place).
     * @param array  $storyContent Decoded story; 'richEmbeds' is optional.
     */
    private function processHubLinks($html, $storyContent)
    {
        if (empty($storyContent['richEmbeds'])) {
            return;
        }

        foreach ($storyContent['richEmbeds'] as $embed) {
            if ($embed['type'] !== 'Hub Link') {
                continue;
            }

            $div = $html->find('div[id=' . $embed['id'] . ']', 0);

            if (!$div) {
                continue;
            }

            $url = self::URI . $embed['tag']['id'];

            $div->outertext = <<<EOD
<p><a href="{$url}">{$embed['calloutText']} {$embed['displayName']}</a></p>
EOD;
        }
    }

    /**
     * Build the HTML for a video story from its first media entry.
     *
     * YouTube media becomes an embed iframe; anything else becomes a <video>
     * element with the preview image as poster.
     *
     * @param array $storyContent Decoded story; 'media'[0] must exist.
     * @return string Video HTML.
     */
    private function processVideo($storyContent)
    {
        $video = $storyContent['media'][0];

        if ($video['type'] === 'YouTube') {
            $url = 'https://www.youtube.com/embed/' . $video['externalId'];

            return <<<EOD
<iframe width="560" height="315" src="{$url}" frameborder="0" allowfullscreen></iframe>
EOD;
        }

        $posterUrl = $this->getMediaPreviewUrl($video['id']);

        // Join base URL, rendition and extension without separators, exactly like
        // the photo URL in processMediaPlaceholders(); spaces would break the URL.
        $videoUrl = $video['gcsBaseUrl'] . $video['videoRenderedSizes'][0] . $video['videoFileExtension'];

        return <<<EOD
<video controls poster="{$posterUrl}" preload="none">
<source src="{$videoUrl}" type="video/mp4">
</video>
EOD;
    }

    /**
     * Remove datawrapper.dwcdn.net iframes and related javaScript.
     *
     * For each removed iframe the <script> element at the same positional
     * index is removed too. NOTE(review): this pairing by index is a
     * heuristic - presumably each datawrapper embed ships with one script.
     *
     * @param object $html Parsed story HTML (modified in place).
     */
    private function processIframes($html)
    {
        foreach ($html->find('iframe') as $index => $iframe) {
            if (strpos((string) $iframe->src, 'datawrapper.dwcdn.net') === false) {
                continue;
            }

            $iframe->outertext = '';

            $script = $html->find('script', $index);

            if ($script) {
                $script->outertext = '';
            }
        }
    }
}
|