rss-bridge/bridges/BrutBridge.php

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

125 lines
3.8 KiB
PHP
Raw Normal View History

2019-06-08 20:30:42 +03:00
<?php
2019-06-08 20:30:42 +03:00
/**
 * Bridge for brut.media: builds a feed of the newest videos listed on the
 * page for a chosen edition and category.
 */
class BrutBridge extends BridgeAbstract
{
    const NAME = 'Brut Bridge';
    const URI = 'https://www.brut.media';
    const DESCRIPTION = 'Returns 10 newest videos by category and edition';
    const MAINTAINER = 'VerifiedJoseph';
    const PARAMETERS = [[
        'category' => [
            'name' => 'Category',
            'type' => 'list',
            'values' => [
                'News' => 'news',
                'International' => 'international',
                'Economy' => 'economy',
                'Science and Technology' => 'science-and-technology',
                'Entertainment' => 'entertainment',
                'Sports' => 'sport',
                'Nature' => 'nature',
                'Health' => 'health',
            ],
            'defaultValue' => 'news',
        ],
        'edition' => [
            'name' => ' Edition',
            'type' => 'list',
            'values' => [
                'United States' => 'us',
                'United Kingdom' => 'uk',
                'France' => 'fr',
                'Spain' => 'es',
                'India' => 'in',
                'Mexico' => 'mx',
            ],
            'defaultValue' => 'us',
        ]
    ]
    ];
    const CACHE_TIMEOUT = 1800; // 30 mins

    // Matches the `window.__PRELOADED_STATE__ = {...};` assignment; group 1 is
    // the text between `= ` and the final `;`, ending in `}`. There is no `s`
    // modifier, so the match cannot span a line break.
    private $jsonRegex = '/window\.__PRELOADED_STATE__ = ((?:.*)});/';

    /**
     * Collect up to 10 feed items from the listing page returned by getURI().
     *
     * For every list entry the linked video page is fetched and the JSON
     * state embedded in it supplies the title, timestamp, thumbnail,
     * description and (when present) the article text.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOM($this->getURI());

        $results = $html->find('div.results', 0);
        if (!is_object($results)) {
            // Without the container there is nothing to iterate; report it
            // instead of failing on a method call on a non-object below.
            // NOTE(review): relies on returnServerError() aborting execution,
            // as extractJson() already does — confirm.
            returnServerError('Failed to find video list on page');
        }

        foreach ($results->find('li.col-6.col-sm-4.col-md-3.col-lg-2.px-2.pb-4') as $li) {
            $videoPath = self::URI . $li->children(0)->href;
            // NOTE(review): 3600 is presumably a cache lifetime in seconds — confirm.
            $videoPageHtml = getSimpleHTMLDOMCached($videoPath, 3600);

            $json = $this->extractJson($videoPageHtml);

            // The video record is stored under its id in the media index; the
            // first key is the one used.
            $ids = array_keys((array) $json->media->index);
            if ($ids === []) {
                // No record for this page: skip it rather than read index 0
                // of an empty list.
                continue;
            }
            $video = $json->media->index->{$ids[0]};

            $item = [];
            $item['uri'] = $videoPath;
            $item['title'] = $video->title;
            $item['timestamp'] = $video->published_at;
            $item['enclosures'][] = $video->media->thumbnail;

            $description = $video->description;

            // isset() is false for both a missing and a null property, so an
            // absent article no longer triggers an undefined-property warning.
            $article = '';
            if (isset($video->media->seo_article)) {
                $article = markdownToHtml($video->media->seo_article);
            }

            $item['content'] = <<<EOD
<video controls poster="{$video->media->thumbnail}" preload="none">
<source src="{$video->media->mp4_url}" type="video/mp4">
</video>
<p>{$description}</p>
{$article}
EOD;

            $this->items[] = $item;

            // Stop once 10 items have been collected.
            if (count($this->items) >= 10) {
                break;
            }
        }
    }

    /**
     * URI of the listing page for the selected edition and category, or the
     * parent's URI when either input is not set.
     */
    public function getURI()
    {
        $edition = $this->getInput('edition');
        $category = $this->getInput('category');

        if ($edition === null || $category === null) {
            return parent::getURI();
        }

        return self::URI . '/' . $edition . '/' . $category;
    }

    /**
     * Feed name in the form "<category> - <edition> - Brut.", or the parent's
     * name when either input is not set.
     */
    public function getName()
    {
        if ($this->getInput('edition') === null || $this->getInput('category') === null) {
            return parent::getName();
        }

        return $this->getKey('category') . ' - ' . $this->getKey('edition') . ' - Brut.';
    }

    /**
     * Extract JSON from page
     *
     * Finds the preloaded-state assignment in the page markup and decodes the
     * captured text with json_decode().
     *
     * @param mixed $html Page content; it is cast to string before matching.
     * @return mixed Decoded JSON value (objects are decoded as stdClass).
     */
    private function extractJson($html)
    {
        if (!preg_match($this->jsonRegex, (string) $html, $parts)) {
            returnServerError('Failed to extract data from page');
        }

        $data = json_decode($parts[1]);

        // json_decode() signals failure with null, never false.
        if ($data === null) {
            returnServerError('Failed to decode extracted data');
        }

        return $data;
    }
}