mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-23 01:55:27 +03:00
1232de5744
As far as I understand, this supports all radio france sub-pages listing podcast and/or shows (tested with my two favorites shows, so not a very professionnal test). We use here the data model provided by Radio France, which includes all data in an easily usable format.
207 lines
7.6 KiB
PHP
207 lines
7.6 KiB
PHP
<?php
|
|
|
|
/**
|
|
* A bridge allowing fetching of Radio France radios transcripts.
|
|
* I expect it to work at least for France Inter and France Culture.
|
|
* I currently test it with
|
|
* * Burne Out (https://www.radiofrance.fr/franceinter/podcasts/burne-out)
|
|
* * La méthode scientifique (https://www.radiofrance.fr/franceculture/podcasts/la-methode-scientifique)
|
|
* * Las science CQFD
|
|
*/
|
|
class RadioFranceBridge extends BridgeAbstract
|
|
{
|
|
const NAME = 'Radio France';
|
|
const URI = 'https://www.radiofrance.fr/franceinter/podcasts';
|
|
const DESCRIPTION = 'A bridge allowing to read transcripts for Radio France shows';
|
|
const MAINTAINER = 'Riduidel';
|
|
const DEFAULT_DOMAIN = 'https://www.radiofrance.fr';
|
|
|
|
/*
|
|
* The URL Prefix of the (Webapp-)API
|
|
* @const APIENDPOINT https-URL of the used endpoint
|
|
*/
|
|
const APIENDPOINT = 'https://www.radiofrance.fr/api/v2.0/path';
|
|
const PARAMETERS = [
|
|
[
|
|
'domain' => [
|
|
'name' => 'Domain to use',
|
|
'required' => true,
|
|
'defaultValue' => self::DEFAULT_DOMAIN
|
|
],
|
|
'page' => [
|
|
'name' => 'Initial page to load',
|
|
'required' => true,
|
|
'exampleValue' => 'franceinter/podcasts/burne-out'
|
|
]
|
|
]];
|
|
|
|
private function getDomain()
|
|
{
|
|
$domain = $this->getInput('domain');
|
|
if (empty($domain)) {
|
|
$domain = self::DEFAULT_DOMAIN;
|
|
}
|
|
if (strpos($domain, '://') === false) {
|
|
$domain = 'https://' . $domain;
|
|
}
|
|
return $domain;
|
|
}
|
|
|
|
public function getURI()
|
|
{
|
|
return $this->getDomain() . '/' . $this->getInput('page');
|
|
}
|
|
|
|
public function collectData()
|
|
{
|
|
$html = getSimpleHTMLDOM($this->getURI());
|
|
|
|
// An array of dom nodes
|
|
$documentsList = $html->find('.DocumentsList', 0);
|
|
$documentsListWrapper = $documentsList->find('.DocumentsList-wrapper', 0);
|
|
$cardList = $documentsListWrapper->find('.Card');
|
|
|
|
foreach ($cardList as $card) {
|
|
$item = [];
|
|
$title_link = $card->find('.ConceptTitle a', 0);
|
|
$item['title'] = $title_link->plaintext;
|
|
$uri = $title_link->getAttribute('href', 0);
|
|
switch (substr($uri, 0, 1)) {
|
|
case 'h': // absolute uri
|
|
$item['uri'] = $uri;
|
|
break;
|
|
case '/': // domain relative uri
|
|
$item['uri'] = $this->getDomain() . $uri;
|
|
break;
|
|
default:
|
|
$item['uri'] = $this->getDomain() . '/' . $uri;
|
|
}
|
|
// Finally, obtain the mp3 from some weird Radio France API (url obtained by reading network calls, no less)
|
|
$media_url = self::APIENDPOINT . '?value=' . $uri;
|
|
$rawJSON = getSimpleHTMLDOMCached($media_url);
|
|
$processedJSON = json_decode($rawJSON);
|
|
$model_content = $processedJSON->content;
|
|
if (empty($model_content->manifestations)) {
|
|
error_log("Seems like $uri has no manifestation");
|
|
} else {
|
|
$item['enclosures'] = [ $model_content->manifestations[0]->url ];
|
|
|
|
$item['content'] = '';
|
|
if (isset($model_content->visual)) {
|
|
$item['content'] .= "<img
|
|
src=\"{$model_content->visual->src}\"
|
|
alt=\"{$model_content->visual->legend}\"
|
|
style=\"float:left; width:400px; margin: 1em;\"/>";
|
|
}
|
|
if (isset($model_content->standFirst)) {
|
|
$item['content'] .= $model_content->standFirst;
|
|
}
|
|
if (isset($model_content->bodyJson)) {
|
|
if (!empty($item['content'])) {
|
|
$item['content'] .= '<hr/>';
|
|
}
|
|
$pseudo_html_array = array_map([$this, 'convertJsonElementToHTML'], $model_content->bodyJson);
|
|
$pseudo_html_text = array_reduce(
|
|
$pseudo_html_array,
|
|
function ($text, $element) {
|
|
return $text . "\n" . $element;
|
|
},
|
|
''
|
|
);
|
|
$item['content'] .= $pseudo_html_text;
|
|
}
|
|
if (isset($model_content->producers)) {
|
|
$item['author'] = $this->readAuthorsNamesFrom($model_content->producers);
|
|
} elseif (isset($model_content->staff)) {
|
|
$item['author'] = $this->readAuthorsNamesFrom($model_content->staff);
|
|
}
|
|
$time = $card->find('time', 0);
|
|
$timevalue = $time->getAttribute('datetime');
|
|
$item['timestamp'] = strtotime($timevalue);
|
|
|
|
$this->items[] = $item;
|
|
}
|
|
}
|
|
}
|
|
|
|
private function readAuthorsNamesFrom($persons_array)
|
|
{
|
|
$persons_names = array_map(function ($person_element) {
|
|
return $person_element->name;
|
|
}, $persons_array);
|
|
return array_reduce($persons_names, function ($a, $b) {
|
|
if (!empty($a)) {
|
|
$a .= ', ';
|
|
}
|
|
return $a . $b;
|
|
}, '');
|
|
}
|
|
|
|
private function convertJsonElementToHTML($jsonElement)
|
|
{
|
|
$childText = isset($jsonElement->children) ? $this->convertJsonChildrenToHTML($jsonElement->children) : '';
|
|
$valueText = isset($jsonElement->value) ? $jsonElement->value : '';
|
|
switch ($jsonElement->type) {
|
|
case 'text':
|
|
return "{$childText}{$valueText}";
|
|
case 'heading':
|
|
$level = $jsonElement->level;
|
|
return "<h$level>{$childText}{$valueText}</h$level>";
|
|
case 'list':
|
|
$tag = 'ul';
|
|
if (isset($jsonElement->ordered)) {
|
|
if ($jsonElement->ordered) {
|
|
$tag = 'ol';
|
|
}
|
|
}
|
|
return "<$tag>\n" . $childText . "</$tag>\n";
|
|
case 'list_item':
|
|
return "<li>{$childText}{$valueText}</li>\n";
|
|
case 'bounce':
|
|
return '';
|
|
case 'paragraph':
|
|
return "<p>{$childText}{$valueText}</p>\n";
|
|
case 'quote':
|
|
return "<blockquote>{$childText}{$valueText}</blockquote>\n";
|
|
case 'link':
|
|
return "<a href=\"{$jsonElement->data->href}\">{$childText}{$valueText}</a>\n";
|
|
case 'audio':
|
|
return '';
|
|
case 'embed':
|
|
return $jsonElement->data->html;
|
|
default:
|
|
return $jsonElement->value;
|
|
}
|
|
}
|
|
|
|
private function convertJsonChildrenToHTML($children)
|
|
{
|
|
$converted = array_map([$this, 'convertJsonElementToHTML'], $children);
|
|
return array_reduce($converted, function ($a, $b) {
|
|
return $a . $b;
|
|
}, '');
|
|
}
|
|
|
|
private function removeAds($element)
|
|
{
|
|
$ads = $element->find('AdSlot');
|
|
foreach ($ads as $ad) {
|
|
$ad->remove();
|
|
}
|
|
return $element;
|
|
}
|
|
|
|
/**
|
|
* Replaces all relative URIs with absolute ones
|
|
* @param $element A simplehtmldom element
|
|
* @return The $element->innertext with all URIs replaced
|
|
*/
|
|
private function replaceUriInHtmlElement($element)
|
|
{
|
|
$returned = $element->innertext;
|
|
foreach (self::REPLACED_ATTRIBUTES as $initial => $final) {
|
|
$returned = str_replace($initial . '="/', $final . '="' . self::URI . '/', $returned);
|
|
}
|
|
return $returned;
|
|
}
|
|
}
|