[
'name' => 'Domain to use',
'required' => true,
'defaultValue' => self::DEFAULT_DOMAIN
],
'page' => [
'name' => 'Initial page to load',
'required' => true,
'exampleValue' => 'franceinter/podcasts/burne-out'
]
]];
/**
 * Resolves the base URL (scheme + host) that every request of this bridge is built on.
 *
 * Reads the 'domain' input, falls back to self::DEFAULT_DOMAIN when it is empty,
 * and prepends 'https://' when the value carries no scheme.
 *
 * The result never ends with a slash: callers concatenate it with '/' or with
 * paths that already start with '/', so a trailing slash typed by the user
 * would otherwise yield URLs such as 'https://host//path'.
 *
 * @return string Base URL including a scheme and without trailing slash
 */
private function getDomain()
{
    // Cast first: the input may be unset, and trim() on null is deprecated in recent PHP versions.
    $domain = trim((string) $this->getInput('domain'));

    if (empty($domain)) {
        $domain = self::DEFAULT_DOMAIN;
    }

    // No scheme separator means the user typed a bare host name.
    if (strpos($domain, '://') === false) {
        $domain = 'https://' . $domain;
    }

    return rtrim($domain, '/');
}
/**
 * Builds the URL of the page this bridge loads: the resolved domain, a single
 * '/', then the 'page' input.
 *
 * Leading slashes are stripped from 'page' so that a value such as
 * '/franceinter/podcasts/foo' does not produce a double slash after the domain.
 *
 * @return string URL of the page to load
 */
public function getURI()
{
    // Cast first: the input may be unset, and ltrim() on null is deprecated in recent PHP versions.
    $page = ltrim((string) $this->getInput('page'), '/');

    return $this->getDomain() . '/' . $page;
}
public function collectData()
{
$html = getSimpleHTMLDOM($this->getURI());
// An array of dom nodes
$documentsList = $html->find('.DocumentsList', 0);
$documentsListWrapper = $documentsList->find('.DocumentsList-wrapper', 0);
$cardList = $documentsListWrapper->find('.CardMedia');
foreach ($cardList as $card) {
$item = [];
$title_link = $card->find('.ConceptTitle a', 0);
$item['title'] = $title_link->plaintext;
$uri = $title_link->getAttribute('href', 0);
switch (substr($uri, 0, 1)) {
case 'h': // absolute uri
$item['uri'] = $uri;
break;
case '/': // domain relative uri
$item['uri'] = $this->getDomain() . $uri;
break;
default:
$item['uri'] = $this->getDomain() . '/' . $uri;
}
// Finally, obtain the mp3 from some weird Radio France API (url obtained by reading network calls, no less)
$media_url = self::APIENDPOINT . '?value=' . $uri;
$rawJSON = getSimpleHTMLDOMCached($media_url);
$processedJSON = json_decode($rawJSON);
$model_content = $processedJSON->content;
if (empty($model_content->manifestations)) {
error_log("Seems like $uri has no manifestation");
} else {
$item['enclosures'] = [ $model_content->manifestations[0]->url ];
$item['content'] = '';
if (isset($model_content->visual)) {
$item['content'] .= "visual->src}\"
alt=\"{$model_content->visual->legend}\"
style=\"float:left; width:400px; margin: 1em;\"/>";
}
if (isset($model_content->standFirst)) {
$item['content'] .= $model_content->standFirst;
}
if (isset($model_content->bodyJson)) {
if (!empty($item['content'])) {
$item['content'] .= '
{$childText}{$valueText}
\n"; case 'quote': return "{$childText}{$valueText}\n"; case 'link': return "data->href}\">{$childText}{$valueText}\n"; case 'audio': return ''; case 'embed': return $jsonElement->data->html; default: return $jsonElement->value; } } private function convertJsonChildrenToHTML($children) { $converted = array_map([$this, 'convertJsonElementToHTML'], $children); return array_reduce($converted, function ($a, $b) { return $a . $b; }, ''); } private function removeAds($element) { $ads = $element->find('AdSlot'); foreach ($ads as $ad) { $ad->remove(); } return $element; } /** * Replaces all relative URIs with absolute ones * @param $element A simplehtmldom element * @return The $element->innertext with all URIs replaced */ private function replaceUriInHtmlElement($element) { $returned = $element->innertext; foreach (self::REPLACED_ATTRIBUTES as $initial => $final) { $returned = str_replace($initial . '="/', $final . '="' . self::URI . '/', $returned); } return $returned; } }