2019-05-15 22:51:23 +03:00
|
|
|
<?php
|
|
|
|
|
2022-07-01 16:10:30 +03:00
|
|
|
/**
 * Bridge for the Mediapart news site.
 *
 * Expands the site's article feed and, when a session cookie is supplied,
 * appends the article body scraped from the article page to each item.
 */
class MediapartBridge extends FeedExpander
{
    const MAINTAINER = 'killruana';
    const NAME = 'Mediapart Bridge';
    const URI = 'https://www.mediapart.fr/';
    const PARAMETERS = [
        [
            'single_page_mode' => [
                'name' => 'Single page article',
                'type' => 'checkbox',
                'title' => 'Display long articles on a single page',
                'defaultValue' => 'checked'
            ],
            'mpsessid' => [
                'name' => 'MPSESSID',
                'type' => 'text',
                'title' => 'Value of the session cookie MPSESSID'
            ]
        ]
    ];
    const CACHE_TIMEOUT = 7200; // 2h
    const DESCRIPTION = 'Returns the newest articles.';

    /**
     * Collects the items of the Mediapart article feed.
     */
    public function collectData()
    {
        $url = self::URI . 'articles/feed';
        $this->collectExpandableDatas($url);
    }

    /**
     * Post-processes one feed item.
     *
     * Items whose URI is not under the "journal/" section are returned
     * unchanged. For journal items, the link optionally gets the single-page
     * query string, and - when an MPSESSID cookie value was supplied - the
     * article body is fetched and appended to the item content.
     *
     * @param array $item Feed item; this method reads the 'uri' key and
     *                    appends to the 'content' key.
     *
     * @return array The (possibly enriched) item.
     */
    protected function parseItem(array $item)
    {
        // Keep the untouched URL: the item link may get a query string below,
        // and the page fetch must not end up with that suffix twice.
        $itemUrl = $item['uri'];

        // Mediapart publishes several types of content.
        // We only process items belonging to the newspaper.
        // See issue #1292 - https://github.com/RSS-Bridge/rss-bridge/issues/1292
        if (strpos($item['uri'], self::URI . 'journal/') !== 0) {
            return $item;
        }

        // Enable single page mode?
        if ($this->getInput('single_page_mode') === true) {
            $item['uri'] .= '?onglet=full';
        }

        // Without a session cookie there is nothing more to fetch.
        $mpsessid = $this->getInput('mpsessid');
        if (empty($mpsessid)) {
            return $item;
        }

        // Set the session cookie
        $opt = [];
        $opt[CURLOPT_COOKIE] = 'MPSESSID=' . $mpsessid;

        $pageUrl = $itemUrl . '?onglet=full';
        $articlePage = getSimpleHTMLDOM($pageUrl, [], $opt);

        // The article container can be absent (for instance when the cookie
        // is no longer accepted or the page markup changed). In that case
        // keep the content that came with the feed instead of dereferencing
        // a missing node.
        $article = $articlePage ? $articlePage->find('div.content-article', 0) : null;
        if (!$article) {
            return $item;
        }

        // Extract the article content
        $content = $article->innertext;
        $content = sanitize($content);
        $content = defaultLinkTo($content, static::URI);

        // Append defensively: the feed item may not carry a 'content' key.
        $item['content'] = ($item['content'] ?? '') . $content;

        return $item;
    }
}
|