find('div[class=displayList]', 0)->children();
$dateFormat = '%A %e %B %Y à %H:%M';
// Use the French locale and the Paris timezone so the site's French date strings can be parsed
setlocale (LC_TIME, 'fr_FR.utf8');
date_default_timezone_set('Europe/Paris');
foreach($list as $element) {
if($element->tag == 'a') {
$articleURL = self::URI . $element->href;
$article = getSimpleHTMLDOM($articleURL);
$this->rewriteAudioPlayers($article);
// Reload the modified content
$article = str_get_html($article->save());
$textDOM = $article->find('article', 0);
// Initialise arrays
$item = array();
$audio = array();
$picture = array();
// Get the Main picture URL
$picture[] = self::URI . $article->find('div[id=pictureTitleSupport]', 0)->find('img', 0)->src;
$audioHTML = $article->find('audio');
// Collect the source URL of each audio element for the enclosures
foreach($audioHTML as $audioElement) {
$audioURL = $audioElement->src;
$audio[] = $audioURL;
}
// Rewrite picture URLs
$imgs = $textDOM->find('img[src^="http://www.radiomelodie.com/image.php]');
foreach($imgs as $img) {
$img->src = $this->rewriteImage($img->src);
$article->save();
}
// Remove Google Ads
$ads = $article->find('div[class=adInline]');
foreach($ads as $ad) {
$ad->outertext = '';
$article->save();
}
// Remove Radio Melodie Logo
$logoHTML = $article->find('div[id=logoArticleRM]', 0);
$logoHTML->outertext = '';
$article->save();
$author = $article->find('p[class=AuthorName]', 0)->plaintext;
// Convert the article's date text to a timestamp
$dateHTML = $article->find('p[class=date]', 0)->plaintext;
preg_match('/\| ([^-]*)( - .*|)$/', $dateHTML, $matches);
$dateText = $matches[1];
$dateArray = strptime($dateText, $dateFormat);
$timestamp = mktime(
$dateArray['tm_hour'],
$dateArray['tm_min'],
$dateArray['tm_sec'],
$dateArray['tm_mon'] + 1,
$dateArray['tm_mday'],
$dateArray['tm_year'] + 1900
);
$item['enclosures'] = array_merge($picture, $audio);
$item['author'] = $author;
$item['uri'] = $articleURL;
$item['title'] = $article->find('meta[property=og:title]', 0)->content;
if($timestamp !== false) {
$item['timestamp'] = $timestamp;
}
// Header Image
$header = '';
// Remove the Date and Author part
$textDOM->find('div[class=AuthorDate]', 0)->outertext = '';
// Remove Facebook javascript
$textDOM->find('script[src^=https://connect.facebook.net]', 0)->outertext = '';
// Rewrite relative Links
$textDOM = defaultLinkTo($textDOM, self::URI . '/');
$article->save();
//$this->rewriteAudioPlayers($textDOM);
$text = $textDOM->innertext;
$item['content'] = '