fetchData($html); foreach ($html->find('li[id^="screenshot-"]') as $shot) { $item = []; $additional_data = $this->findJsonForShot($shot, $data); if ($additional_data === null) { $item['uri'] = self::URI . $shot->find('a', 0)->href; $item['title'] = $shot->find('.shot-title', 0)->plaintext; } else { $item['timestamp'] = strtotime($additional_data['published_at']); $item['uri'] = self::URI . $additional_data['path']; $item['title'] = $additional_data['title']; } $item['author'] = trim($shot->find('.user-information .display-name', 0)->plaintext); $description = $shot->find('.comment', 0); $item['content'] = $description === null ? '' : $description->plaintext; $preview_path = $shot->find('figure img', 1)->attr['data-srcset']; $item['content'] .= $this->getImageTag($preview_path, $item['title']); $item['enclosures'] = [$this->getFullSizeImagePath($preview_path)]; $this->items[] = $item; } } private function fetchData($html) { $scripts = $html->find('script'); foreach ($scripts as $script) { if (strpos($script->innertext, 'newestShots') !== false) { // fix single quotes $script->innertext = preg_replace('/\'(.*)\'(,?)$/im', '"\1"\2', $script->innertext); // fix JavaScript JSON (why do they not adhere to the standard?) $script->innertext = preg_replace('/^(\s*)(\w+):/im', '\1"\2":', $script->innertext); // fix relative dates, so they are recognized by strtotime $script->innertext = preg_replace('/"about ([0-9]+ hours? ago)"(,?)$/im', '"\1"\2', $script->innertext); // find beginning of JSON array $start = strpos($script->innertext, '['); // find end of JSON array, compensate for missing character! $end = strpos($script->innertext, '];') + 1; // convert JSON to PHP array $json = substr($script->innertext, $start, $end - $start); try { // TODO: fix broken json return Json::decode($json); } catch (\JsonException $e) { return []; } } } return []; } private function findJsonForShot($shot, $json) { foreach ($json as $element) { if (strpos($shot->getAttribute('id'), (string)$element['id']) !== false) { return $element; } } return null; } private function getImageTag($preview_path, $title) { return sprintf( '
%s', $this->getFullSizeImagePath($preview_path), $preview_path, $title ); } private function getFullSizeImagePath($preview_path) { // Get last image from srcset $src_set_urls = explode(',', $preview_path); $url = end($src_set_urls); $url = explode(' ', $url)[1]; return htmlspecialchars_decode($url); } }