30s!), keep the cache timeout high to avoid killing the host */ $html = getSimpleHTMLDOMCached($this->getURI() . '/pt/noticias-listagem.aspx'); foreach ($html->find('.list_news .item') as $element) { $item = []; $item_link = $element->find('.body h2.title a', 0); /* Another broken URL, see also `bridges/ComboiosDePortugalBridge.php` */ $item['uri'] = self::URI . implode('/', array_map('urlencode', explode('/', $item_link->href))); $item['title'] = $item_link->innertext; $item['timestamp'] = str_ireplace( array_map(function ($name) { return ' ' . $name . ' '; }, self::PT_MONTH_NAMES), array_map(function ($num) { return sprintf('-%02d-', $num); }, range(1, sizeof(self::PT_MONTH_NAMES))), $element->find('span.date', 0)->innertext ); /* Fix the Image URL */ $item_image = $element->find('img.thumb', 0); $item_image->src = preg_replace('/.*&img=([^&]+).*/', '\1', $item_image->getAttribute('data-src')); /* Content: */ /* - Image */ /* - Category */ $content = $item_image . '