From 799c93a3c60075c4514ec180053b89b62793939a Mon Sep 17 00:00:00 2001
From: jNullj
Date: Mon, 10 Jan 2022 12:44:18 +0200
Subject: [PATCH] [ExplosmBridge] Rewrite to work without feedburner (#2417)

* [ExplosmBridge] Rewrite to work without feedburner

Rewrote the bridge to scrape from the new explosm site, as the old method
of using feedburner no longer works: feedburner is stuck on dec/22, when
the explosm site changed.
---
Review notes (free text after the "---" separator; not part of the commit
message and not part of the diff):

* What the hunk does: collectData() reads the 'limit' input, loads
  https://explosm.net/comics/latest, builds one item for that page, then
  follows the first link inside the MainComic__Selector element from page to
  page, building one item per page visited.
* Fixed here: the latest comic is appended before the loop, and the loop ran
  for $i = 1 .. $limit inclusive, so limit + 1 items were returned although
  the parameter is titled "Maximum number of items to return". The bound is
  now `$i < $limit`, which yields exactly `limit` items. The change is on an
  existing "+" line, so hunk and diffstat counts are unaffected.
* Not fixed (would add lines and change the hunk counts): the return value of
  preg_match() is not checked, so $reg_array[1] is read even when the
  __NEXT_DATA__ script does not match; every find(..., n) result is
  dereferenced without a null check; the item-building statements are
  duplicated between the "latest" page and the loop body.
* $image is assigned in both places but never read in the text shown here,
  and $item['content'] is an empty string. NOTE(review): this looks like
  markup that was lost from the patch text rather than intended behavior -
  confirm against the repository before applying.
* NOTE(review): the hunk header announces 22 old / 59 new lines, but the
  start of the hunk (file opening, class header, leading constants) is not
  present in this text; only the trailing "array(" survives, on the hunk
  header line. The patch cannot be applied as shown - confirm against the
  repository.

 bridges/ExplosmBridge.php | 57 ++++++++++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 10 deletions(-)

diff --git a/bridges/ExplosmBridge.php b/bridges/ExplosmBridge.php
index 876f10e5..cfe42195 100644
--- a/bridges/ExplosmBridge.php
+++ b/bridges/ExplosmBridge.php
@@ -1,22 +1,59 @@ array(
+            'limit' => array(
+                'name' => 'Posts limit',
+                'type' => 'number',
+                'title' => 'Maximum number of items to return',
+                'defaultValue' => 5
+            )
+        )
+    );
 
     public function collectData(){
-        $this->collectExpandableDatas('https://feeds.feedburner.com/Explosm');
-    }
+        $limit = $this->getInput('limit');
+        $latest = getSimpleHTMLDOM('https://explosm.net/comics/latest');
+        $image = $latest->find('div[id=comic]', 0)->find('img', 0)->getAttribute('src');
+        $date_string = $latest->find('p[class*=Author__P]', 0)->innertext;
+        $next_data_string = $latest->find('script[id=__NEXT_DATA__]', 0)->innertext;
+        $exp = '/{\\\"latest\\\":\[{\\\"slug\\\":\\\"(.*?)\\ /';
+        $reg_array = array();
+        preg_match($exp, $next_data_string, $reg_array);
+        $comic_id = $reg_array[1];
+        $comic_id = substr($comic_id, 0, strpos($comic_id, '\\'));
+        $item = array();
+        $item['uri'] = $this::URI . 'comics/' . $comic_id;
+        $item['uid'] = $this::URI . 'comics/' . $comic_id;
+        $item['title'] = 'Comic for ' . $date_string;
+        $item['timestamp'] = strtotime($date_string);
+        $item['author'] = $latest->find('p[class*=Author__P]', 2)->innertext;
+        $item['content'] = '';
+        $this->items[] = $item;
 
-    protected function parseItem($feedItem){
-        $item = parent::parseItem($feedItem);
-        $comicpage = getSimpleHTMLDOM($item['uri']);
-        $image = $comicpage->find('div[id=comic-wrap]', 0)->find('img', 0)->getAttribute('src');
-        $item['content'] = '';
-
-        return $item;
+        $next_comic = substr($this::URI, 0, -1)
+            . $latest->find('div[class*=MainComic__Selector]', 0)->find('a', 0)->getAttribute('href');
+        // start at 1: the latest comic added above already counts as the first of $limit items
+        for ($i = 1; $i < $limit; $i++) {
+            $this_comic = getSimpleHTMLDOM($next_comic);
+            $image = $this_comic->find('div[id=comic]', 0)->find('img', 0)->getAttribute('src');
+            $date_string = $this_comic->find('p[class*=Author__P]', 0)->innertext;
+            $item = array();
+            $item['uri'] = $next_comic;
+            $item['uid'] = $next_comic;
+            $item['title'] = 'Comic for ' . $date_string;
+            $item['timestamp'] = strtotime($date_string);
+            $item['author'] = $this_comic->find('p[class*=Author__P]', 2)->innertext;
+            $item['content'] = '';
+            $this->items[] = $item;
+            $next_comic = substr($this::URI, 0, -1)
+                . $this_comic->find('div[class*=MainComic__Selector]', 0)->find('a', 0)->getAttribute('href'); // get next comic link
+        }
     }
 }