From 586d707ae48b74b150eb101f2e9986d0bd18d89f Mon Sep 17 00:00:00 2001 From: July Date: Sat, 9 Sep 2023 03:19:09 -0400 Subject: [PATCH] [ArsTechnicaBridge] Add new bridge (#3657) --- bridges/ArsTechnicaBridge.php | 71 +++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 bridges/ArsTechnicaBridge.php diff --git a/bridges/ArsTechnicaBridge.php b/bridges/ArsTechnicaBridge.php new file mode 100644 index 00000000..1e3e6379 --- /dev/null +++ b/bridges/ArsTechnicaBridge.php @@ -0,0 +1,71 @@ + [ + 'name' => 'Site section', + 'type' => 'list', + 'defaultValue' => 'index', + 'values' => [ + 'All' => 'index', + 'Apple' => 'apple', + 'Board Games' => 'cardboard', + 'Cars' => 'cars', + 'Features' => 'features', + 'Gaming' => 'gaming', + 'Information Technology' => 'technology-lab', + 'Science' => 'science', + 'Staff Blogs' => 'staff-blogs', + 'Tech Policy' => 'tech-policy', + 'Tech' => 'gadgets', + ] + ] + ]]; + + public function collectData() + { + $url = 'https://feeds.arstechnica.com/arstechnica/' . $this->getInput('section'); + $this->collectExpandableDatas($url); + } + + protected function parseItem($newItem) + { + $item = parent::parseItem($newItem); + + $item_html = getSimpleHTMLDOMCached($item['uri'] . '&'); + $item_html = defaultLinkTo($item_html, self::URI); + $item['content'] = $item_html->find('.amp-wp-article-content', 0); + + // remove various ars advertising + $item['content']->find('#social-left', 0)->remove(); + foreach ($item['content']->find('.ars-component-buy-box') as $ad) { + $ad->remove(); + } + foreach ($item['content']->find('i-amphtml-sizer') as $ad) { + $ad->remove(); + } + foreach ($item['content']->find('.sidebar') as $ad) { + $ad->remove(); + } + + foreach ($item['content']->find('a') as $link) { //remove amp redirect links + $url = $link->getAttribute('href'); + if (str_contains($url, 'go.redirectingat.com')) { + $url = extractFromDelimiters($url, 'url=', '&'); + $url = urldecode($url); + $link->setAttribute('href', $url); + } + } + + $item['content'] = backgroundToImg(str_replace('data-amp-original-style="background-image', 'style="background-image', $item['content'])); + + $item['uid'] = explode('=', $item['uri'])[1]; + + return $item; + } +}