diff --git a/bridges/MistralAIBridge.php b/bridges/MistralAIBridge.php
new file mode 100644
index 00000000..b1c357fe
--- /dev/null
+++ b/bridges/MistralAIBridge.php
@@ -0,0 +1,70 @@ [
+            'limit' => [
+                'name' => 'Limit',
+                'type' => 'number',
+                'required' => true,
+                'defaultValue' => 10
+            ],
+        ]
+    ];
+
+    /**
+     * Build the feed from the news listing page.
+     *
+     * Loads the `news/` page under self::URI, takes at most `limit`
+     * `article.news-card` elements (in document order) and hands the link
+     * of each one to parsePage(), which appends the item to $this->items.
+     */
+    public function collectData()
+    {
+        $html = getSimpleHTMLDOM(self::URI . 'news/');
+        $posts = $html->find('article.news-card');
+
+        // Compute the bound once: never negative, never more than the cards found.
+        $limit = max(0, (int) $this->getInput('limit'));
+        $count = min($limit, count($posts));
+
+        foreach (array_slice($posts, 0, $count) as $post) {
+            $link = $post->find('a', 0);
+            if ($link === null || !$link->href) {
+                // A card without a usable link cannot be resolved to a page.
+                continue;
+            }
+
+            // NOTE(review): assumes href is relative to self::URI - confirm
+            // against the live markup.
+            $this->parsePage(self::URI . $link->href);
+        }
+    }
+
+    /**
+     * Fetch one news page and append it to $this->items.
+     *
+     * @param string $url Address of the news page to load.
+     *
+     * @throws \RuntimeException When the title, date or main content cannot
+     *                           be located, or the date cannot be parsed.
+     */
+    private function parsePage($url)
+    {
+        // Second argument is 7 days in seconds (presumably the cache lifetime).
+        $html = getSimpleHTMLDOMCached($url, 7 * 24 * 60 * 60);
+
+        $titleTag = $html->find('h1.hero-title', 0);
+        $calendarIcon = $html->find('i.ti-calendar', 0);
+        // NOTE(review): selector kept verbatim; the string is single-quoted so
+        // `$article` is not interpolated, and the leading `$` looks accidental.
+        // Confirm against the live markup before changing it.
+        $main = $html->find('$article > div.content', 0);
+
+        if ($titleTag === null || $calendarIcon === null || $main === null) {
+            throw new \RuntimeException('Unexpected page layout: ' . $url);
+        }
+
+        $title = $titleTag->plaintext;
+
+        // The date text lives in the element wrapping the calendar icon.
+        // The leading `!` resets every field that the format does not parse,
+        // so the time of day is 00:00:00 instead of the current clock time
+        // and the timestamp stays stable between refreshes.
+        $dateText = trim($calendarIcon->parent->plaintext);
+        $date = DateTime::createFromFormat('!F j, Y', $dateText);
+        if ($date === false) {
+            throw new \RuntimeException('Unable to parse date "' . $dateText . '": ' . $url);
+        }
+        $timestamp = $date->format('U');
+
+        $content = '';
+
+        // Subheader
+        $header = $html->find('p.hero-description', 0);
+        if ($header !== null) {
+            $content .= $header->outertext;
+        }
+
+        // Mostly YouTube videos: replace each embedded frame with a plain
+        // link to its source and drop the wrapper's inline style.
+        foreach ($main->find('iframe') as $iframe) {
+            $iframe->parent->removeAttribute('style');
+
+            $src = (string) $iframe->src;
+            if ($src === '') {
+                $iframe->outertext = '';
+                continue;
+            }
+
+            // The attribute value may already contain entities; do not
+            // encode them a second time.
+            $src = htmlspecialchars($src, ENT_QUOTES, 'UTF-8', false);
+            $iframe->outertext = '<a href="' . $src . '">' . $src . '</a>';
+        }
+
+        $main = defaultLinkTo($main, self::URI);
+        $content .= $main;
+
+        $this->items[] = [
+            'title' => $title,
+            'timestamp' => $timestamp,
+            'content' => $content,
+            'uri' => $url,
+        ];
+    }
+}