2023-01-23 21:22:13 +03:00
|
|
|
<?php
|
|
|
|
|
2024-08-08 01:27:33 +03:00
|
|
|
declare(strict_types=1);
|
|
|
|
|
2023-01-23 21:22:13 +03:00
|
|
|
/**
 * Bridge that turns TLDR Tech newsletter issues into feed items.
 *
 * One item is produced per newsletter issue. The newest issue is resolved through the
 * `api/latest/<topic>` endpoint (via its `Location` header); further issues are taken from
 * the links on the topic's `/archives` page, until the requested limit is reached.
 */
class TldrTechBridge extends BridgeAbstract
{
    const MAINTAINER = 'sqrtminusone';
    const NAME = 'TLDR Tech Newsletter Bridge';
    const URI = 'https://tldr.tech/';
    const DESCRIPTION = 'Return newsletter articles from TLDR Tech';

    const PARAMETERS = [
        '' => [
            'limit' => [
                'name' => 'Maximum number of articles to return',
                'type' => 'number',
                'required' => true,
                'defaultValue' => 10,
            ],
            'topic' => [
                'name' => 'Topic',
                'type' => 'list',
                'values' => [
                    'Tech' => 'tech',
                    'Web Dev' => 'webdev',
                    'AI' => 'ai',
                    'Information Security' => 'infosec',
                    'Product Management' => 'product',
                    'DevOps' => 'devops',
                    'Crypto' => 'crypto',
                    'Design' => 'design',
                    'Marketing' => 'marketing',
                    'Founders' => 'founders',
                ],
                'defaultValue' => 'tech',
            ],
        ],
    ];

    /**
     * Collect up to `limit` newsletter issues for the selected topic into $this->items.
     *
     * The limit is enforced before every fetch, so the result never exceeds it (a limit
     * below 1 yields no items) and the archive page is not requested once the limit is met.
     *
     * @throws \Exception when the archive page does not contain the expected entry container,
     *                    or when an issue page lacks its content container (see extractContent())
     */
    public function collectData()
    {
        $topic = $this->getInput('topic');
        $limit = (int) $this->getInput('limit');
        if ($limit < 1) {
            return;
        }

        // The "latest" endpoint is only used for its Location header. The trailing `true`
        // makes getContents() return an object exposing getHeader()
        // (NOTE(review): presumably the full response object - confirm against getContents()).
        $latestUrl = self::URI . 'api/latest/' . $topic;
        $response = getContents($latestUrl, [], [], true);
        $location = $response->getHeader('Location');
        if (is_string($location) && $location !== '') {
            $this->extractItem(Url::fromString($location));
        }

        if (count($this->items) >= $limit) {
            return;
        }

        $archivesUrl = self::URI . $topic . '/archives';
        $archivesHtml = getSimpleHTMLDOM($archivesUrl);
        $entriesRoot = $archivesHtml->find('div.content-center.mt-5', 0);
        if (!$entriesRoot) {
            throw new \Exception('Could not find archive entries');
        }

        foreach ($entriesRoot->children() as $child) {
            if ($child->tag !== 'a') {
                continue;
            }

            $href = $child->href;
            if (!is_string($href) || $href === '') {
                // An anchor without a usable href cannot be turned into an issue URL.
                continue;
            }

            $this->extractItem(Url::fromString(self::URI . ltrim($href, '/')));

            if (count($this->items) >= $limit) {
                break;
            }
        }
    }

    /**
     * Fetch one newsletter issue and append it to $this->items.
     *
     * The item timestamp is parsed from the last path segment of the URL. Issues whose URI
     * is already present in $this->items are skipped, so the latest issue is not emitted
     * twice when the archive page lists it as well. An HttpException raised while loading
     * the page is swallowed on purpose; any other exception propagates to the caller.
     *
     * @param Url $url address of the issue page
     */
    private function extractItem(Url $url): void
    {
        $uri = (string) $url;
        foreach ($this->items as $existing) {
            if (($existing['uri'] ?? null) === $uri) {
                return;
            }
        }

        $pathParts = explode('/', $url->getPath());
        $date = strtotime(end($pathParts));

        try {
            [$content, $title] = $this->extractContent($url);
        } catch (HttpException $e) {
            // archive occasionally returns broken URLs
            return;
        }

        $item = [
            'uri' => $uri,
            'title' => $title,
            'content' => $content,
        ];
        if ($date !== false) {
            // strtotime() returns false for an unparseable segment; leave the timestamp
            // out in that case instead of storing false.
            $item['timestamp'] = $date;
        }

        $this->items[] = $item;
    }

    /**
     * Load an issue page and reduce it to feed-friendly HTML.
     *
     * Within the `div.content-center.mt-5` container this removes the subscribe form and
     * privacy-link sections, replaces each section header wrapper with a single styled
     * `<h3>`, wraps the text of bold links in `<b>`, and adds bottom spacing to nested
     * `<div>` blocks.
     *
     * @param Url $url address of the issue page
     *
     * @return array{0: string, 1: string} the cleaned inner HTML and the issue title (text of
     *                                     the first `<h2>`, or the URL when there is none)
     *
     * @throws \Exception when the content container is missing from the page
     */
    private function extractContent(Url $url): array
    {
        $html = getSimpleHTMLDOMCached($url);
        $content = $html->find('div.content-center.mt-5', 0);
        if (!$content) {
            throw new \Exception('Could not find content');
        }

        // Drop the newsletter sign-up block (the grandparent of the <form>).
        $subscribeForm = $content->find('div.mt-5 > div > form', 0);
        if ($subscribeForm) {
            $content->removeChild($subscribeForm->parent->parent);
        }

        // Drop the block containing the privacy-policy link.
        $privacyLink = $content->find("a[href='/privacy']", 0);
        if ($privacyLink) {
            $content->removeChild($privacyLink->parent->parent);
        }

        // Replace each section header wrapper's children with one <h3> holding its text.
        foreach ($content->find('h6.text-center.font-bold') as $header) {
            $headerRoot = $header->parent;
            $heading = $html->createElement('h3', $headerRoot->plaintext);
            $heading->style = 'margin-top: 1.2em; margin-bottom: 0.5em;';
            foreach ($headerRoot->children() as $child) {
                $headerRoot->removeChild($child);
            }
            $headerRoot->appendChild($heading);
        }

        // Bold links lose their class; their text is re-added inside an explicit <b>.
        foreach ($content->find('a.font-bold') as $a) {
            $a->removeAttribute('class');
            $bold = $html->createElement('b', $a->plaintext);
            $a->removeChild($a->firstChild());
            $a->appendChild($bold);
        }

        // Add spacing below <div> elements nested directly inside top-level <div> sections.
        foreach ($content->children() as $child) {
            if ($child->tag !== 'div') {
                continue;
            }
            foreach ($child->children() as $grandchild) {
                if ($grandchild->tag === 'div') {
                    $grandchild->style = 'margin-bottom: 12px;';
                }
            }
        }

        $title = $content->find('h2', 0);
        // Fall back to the URL so an issue without an <h2> still gets a usable title.
        $titleText = $title ? $title->plaintext : (string) $url;

        return [$content->innertext, $titleText];
    }
}
|