mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-03-14 20:21:14 +03:00
[Threads] add bridge (#3805)
* initial working Threads bridge * properly specify a default limit * phpcs formatted
This commit is contained in:
parent
2b741b1c1b
commit
b037d1b4d1
1 changed files with 120 additions and 0 deletions
120
bridges/ThreadsBridge.php
Normal file
120
bridges/ThreadsBridge.php
Normal file
|
@ -0,0 +1,120 @@
|
|||
<?php
|
||||
|
||||
/**
 * Bridge that builds a feed from a public Threads profile page.
 *
 * The profile page is fetched once, post short-codes are extracted from the
 * JSON documents embedded in its <script type="application/json"> tags, and
 * each post page is then fetched to read its Open Graph meta tags.
 */
class ThreadsBridge extends BridgeAbstract
{
    const NAME = 'Threads';
    const URI = 'https://www.threads.net/';
    // The apostrophe must be escaped inside a single-quoted string literal.
    const DESCRIPTION = 'Say more with Threads — Instagram\'s new text app.';
    const MAINTAINER = 'mdemoss';
    const CACHE_TIMEOUT = 3600;

    /** Number of posts fetched when no usable limit is supplied. */
    private const DEFAULT_LIMIT = 5;

    /** Cache lifetime, in seconds, for individual post pages (six months). */
    private const POST_CACHE_TIMEOUT = 15778800;

    /** Length of a post short-code such as "CzZk4-USq1O". */
    private const POST_CODE_LENGTH = 11;

    const PARAMETERS = [
        'By username' => [
            'u' => [
                'name' => 'username',
                'required' => true,
                'exampleValue' => 'zuck',
                'title' => 'Insert a user name'
            ],
            'limit' => [
                'name' => 'Limit',
                'type' => 'number',
                'required' => false,
                'title' => 'Specify number of posts to fetch',
                'defaultValue' => self::DEFAULT_LIMIT
            ]
        ]
    ];

    /** Feed title; replaced by the profile's og:title once data is collected. */
    protected $feedName = self::NAME;

    /**
     * @return string The feed name (the bridge name until collectData() has run).
     */
    public function getName()
    {
        return $this->feedName;
    }

    /**
     * Derive bridge parameters from a Threads profile URL.
     *
     * @param string $url URL such as "https://www.threads.net/@zuck".
     * @return array|null Parameters for the "By username" context, or null
     *                    when the URL is not recognised.
     */
    public function detectParameters($url)
    {
        // Capture groups: 1 = scheme, 2 = "www.", 3 = optional "@", 4 = username.
        $regex = '/^(https?:\/\/)?(www\.)?threads\.net\/(@)?([^\/?\n]+)/';
        if (preg_match($regex, $url, $matches) !== 1) {
            return null;
        }

        return [
            'context' => 'By username',
            // The username is group 4; group 3 only holds the optional "@".
            'u' => urldecode($matches[4]),
        ];
    }

    /**
     * @return string The profile URL for the configured user, or the site URL
     *                when no username has been supplied.
     */
    public function getURI()
    {
        // Tolerate a user-typed leading "@" so the result never contains "@@".
        $username = ltrim((string) $this->getInput('u'), '@');
        if ($username === '') {
            return self::URI;
        }

        return self::URI . '@' . rawurlencode($username);
    }

    /**
     * Collect every value stored under a given key anywhere in a nested structure.
     *
     * https://stackoverflow.com/a/3975706/421140
     * Found this in FlaschenpostBridge, modified to return an array and take an object.
     *
     * @param mixed  $haystack Array or object to search; anything else yields [].
     * @param string $needle   Key (array key or property name) to look for.
     * @return array Values found under $needle, in traversal order.
     */
    private function recursiveFind($haystack, $needle): array
    {
        // json_decode() yields null for undecodable input, and scalars have
        // nothing to traverse; RecursiveArrayIterator rejects both.
        if (!is_array($haystack) && !is_object($haystack)) {
            return [];
        }

        $found = [];
        $recursive = new \RecursiveIteratorIterator(
            new \RecursiveArrayIterator($haystack),
            \RecursiveIteratorIterator::SELF_FIRST
        );
        foreach ($recursive as $key => $value) {
            if ($key === $needle) {
                $found[] = $value;
            }
        }

        return $found;
    }

    /**
     * Read the content attribute of an Open Graph style meta tag.
     *
     * @param mixed  $dom      Parsed document exposing find(), as returned by
     *                         getSimpleHTMLDOMCached().
     * @param string $property Value of the meta tag's "property" attribute.
     * @return string|null The content, or null when the tag or attribute is absent.
     */
    private function metaContent($dom, string $property): ?string
    {
        $node = $dom->find('meta[property=' . $property . ']', 0);
        if (!$node) {
            return null;
        }

        $content = $node->content;

        return is_string($content) ? $content : null;
    }

    /**
     * Extract up to $limit distinct post short-codes from the profile page.
     *
     * @param mixed $html  Parsed profile page.
     * @param int   $limit Maximum number of codes to return (at least 1).
     * @return string[] Post codes in the order they were encountered.
     */
    private function gatherPostCodes($html, int $limit): array
    {
        $jsonBlobs = $html->find('script[type="application/json"]');
        Debug::log(sprintf('%d JSON blobs found.', count($jsonBlobs)));

        $gatheredCodes = [];
        foreach ($jsonBlobs as $jsonBlob) {
            // The structure of the JSON document is likely to change, but we're looking for a "code" inside a "post"
            $posts = $this->recursiveFind(json_decode($jsonBlob->innertext), 'post');
            foreach ($this->recursiveFind($posts, 'code') as $candidateCode) {
                // code should be like CzZk4-USq1O or Cy3m1VnRiwP or Cywjyrdv9T6 or CzZk4-USq1O
                if (
                    !is_string($candidateCode)
                    || grapheme_strlen($candidateCode) !== self::POST_CODE_LENGTH
                    || in_array($candidateCode, $gatheredCodes, true)
                ) {
                    continue;
                }

                $gatheredCodes[] = $candidateCode;
                if (count($gatheredCodes) >= $limit) {
                    break 2;
                }
            }
        }

        return $gatheredCodes;
    }

    /**
     * Fetch the profile page, discover post codes and build one item per post.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOMCached($this->getURI(), static::CACHE_TIMEOUT);
        Debug::log(sprintf('Fetched: %s', $this->getURI()));

        // A missing, zero or negative limit would otherwise stop after one post.
        $limit = (int) $this->getInput('limit');
        if ($limit < 1) {
            $limit = self::DEFAULT_LIMIT;
        }

        $gatheredCodes = $this->gatherPostCodes($html, $limit);
        Debug::log(sprintf('Candidate codes found in JSON in script tags: %s', print_r($gatheredCodes, true)));

        // Keep the default feed name when the profile page has no og:title.
        $profileTitle = $this->metaContent($html, 'og:title');
        if ($profileTitle !== null) {
            $this->feedName = html_entity_decode($profileTitle);
        }
        // todo: meta[property=og:description] could populate the feed description

        foreach ($gatheredCodes as $postCode) {
            $item = [];
            // post URL is like: https://www.threads.net/@zuck/post/Czrr520PZfh
            $item['uri'] = $this->getURI() . '/post/' . $postCode;
            $articleHtml = getSimpleHTMLDOMCached($item['uri'], self::POST_CACHE_TIMEOUT);

            // Relying on meta tags ought to be more reliable.
            if ($this->metaContent($articleHtml, 'og:type') !== 'article') {
                continue;
            }

            $description = $this->metaContent($articleHtml, 'og:description') ?? '';
            $item['title'] = $description;
            $item['content'] = $description;
            $item['author'] = html_entity_decode($this->metaContent($articleHtml, 'og:title') ?? '');

            $imageUrl = $this->metaContent($articleHtml, 'og:image');
            if ($imageUrl !== null && $imageUrl !== '') {
                $item['enclosures'][] = html_entity_decode($imageUrl);
            }

            // todo: parse hashtags out of content for $item['categories']
            // todo: try to scrape out a timestamp for $item['timestamp'], it's not in the meta tags

            $this->items[] = $item;
        }
    }
}
|
Loading…
Add table
Reference in a new issue