rss-bridge/bridges/TikTokBridge.php
sysadminstory f67d2eb88a
[TikTokBridge] Use embed iframe to bypass scraping protection (#3864)
The TikTok website was completely changed and now uses some "scraping" protection
(a parameter value generated somewhere in its bundle of JavaScript has to be
passed to the "API URL" that was used before). The iframe embed does not
have such protection. It has less information (no date, ...), but it's
better than nothing!
2023-12-28 13:53:06 +01:00

103 lines
2.8 KiB
PHP

<?php
/**
 * Bridge that turns a TikTok user's public videos into feed items.
 *
 * Videos are scraped from the user's embed page
 * (https://www.tiktok.com/embed/@username). Each item carries the video link,
 * the poster image and the view count; no publication date is extracted.
 */
class TikTokBridge extends BridgeAbstract
{
    const NAME = 'TikTok Bridge';
    const URI = 'https://www.tiktok.com';
    const DESCRIPTION = 'Returns posts';
    const MAINTAINER = 'VerifiedJoseph';
    const PARAMETERS = [
        'By user' => [
            'username' => [
                'name' => 'Username',
                'type' => 'text',
                'required' => true,
                'exampleValue' => '@tiktok',
            ]
        ]];

    const TEST_DETECT_PARAMETERS = [
        'https://www.tiktok.com/@tiktok' => [
            'context' => 'By user', 'username' => '@tiktok'
        ]
    ];

    const CACHE_TIMEOUT = 900; // 15 minutes

    /**
     * Scrapes the user's embed page and appends one feed item per video to
     * $this->items.
     *
     * Video containers without a usable link are skipped instead of producing
     * a broken item.
     */
    public function collectData()
    {
        $username = $this->processUsername();
        $html = getSimpleHTMLDOMCached('https://www.tiktok.com/embed/' . $username);

        // The "@" is re-added per item below, so the fallback is the requested
        // username without its prefix (not the bridge name).
        $author = $html->find('span[data-e2e=creator-profile-userInfo-TUXText]', 0)->plaintext
            ?? ltrim($username, '@');

        foreach ($html->find('div[data-e2e=common-videoList-VideoContainer]') as $video) {
            // "?? ''" keeps a missing element or attribute from raising a warning.
            $link = $this->buildVideoUrl($video->find('a', 0)->href ?? '');
            if ($link === '') {
                continue;
            }

            $image = $video->find('video', 0)->poster ?? '';
            $views = $video->find('div[data-e2e=common-Video-Count]', 0)->plaintext ?? '';

            $thumbnail = $image !== '' ? '<img src="' . $image . '"/>' : 'Video';
            $content = '<a href="' . $link . '">' . $thumbnail . '</a>';
            if ($views !== '') {
                $content .= "\n" . '<p>' . $views . ' views</p><br/>';
            }

            $this->items[] = [
                'uri' => $link,
                'title' => 'Video',
                'author' => '@' . $author,
                'enclosures' => $image !== '' ? [$image] : [],
                'content' => $content,
            ];
        }
    }

    /**
     * Returns the profile URL of the requested user, or the parent's URI when
     * no known context is queried.
     */
    public function getURI()
    {
        if ($this->queriedContext === 'By user') {
            return self::URI . '/' . $this->processUsername();
        }

        return parent::getURI();
    }

    /**
     * Returns "<@username> - TikTok" for the "By user" context, otherwise the
     * parent's name.
     */
    public function getName()
    {
        if ($this->queriedContext === 'By user') {
            return $this->processUsername() . ' - TikTok';
        }

        return parent::getName();
    }

    /**
     * Rebuilds a video link from scheme, host and path only, which drops the
     * query string and fragment (tracking parameters).
     *
     * @param string $href Raw href taken from the video container.
     * @return string Clean URL, or '' when $href is empty or cannot be parsed.
     */
    private function buildVideoUrl(string $href): string
    {
        if ($href === '') {
            return '';
        }

        $parts = parse_url($href);
        if ($parts === false || !isset($parts['path'])) {
            return '';
        }

        // Links without scheme/host are resolved against the site root.
        $base = isset($parts['scheme'], $parts['host'])
            ? $parts['scheme'] . '://' . $parts['host']
            : self::URI;

        // The parsed path already starts with "/"; trim it so the result does
        // not contain "//" after the host.
        return $base . '/' . ltrim($parts['path'], '/');
    }

    /**
     * Normalises the "username" input to the "@name" form.
     *
     * Accepts "name", "@name" or a profile URL; for a URL only the handle is
     * kept (any trailing path, query string or fragment is discarded).
     *
     * @return string Username with a leading "@".
     */
    private function processUsername(): string
    {
        $username = trim((string) $this->getInput('username'));

        if (preg_match('~^https?://(?:www\.)?tiktok\.com/@([^/?#]*)~i', $username, $m)) {
            return '@' . $m[1];
        }

        if (substr($username, 0, 1) !== '@') {
            return '@' . $username;
        }

        return $username;
    }

    /**
     * Derives bridge parameters from a TikTok profile URL.
     *
     * @param string $url URL to inspect.
     * @return array|null 'By user' context with the "@username", or null when
     *                    the URL does not contain a TikTok handle.
     */
    public function detectParameters($url)
    {
        // Handles may contain periods, so "." is part of the character class;
        // a trailing period is treated as punctuation and removed.
        if (preg_match('/tiktok\.com\/(@[\w.]+)/', $url, $matches) > 0) {
            return [
                'context' => 'By user',
                'username' => rtrim($matches[1], '.')
            ];
        }

        return null;
    }
}