mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-22 17:45:40 +03:00
[TikTokBridge] Use embed iframe to bypass scraping protection (#3864)
The TikTok website was completely changed and now uses some "scraping" protection (what used to be the "API URL" now requires a parameter whose value is generated somewhere in a pile of JavaScript). The iframe embed does not have such protection. It provides less information (no date, ...), but it's better than nothing!
This commit is contained in:
parent
5ab1924c4f
commit
f67d2eb88a
1 changed files with 23 additions and 43 deletions
|
@ -8,12 +8,12 @@ class TikTokBridge extends BridgeAbstract
|
||||||
const MAINTAINER = 'VerifiedJoseph';
|
const MAINTAINER = 'VerifiedJoseph';
|
||||||
const PARAMETERS = [
|
const PARAMETERS = [
|
||||||
'By user' => [
|
'By user' => [
|
||||||
'username' => [
|
'username' => [
|
||||||
'name' => 'Username',
|
'name' => 'Username',
|
||||||
'type' => 'text',
|
'type' => 'text',
|
||||||
'required' => true,
|
'required' => true,
|
||||||
'exampleValue' => '@tiktok',
|
'exampleValue' => '@tiktok',
|
||||||
]
|
]
|
||||||
]];
|
]];
|
||||||
|
|
||||||
const TEST_DETECT_PARAMETERS = [
|
const TEST_DETECT_PARAMETERS = [
|
||||||
|
@ -24,53 +24,33 @@ class TikTokBridge extends BridgeAbstract
|
||||||
|
|
||||||
const CACHE_TIMEOUT = 900; // 15 minutes
|
const CACHE_TIMEOUT = 900; // 15 minutes
|
||||||
|
|
||||||
private $feedName = '';
|
|
||||||
|
|
||||||
public function collectData()
|
public function collectData()
|
||||||
{
|
{
|
||||||
$html = getSimpleHTMLDOM($this->getURI());
|
$html = getSimpleHTMLDOMCached('https://www.tiktok.com/embed/' . $this->processUsername());
|
||||||
|
|
||||||
$title = $html->find('h1', 0)->plaintext ?? self::NAME;
|
$author = $html->find('span[data-e2e=creator-profile-userInfo-TUXText]', 0)->plaintext ?? self::NAME;
|
||||||
$this->feedName = htmlspecialchars_decode($title);
|
|
||||||
|
|
||||||
$var = $html->find('script[id=SIGI_STATE]', 0);
|
$videos = $html->find('div[data-e2e=common-videoList-VideoContainer]');
|
||||||
if (!$var) {
|
|
||||||
throw new \Exception('Unable to find tiktok user data for ' . $this->processUsername());
|
|
||||||
}
|
|
||||||
$SIGI_STATE_RAW = $var->innertext;
|
|
||||||
$SIGI_STATE = Json::decode($SIGI_STATE_RAW, false);
|
|
||||||
|
|
||||||
if (!isset($SIGI_STATE->ItemModule)) {
|
foreach ($videos as $video) {
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach ($SIGI_STATE->ItemModule as $key => $value) {
|
|
||||||
$item = [];
|
$item = [];
|
||||||
|
|
||||||
$link = 'https://www.tiktok.com/@' . $value->author . '/video/' . $value->id;
|
// Handle link "untracking"
|
||||||
$image = $value->video->dynamicCover;
|
$linkParts = parse_url($video->find('a', 0)->href);
|
||||||
if (empty($image)) {
|
$link = $linkParts['scheme'] . '://' . $linkParts['host'] . '/' . $linkParts['path'];
|
||||||
$image = $value->video->cover;
|
|
||||||
}
|
$image = $video->find('video', 0)->poster;
|
||||||
$views = $value->stats->playCount;
|
$views = $video->find('div[data-e2e=common-Video-Count]', 0)->plaintext;
|
||||||
$hastags = [];
|
|
||||||
foreach ($value->textExtra as $tag) {
|
$enclosures = [$image];
|
||||||
$hastags[] = $tag->hashtagName;
|
|
||||||
}
|
|
||||||
$hastags_str = '';
|
|
||||||
foreach ($hastags as $tag) {
|
|
||||||
$hastags_str .= '<a href="https://www.tiktok.com/tag/' . $tag . '">#' . $tag . '</a> ';
|
|
||||||
}
|
|
||||||
|
|
||||||
$item['uri'] = $link;
|
$item['uri'] = $link;
|
||||||
$item['title'] = $value->desc;
|
$item['title'] = 'Video';
|
||||||
$item['timestamp'] = $value->createTime;
|
$item['author'] = '@' . $author;
|
||||||
$item['author'] = '@' . $value->author;
|
$item['enclosures'] = $enclosures;
|
||||||
$item['enclosures'][] = $image;
|
|
||||||
$item['categories'] = $hastags;
|
|
||||||
$item['content'] = <<<EOD
|
$item['content'] = <<<EOD
|
||||||
<a href="{$link}"><img src="{$image}"/></a>
|
<a href="{$link}"><img src="{$image}"/></a>
|
||||||
<p>{$views} views<p><br/>Hashtags: {$hastags_str}
|
<p>{$views} views<p><br/>
|
||||||
EOD;
|
EOD;
|
||||||
|
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
|
@ -91,7 +71,7 @@ EOD;
|
||||||
{
|
{
|
||||||
switch ($this->queriedContext) {
|
switch ($this->queriedContext) {
|
||||||
case 'By user':
|
case 'By user':
|
||||||
return $this->feedName . ' (' . $this->processUsername() . ') - TikTok';
|
return $this->processUsername() . ' - TikTok';
|
||||||
default:
|
default:
|
||||||
return parent::getName();
|
return parent::getName();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue