2013-08-15 14:05:58 +04:00
< ? php
2022-07-01 16:10:30 +03:00
2013-08-15 14:05:58 +04:00
/**
 * RssBridgeYoutube
 * Returns the newest videos
 * WARNING: to parse big playlists (over ~90 videos), you need to edit simple_html_dom.php:
 * change: define('MAX_FILE_SIZE', 600000);
 * into:   define('MAX_FILE_SIZE', 900000); (or more)
 */
2015-11-01 14:17:36 +03:00
class YoutubeBridge extends BridgeAbstract
{
2016-08-30 12:23:55 +03:00
const NAME = 'YouTube Bridge';
const URI = 'https://www.youtube.com';
// 60 * 60 * 3 -- presumably seconds, i.e. three hours (the unit is defined by the parent class).
const CACHE_TIMEOUT = 60 * 60 * 3;
const DESCRIPTION = 'Returns the 10 newest videos by username/channel/playlist or search';

// One entry per query context; the inner keys ('u', 'c', 'custom', 'p', 's', ...) are the
// input names read through getInput() in this class. 'global' inputs apply to every context.
const PARAMETERS = [
    'By username' => [
        'u' => [
            'name' => 'username',
            'exampleValue' => 'LinusTechTips',
            'required' => true,
        ],
    ],
    'By channel id' => [
        'c' => [
            'name' => 'channel id',
            'exampleValue' => 'UCw38-8_Ibv_L6hlKChHO9dQ',
            'required' => true,
        ],
    ],
    'By custom name' => [
        'custom' => [
            'name' => 'custom name',
            'exampleValue' => 'LinusTechTips',
            'required' => true,
        ],
    ],
    'By playlist Id' => [
        'p' => [
            'name' => 'playlist id',
            'exampleValue' => 'PL8mG-RkN2uTzJc8N0EoyhdC54prvBBLpj',
            'required' => true,
        ],
    ],
    'Search result' => [
        's' => [
            'name' => 'search keyword',
            'exampleValue' => 'LinusTechTips',
            'required' => true,
        ],
        'pa' => [
            'name' => 'page',
            'type' => 'number',
            'title' => 'This option is not work anymore, as YouTube will always return the same page',
            'exampleValue' => 1,
        ],
    ],
    'global' => [
        'duration_min' => [
            'name' => 'min. duration (minutes)',
            'type' => 'number',
            'title' => 'Minimum duration for the video in minutes',
            'exampleValue' => 5,
        ],
        'duration_max' => [
            'name' => 'max. duration (minutes)',
            'type' => 'number',
            'title' => 'Maximum duration for the video in minutes',
            'exampleValue' => 10,
        ],
    ],
];

// Feed title shown by getName(); filled in while collecting data.
private $feedName = '';
// Listing/search URL returned by getURI() once data has been collected.
private $feeduri = '';
// Channel avatar URL returned by getIcon(); only set for the "custom name" context.
private $feedIconUrl = '';

// Taken from the BetterVideoRss repository by VerifiedJoseph.
// Capture group 1 is the whole URL. The optional port group must use a whitespace-free
// quantifier "{2,4}": with whitespace inside the braces, classic PCRE treats the braces as
// literal text, so the port never matches and the URL is cut off after the host.
const URI_REGEX = '/(https?:\/\/(?:www\.)?(?:[a-zA-Z0-9-.]{2,256}\.[a-z]{2,20})(\:[0-9]{2,4})?(?:\/[a-zA-Z0-9@:%_\+.,~#"\'!?&\/\/=\-*]+|\/)?)/ims'; //phpcs:ignore
2023-09-10 22:50:15 +03:00
2023-10-15 01:08:18 +03:00
/**
 * Collects the feed items unless a rate-limit marker is present in the cache.
 *
 * When the marker is found, a 429 HttpException is thrown before any collection happens.
 * When collection itself fails with an HttpException whose code is 429, the marker is
 * stored (third argument 60 * 16, presumably a TTL in seconds) and the exception is rethrown.
 *
 * @throws HttpException
 */
public function collectData()
{
    $rateLimitKey = 'youtube_rate_limit';

    $isRateLimited = $this->cache->get($rateLimitKey);
    if ($isRateLimited) {
        throw new HttpException('429 Too Many Requests', 429);
    }

    try {
        $this->collectDataInternal();
    } catch (HttpException $exception) {
        $hitRateLimit = $exception->getCode() === 429;
        if ($hitRateLimit) {
            $this->cache->set($rateLimitKey, true, 60 * 16);
        }
        // Every HttpException is propagated, rate limit or not.
        throw $exception;
    }
}
2023-09-10 22:50:15 +03:00
/**
 * Builds the item list for the requested context.
 *
 * The context is chosen from the inputs in this order: username ('u'), channel id ('c'),
 * custom name ('custom'), playlist ('p'), search ('s'). For user/channel/custom the XML feed
 * is used unless a duration filter is set, in which case the listing page JSON is used.
 * For playlists the page JSON is used when it lists more than 15 entries or when a duration
 * filter is set; otherwise the XML feed is used. Searches always use the page JSON.
 *
 * Sets $this->feeduri and $this->feedName, and $this->feedIconUrl for custom names.
 *
 * @throws \Exception when the playlist content cannot be located in the page JSON
 */
private function collectDataInternal()
{
    $html = '';
    $url_feed = '';
    $url_listing = '';

    $username = $this->getInput('u');
    $channel = $this->getInput('c');
    $custom = $this->getInput('custom');
    $playlist = $this->getInput('p');
    $search = $this->getInput('s');

    $durationMin = $this->getInput('duration_min');
    $durationMax = $this->getInput('duration_max');

    // Whether to discriminate videos by duration
    $filterByDuration = $durationMin || $durationMax;

    if ($username) {
        // user and channel
        $url_feed = self::URI . '/feeds/videos.xml?user=' . urlencode($username);
        $url_listing = self::URI . '/user/' . urlencode($username) . '/videos';
    } elseif ($channel) {
        $url_feed = self::URI . '/feeds/videos.xml?channel_id=' . urlencode($channel);
        $url_listing = self::URI . '/channel/' . urlencode($channel) . '/videos';
    } elseif ($custom) {
        $url_listing = self::URI . '/' . urlencode($custom) . '/videos';
    }

    if ($url_feed || $url_listing) {
        // user, channel or custom
        $this->feeduri = $url_listing;
        $jsonData = null;

        if ($custom) {
            // Extract the feed url (and the channel avatar) for the custom name.
            $html = $this->fetch($url_listing);
            $jsonData = $this->extractJsonFromHtml($html);
            // extractJsonFromHtml() may return null, so every lookup is null-tolerant.
            $channelMetadata = $jsonData->metadata->channelMetadataRenderer ?? null;
            $url_feed = $channelMetadata->rssUrl ?? '';
            $this->feedIconUrl = $channelMetadata->avatar->thumbnails[0]->url ?? '';
        }

        if ($filterByDuration) {
            if (!$custom) {
                // Fetch the html page
                $html = $this->fetch($url_listing);
                $jsonData = $this->extractJsonFromHtml($html);
            }
            $videos = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]
                ->tabRenderer->content->richGridRenderer->contents ?? null;
            if ($videos === null) {
                returnServerError('Unable to get data from YouTube');
            }
            $this->fetchItemsFromFromJsonData($videos);
        } else {
            if (!$url_feed) {
                // Only reachable for a custom name whose page exposed no feed url.
                returnServerError('Unable to get data from YouTube');
            }
            // Fetch the xml feed
            $html = $this->fetch($url_feed);
            $this->extractItemsFromXmlFeed($html);
        }

        $titleElement = $html->find('title', 0);
        if (!is_null($titleElement)) {
            $this->feedName = str_replace(' - YouTube', '', $titleElement->plaintext);
        }
    } elseif ($playlist) {
        // playlist
        $url_feed = self::URI . '/feeds/videos.xml?playlist_id=' . urlencode($playlist);
        $url_listing = self::URI . '/playlist?list=' . urlencode($playlist);
        $html = $this->fetch($url_listing);
        $jsonData = $this->extractJsonFromHtml($html);

        // TODO: this method returns only first 100 video items
        // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element
        $videos = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0]
            ->tabRenderer->content->sectionListRenderer->contents[0]
            ->itemSectionRenderer->contents[0]->playlistVideoListRenderer->contents ?? null;
        if (!is_array($videos)) {
            // playlist probably doesn't exist
            throw new \Exception('Unable to find playlist: ' . $url_listing);
        }

        if (count($videos) > 15 || $filterByDuration) {
            $this->fetchItemsFromFromJsonData($videos);
        } else {
            $xml = $this->fetch($url_feed);
            $this->extractItemsFromXmlFeed($xml);
        }

        $titleElement = $html->find('title', 0);
        $playlistTitle = is_null($titleElement) ? '' : str_replace(' - YouTube', '', $titleElement->plaintext);
        $this->feedName = 'Playlist: ' . $playlistTitle;

        // Newest first. Each timestamp is normalised on its own so that a mix of integer
        // and textual timestamps is still compared numerically.
        $toTimestamp = static function ($value): int {
            if (is_int($value)) {
                return $value;
            }
            if (is_string($value)) {
                $parsed = strtotime($value);
                return $parsed === false ? 0 : $parsed;
            }
            return (int) $value;
        };
        usort($this->items, static function ($item1, $item2) use ($toTimestamp) {
            return $toTimestamp($item2['timestamp'] ?? null) <=> $toTimestamp($item1['timestamp'] ?? null);
        });
    } elseif ($search) {
        // search
        $url_listing = self::URI . '/results?search_query=' . urlencode($search) . '&sp=CAI%253D';
        $html = $this->fetch($url_listing);
        $jsonData = $this->extractJsonFromHtml($html);
        $results = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents
            ->sectionListRenderer->contents[0]->itemSectionRenderer->contents ?? null;
        if ($results === null) {
            returnServerError('Unable to get data from YouTube');
        }
        $this->fetchItemsFromFromJsonData($results);
        $this->feeduri = $url_listing;
        $this->feedName = 'Search: ' . $search;
    } else {
        returnClientError("You must specify either:\n - YouTube username (?u=...)\n - Channel id (?c=...)\n - Custom name (?custom=...)\n - Playlist id (?p=...)\n - Search (?s=...)");
    }
}
2023-10-15 04:15:47 +03:00
/**
 * Loads a video's watch page (through the cached fetch) and fills in details by reference.
 *
 * Each out-parameter is only written when the corresponding data is found; the method
 * returns early, leaving the remaining ones untouched, when the page is marked unavailable
 * or carries no usable JSON.
 *
 * @param string      $videoId     YouTube video id appended to /watch?v=
 * @param string|null $author      receives the channel name from the page's microdata
 * @param string|null $description receives the description with detected links wrapped in anchors
 * @param int|null    $timestamp   receives the publication time parsed with strtotime()
 *
 * @throws \Exception when the watch page JSON lacks the expected contents list
 */
private function fetchVideoDetails($videoId, &$author, &$description, &$timestamp)
{
    $url = self::URI . "/watch?v=$videoId";
    $html = $this->fetch($url, true);

    // Skip unavailable videos
    if (strpos($html->innertext, 'IS_UNAVAILABLE_PAGE') !== false) {
        return;
    }

    $elAuthor = $html->find('span[itemprop=author] > link[itemprop=name]', 0);
    if (!is_null($elAuthor)) {
        $author = $elAuthor->getAttribute('content');
    }

    $elDatePublished = $html->find('meta[itemprop=datePublished]', 0);
    if (!is_null($elDatePublished)) {
        $timestamp = strtotime($elDatePublished->getAttribute('content'));
    }

    $jsonData = $this->extractJsonFromHtml($html);
    if (!isset($jsonData->contents)) {
        return;
    }

    $jsonData = $jsonData->contents->twoColumnWatchNextResults->results->results->contents ?? null;
    if (!$jsonData) {
        throw new \Exception('Unable to find json data');
    }

    $videoSecondaryInfo = null;
    foreach ($jsonData as $item) {
        if (isset($item->videoSecondaryInfoRenderer)) {
            $videoSecondaryInfo = $item->videoSecondaryInfoRenderer;
            break;
        }
    }
    if (!$videoSecondaryInfo) {
        returnServerError('Could not find videoSecondaryInfoRenderer. Error at: ' . $videoId);
    }

    $description = $videoSecondaryInfo->attributedDescription->content ?? '';

    // Default whitespace chars used by trim + non-breaking spaces (https://en.wikipedia.org/wiki/Non-breaking_space)
    $whitespaceChars = " \t\n\r\0\x0B\u{A0}\u{2060}\u{202F}\u{2007}";
    $descEnhancements = $this->ytBridgeGetVideoDescriptionEnhancements($videoSecondaryInfo, $description, self::URI, $whitespaceChars);

    // trim() works on single bytes, so a character list holding multibyte characters could
    // strip individual bytes out of unrelated UTF-8 characters. A unicode-aware pattern
    // trims whole characters instead.
    $trimClass = preg_quote($whitespaceChars . '•/', '/');
    $trimPattern = '/\A[' . $trimClass . ']+|[' . $trimClass . ']+\z/u';

    // Enhancements arrive sorted by descending position, so earlier offsets stay valid
    // while anchors are spliced in.
    foreach ($descEnhancements as $descEnhancement) {
        if (isset($descEnhancement['url'])) {
            $descBefore = mb_substr($description, 0, $descEnhancement['pos']);
            $descValue = mb_substr($description, $descEnhancement['pos'], $descEnhancement['len']);
            $descAfter = mb_substr($description, $descEnhancement['pos'] + $descEnhancement['len'], null);

            // Extended trim for the display value of internal links, e.g.:
            // FAVICON • Video Name
            // FAVICON / @ChannelName
            $trimmedValue = preg_replace($trimPattern, '', $descValue);
            if ($trimmedValue !== null) {
                // null signals a PCRE failure (e.g. invalid UTF-8); keep the untrimmed value then.
                $descValue = $trimmedValue;
            }

            $description = sprintf('%s<a href="%s" target="_blank">%s</a>%s', $descBefore, $descEnhancement['url'], $descValue, $descAfter);
        }
    }
}
/**
 * Locates the link spans ("command runs") of a video description inside its plain text.
 *
 * @param object $videoSecondaryInfo renderer object; attributedDescription->commandRuns is read
 * @param string $descriptionContent plain-text description the positions refer to
 * @param string $baseUrl            prefix for command urls that have no host
 * @param string $whitespaceChars    characters treated as word boundaries (split per character)
 *
 * @return array list of ['pos' => int, 'len' => int, 'url' => string] in descending position
 *               order, or an empty list when there are no runs or at least one run with
 *               command metadata could not be matched
 */
private function ytBridgeGetVideoDescriptionEnhancements(
    object $videoSecondaryInfo,
    string $descriptionContent,
    string $baseUrl,
    string $whitespaceChars
): array {
    $commandRuns = $videoSecondaryInfo->attributedDescription->commandRuns ?? [];
    if (count($commandRuns) <= 0) {
        return [];
    }

    $enhancements = [];

    $boundaryWhitespaceChars = mb_str_split($whitespaceChars);
    $boundaryStartChars = array_merge($boundaryWhitespaceChars, [':', '-', '(']);
    $boundaryEndChars = array_merge($boundaryWhitespaceChars, [',', '.', "'", ')']);
    $hashtagBoundaryEndChars = array_merge($boundaryEndChars, ['#', '-']);

    $descriptionContentLength = mb_strlen($descriptionContent);

    $minPositionOffset = 0;

    // Walking the runs from last to first, accumulate the length of each run and all runs
    // after it (plus one whenever a gap separates two runs). This bounds how far to the
    // right a run may start.
    $prevStartPosition = 0;
    $totalLength = 0;
    $maxPositionByStartIndex = [];
    foreach (array_reverse($commandRuns) as $commandRun) {
        $endPosition = $commandRun->startIndex + $commandRun->length;
        if ($endPosition < $prevStartPosition) {
            $totalLength += 1;
        }
        $totalLength += $commandRun->length;
        $maxPositionByStartIndex[$commandRun->startIndex] = $totalLength;
        $prevStartPosition = $commandRun->startIndex;
    }

    foreach ($commandRuns as $commandRun) {
        $commandMetadata = $commandRun->onTap->innertubeCommand->commandMetadata->webCommandMetadata ?? null;
        if (!isset($commandMetadata)) {
            continue;
        }

        $enhancement = null;

        /*
        $commandRun->startIndex can be offset by few positions in the positive direction
        when some multibyte characters (e.g. emojis, but maybe also others) are used in the plain text video description.
        (probably some difference between php and javascript in handling multibyte characters)
        This loop should correct the position in most cases. It searches for the next word (determined by a set of boundary chars) with the expected length.
        Several safeguards ensure that the correct word is chosen. When a link can not be matched,
        everything will be discarded to prevent corrupting the description.
        Hashtags require a different set of boundary chars.
        */
        $isHashtag = ($commandMetadata->webPageType ?? null) === 'WEB_PAGE_TYPE_BROWSE';
        $prevEnhancement = end($enhancements);
        $minPosition = $prevEnhancement === false ? 0 : $prevEnhancement['pos'] + $prevEnhancement['len'];
        $maxPosition = $descriptionContentLength - $maxPositionByStartIndex[$commandRun->startIndex];
        $position = min($commandRun->startIndex - $minPositionOffset, $maxPosition);
        while ($position >= $minPosition) {
            // The link display value can only ever include a new line at the end (which will be removed further below), never in between.
            $newLinePosition = mb_strpos($descriptionContent, "\n", $position);
            if ($newLinePosition !== false && $newLinePosition < $position + ($commandRun->length - 1)) {
                $position = $newLinePosition - ($commandRun->length - 1);
                continue;
            }

            $firstChar = mb_substr($descriptionContent, $position, 1);
            $boundaryStart = mb_substr($descriptionContent, $position - 1, 1);
            $boundaryEndIndex = $position + $commandRun->length;
            $boundaryEnd = mb_substr($descriptionContent, $boundaryEndIndex, 1);

            $boundaryStartIsValid = $position === 0 ||
                in_array($boundaryStart, $boundaryStartChars, true) ||
                ($isHashtag && $firstChar === '#');
            $boundaryEndIsValid = $boundaryEndIndex === $descriptionContentLength ||
                in_array($boundaryEnd, $isHashtag ? $hashtagBoundaryEndChars : $boundaryEndChars, true);

            if ($boundaryStartIsValid && $boundaryEndIsValid) {
                $minPositionOffset = $commandRun->startIndex - $position;
                $enhancement = [
                    'pos' => $position,
                    'len' => $commandRun->length,
                ];
                break;
            }

            $position--;
        }

        if (!isset($enhancement)) {
            // mb_substr keeps the excerpt from ending in the middle of a multibyte character.
            $this->logger->debug(sprintf('Position %d cannot be corrected in "%s"', $commandRun->startIndex, mb_substr($descriptionContent, 0, 50) . '...'));
            // Skip to prevent the description from becoming corrupted
            continue;
        }

        // $commandRun->length sometimes incorrectly includes the newline as last char
        $lastChar = mb_substr($descriptionContent, $enhancement['pos'] + $enhancement['len'] - 1, 1);
        if ($lastChar === "\n") {
            $enhancement['len'] -= 1;
        }

        $rawUrl = (string) ($commandMetadata->url ?? '');
        $commandUrl = parse_url($rawUrl);
        if (!is_array($commandUrl)) {
            // parse_url() returns false for seriously malformed urls.
            $commandUrl = [];
        }

        $redirectTarget = null;
        if (($commandUrl['path'] ?? null) === '/redirect') {
            parse_str($commandUrl['query'] ?? '', $commandUrlQuery);
            // parse_str() has already url-decoded the value; decoding it a second time would
            // corrupt targets that themselves contain "%xx" sequences or "+".
            $queryTarget = $commandUrlQuery['q'] ?? null;
            if (is_string($queryTarget) && $queryTarget !== '') {
                $redirectTarget = $queryTarget;
            }
        }

        if ($redirectTarget !== null) {
            $enhancement['url'] = $redirectTarget;
        } elseif (isset($commandUrl['host'])) {
            $enhancement['url'] = $rawUrl;
        } else {
            $enhancement['url'] = $baseUrl . $rawUrl;
        }

        $enhancements[] = $enhancement;
    }

    if (count($enhancements) !== count($commandRuns)) {
        // At least one link can not be matched. Discard everything to prevent corrupting the description.
        return [];
    }

    // Sort by position in descending order to be able to safely replace values
    return array_reverse($enhancements);
}
2022-07-01 16:10:30 +03:00
2023-10-15 01:08:18 +03:00
/**
 * Turns every <entry> of a parsed YouTube XML feed into a feed item.
 *
 * Sets $this->feedName from the feed's own title. Entries whose id contains "googleads"
 * are skipped. The description is HTML-escaped and its URLs are wrapped in anchors; line
 * breaks are left for addItem() to convert, so they are not converted twice.
 *
 * @param object $xml parsed feed document offering find() (simple_html_dom style)
 */
private function extractItemsFromXmlFeed($xml)
{
    $this->feedName = $this->decodeTitle($xml->find('feed > title', 0)->plaintext);

    foreach ($xml->find('entry') as $element) {
        $videoId = str_replace('yt:video:', '', $element->find('id', 0)->plaintext);
        if (strpos($videoId, 'googleads') !== false) {
            continue;
        }

        $title = $this->decodeTitle($element->find('title', 0)->plaintext);
        $author = $element->find('name', 0)->plaintext;

        $descElement = $element->find('media:description', 0);
        $desc = is_null($descElement) ? '' : htmlspecialchars($descElement->innertext);
        // No nl2br() here: addItem() applies it, and applying it twice doubles every <br />.
        $linkedDesc = preg_replace(self::URI_REGEX, '<a href="$1" target="_blank">$1</a> ', $desc);
        if ($linkedDesc !== null) {
            // null signals a PCRE failure; the escaped text is kept unlinked in that case.
            $desc = $linkedDesc;
        }

        $time = strtotime($element->find('published', 0)->plaintext);
        $this->addItem($videoId, $title, $author, $desc, $time);
    }
}
2022-07-01 16:10:30 +03:00
2023-10-15 01:08:18 +03:00
/**
 * Downloads a page and returns it as a parsed DOM, always asking for en-US content.
 *
 * @param string $url   address to load
 * @param bool   $cache when true the cached helper is used with a lifetime of 86400 * 3 (3 days)
 *
 * @return mixed whatever getSimpleHTMLDOM() / getSimpleHTMLDOMCached() return
 */
private function fetch($url, bool $cache = false)
{
    $requestHeaders = ['Accept-Language: en-US'];
    $shouldStripNewlines = false;

    if (!$cache) {
        return getSimpleHTMLDOM($url, $requestHeaders, [], true, true, DEFAULT_TARGET_CHARSET, $shouldStripNewlines);
    }

    $threeDays = 86400 * 3;
    return getSimpleHTMLDOMCached($url, $threeDays, $requestHeaders, [], true, true, DEFAULT_TARGET_CHARSET, $shouldStripNewlines);
}
2022-07-01 16:10:30 +03:00
2023-10-15 01:08:18 +03:00
/**
 * Pulls the "ytInitialData" JSON object out of a page and decodes it.
 *
 * @param mixed $html page content; it is used as the subject of preg_match()
 *
 * @return mixed the json_decode() result (objects, not associative arrays), or null when
 *               the assignment cannot be found in the page
 */
private function extractJsonFromHtml($html)
{
    $captures = [];
    $found = preg_match('/var ytInitialData = (.*?);<\/script>/', $html, $captures);

    if ($found) {
        return json_decode($captures[1]);
    }

    $this->logger->debug('Could not find ytInitialData');
    return null;
}
2022-07-01 16:10:30 +03:00
2023-10-15 01:08:18 +03:00
/**
 * Converts a list of renderer objects from YouTube's page JSON into feed items.
 *
 * Recognised wrappers are gridVideoRenderer, videoRenderer, playlistVideoRenderer and
 * richItemRenderer (through its content->videoRenderer). Entries without a usable video
 * renderer are skipped. When a textual duration is available, the 'duration_min' /
 * 'duration_max' inputs (in minutes) are applied. Items lacking a description or a
 * timestamp are completed through fetchVideoDetails(). Collection stops once
 * $this->items holds 99 entries.
 *
 * @param iterable|null $jsonData renderer entries; null is treated as an empty list
 */
private function fetchItemsFromFromJsonData($jsonData)
{
    $minimumDurationSeconds = ($this->getInput('duration_min') ?: -1) * 60;
    $maximumDurationSeconds = ($this->getInput('duration_max') ?: INF) * 60;

    foreach ($jsonData ?? [] as $item) {
        $wrapper = $item->gridVideoRenderer
            ?? $item->videoRenderer
            ?? $item->playlistVideoRenderer
            ?? $item->richItemRenderer->content->videoRenderer
            ?? null;
        if (!isset($wrapper->videoId)) {
            // Not a video entry (or a rich item wrapping something other than a video).
            continue;
        }

        // 01:03:30 | 15:06 | 1:24
        $lengthText = $wrapper->lengthText->simpleText ?? null;
        // Dc645M8Het8
        $videoId = $wrapper->videoId;
        // Jumbo frames - transfer more data faster!
        $title = $wrapper->title->runs[0]->text ?? $wrapper->title->accessibility->accessibilityData->label ?? null;
        $author = null;
        $description = $wrapper->descriptionSnippet->runs[0]->text ?? null;
        // 5 days ago | 1 month ago
        $publishedTimeText = $wrapper->publishedTimeText->simpleText ?? $wrapper->videoInfo->runs[2]->text ?? null;

        $timestamp = null;
        if ($publishedTimeText) {
            try {
                $publicationDate = new \DateTimeImmutable($publishedTimeText);
                // Hard-code hour, minute and second
                $publicationDate = $publicationDate->setTime(0, 0, 0);
                $timestamp = $publicationDate->getTimestamp();
            } catch (\Exception $e) {
                // Deliberately ignored: an unparsable text leaves $timestamp null, which
                // triggers the fetchVideoDetails() lookup below.
            }
        }

        $durationText = 0;
        if ($lengthText) {
            $durationText = $lengthText;
        } else {
            foreach ($wrapper->thumbnailOverlays ?? [] as $overlay) {
                if (isset($overlay->thumbnailOverlayTimeStatusRenderer)) {
                    $durationText = $overlay->thumbnailOverlayTimeStatusRenderer->text ?? 0;
                    break;
                }
            }
        }

        // The duration filter only applies when the duration is available as text.
        if (is_string($durationText)) {
            // Text without a recognisable [h:]m:ss value counts as zero seconds.
            $duration = 0;
            if (preg_match('/(?:(\d+):)?(\d{1,2}):(\d{2})/', $durationText, $timeParts)) {
                $duration = (int) $timeParts[1] * 3600 + (int) $timeParts[2] * 60 + (int) $timeParts[3];
            }
            if ($duration < $minimumDurationSeconds || $duration > $maximumDurationSeconds) {
                continue;
            }
        }

        if (!$description || !$timestamp) {
            $this->fetchVideoDetails($videoId, $author, $description, $timestamp);
        }
        $this->addItem($videoId, $title, $author, $description, $timestamp);

        if (count($this->items) >= 99) {
            break;
        }
    }
}
2023-10-15 04:15:47 +03:00
/**
 * Appends one video to $this->items.
 *
 * The content consists of a linked thumbnail image followed by the description, whose
 * line breaks are converted with nl2br().
 *
 * @param string      $videoId     video id; used as item id and in the watch/thumbnail urls
 * @param string|null $title       item title
 * @param string|null $author      item author; null becomes an empty string
 * @param string|null $description description markup; null is treated as an empty string
 * @param int|null    $timestamp   publication time
 * @param string      $thumbnail   thumbnail file name without extension; '0' when empty
 */
private function addItem($videoId, $title, $author, $description, $timestamp, $thumbnail = '')
{
    // The description can still be null here (e.g. when the detail lookup returned early);
    // nl2br() rejects null on current PHP versions, hence the cast.
    $description = nl2br((string) $description);

    $item = [];
    // This should probably be uid?
    $item['id'] = $videoId;
    $item['title'] = $title;
    $item['author'] = $author ?? '';
    $item['timestamp'] = $timestamp;
    $item['uri'] = self::URI . '/watch?v=' . $videoId;

    if (!$thumbnail) {
        // Fallback to default thumbnail if there aren't any provided.
        $thumbnail = '0';
    }
    $thumbnailUri = str_replace('/www.', '/img.', self::URI) . '/vi/' . $videoId . '/' . $thumbnail . '.jpg';
    $item['content'] = sprintf('<a href="%s"><img src="%s" /></a><br />%s', $item['uri'], $thumbnailUri, $description);

    $this->items[] = $item;
}
2023-10-15 01:08:18 +03:00
/**
 * Decodes HTML entities in a title into UTF-8 text.
 *
 * @param string $title text that may contain entities such as &#1234; or &quot;
 *
 * @return string decoded text; ENT_QUOTES makes single-quote entities decode as well
 */
private function decodeTitle($title)
{
    $decodedTitle = html_entity_decode($title, ENT_QUOTES, 'UTF-8');

    return $decodedTitle;
}
2022-07-01 16:10:30 +03:00
2021-05-16 23:02:45 +03:00
/**
 * Returns the page this feed was built from.
 *
 * A playlist input takes precedence, then the listing/search url remembered while
 * collecting data, then the parent's URI.
 *
 * @return string
 */
public function getURI()
{
    $playlist = $this->getInput('p');
    if (!is_null($playlist)) {
        // Encoded the same way as the playlist listing url built while collecting data.
        return static::URI . '/playlist?list=' . urlencode((string) $playlist);
    }

    if ($this->feeduri) {
        return $this->feeduri;
    }

    return parent::getURI();
}
2022-07-01 16:10:30 +03:00
2014-06-20 19:00:36 +04:00
/**
 * Returns the feed name with a " - YouTube" suffix for every context declared by this
 * bridge, and the parent's name for anything else.
 *
 * @return string
 */
public function getName()
{
    $contextsWithFeedName = [
        'By username',
        'By channel id',
        'By custom name',
        'By playlist Id',
        'Search result',
    ];

    // in_array() without the strict flag mirrors the loose comparison a switch performs.
    if (in_array($this->queriedContext, $contextsWithFeedName)) {
        return htmlspecialchars_decode($this->feedName) . ' - YouTube';
    }

    return parent::getName();
}
2023-06-01 22:26:47 +03:00
/**
 * Returns the channel avatar url when one was found, otherwise the parent's icon.
 *
 * @return string
 */
public function getIcon()
{
    if (!empty($this->feedIconUrl)) {
        return $this->feedIconUrl;
    }

    return parent::getIcon();
}
2013-08-15 14:05:58 +04:00
}