mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2024-11-22 09:35:28 +03:00
84b5ffcc7c
- The website changed the Deal Origin markup: fixed the CSS class used to extract it - The shipping cost contained an extra SVG image: all HTML tags are now stripped from that content
671 lines
22 KiB
PHP
671 lines
22 KiB
PHP
<?php
|
|
|
|
class PepperBridgeAbstract extends BridgeAbstract
|
|
{
|
|
// Cache lifetime for this bridge; presumably expressed in seconds (3600 = one hour) - TODO confirm against BridgeAbstract.
const CACHE_TIMEOUT = 3600;
|
|
|
|
/**
 * Entry point of the bridge: run the collector matching the queried context.
 *
 * The context names are localized, so they are resolved through i8n() before
 * being compared (loosely, as a switch statement would) with the queried context.
 * Nothing is collected when the context matches none of the known ones.
 */
public function collectData()
{
    $context = $this->queriedContext;

    if ($context == $this->i8n('context-keyword')) {
        return $this->collectDataKeywords();
    }

    if ($context == $this->i8n('context-group')) {
        return $this->collectDataGroup();
    }

    if ($context == $this->i8n('context-talk')) {
        return $this->collectDataTalk();
    }
}
|
|
|
|
/**
 * Collect the deals of the chosen group, in the chosen order.
 *
 * The page to scrape is the one returned by getGroupURI().
 */
protected function collectDataGroup()
{
    $this->collectDeals($this->getGroupURI());
}
|
|
|
|
/**
 * Collect the deals matching the chosen keywords and search parameters.
 *
 * The page to scrape is the one returned by getSearchURI().
 */
protected function collectDataKeywords()
{
    // The original website submits its search page with POST, but GET works too.
    $this->collectDeals($this->getSearchURI());
}
|
|
|
|
/**
 * Scrape the deal list found at the given URL and append one feed item per deal.
 *
 * Every `article` element carrying an `id` attribute is treated as a deal. When the
 * page shows the localized "no results" heading, the item list is emptied and the
 * page is not parsed, because the website then displays some random deals.
 *
 * Optional parts of a deal (author, description, hotness, date) that cannot be
 * found in the markup are left empty instead of aborting the whole feed.
 *
 * @param string $url URL of the deal listing or search page to scrape
 */
protected function collectDeals($url)
{
    $html = getSimpleHTMLDOM($url);

    // If there are no results, we don't parse the content because it displays some random deals
    $noresult = $html->find('h3[class=size--all-l]', 0);
    if ($noresult !== null && strpos($noresult->plaintext, $this->i8n('no-results')) !== false) {
        $this->items = [];
        return;
    }

    foreach ($html->find('article[id]') as $deal) {
        $item = [];
        $item['uri'] = $this->getDealURI($deal);
        $item['title'] = $this->getTitle($deal);

        // find(..., 0) yields null when the node is absent (the website markup changes
        // from time to time), so each optional node is checked before being read.
        $authorNode = $deal->find('span.thread-username', 0);
        $item['author'] = $authorNode !== null ? $authorNode->plaintext : '';

        $descriptionNode = $deal->find('div[class*=overflow--wrap-break]', 0);
        $description = $descriptionNode !== null ? $descriptionNode->innertext : '';

        // Deal hotness: first span of the vote box
        $hotness = '';
        $voteBox = $deal->find('div[class*=vote-box]', 0);
        if ($voteBox !== null) {
            $hotnessNode = $voteBox->find('span', 0);
            if ($hotnessNode !== null) {
                $hotness = $hotnessNode->outertext;
            }
        }

        $item['content'] = '<table><tr><td><a href="'
            . $item['uri']
            . '"><img src="'
            . $this->getImage($deal)
            . '"/></td><td>'
            . $this->getHTMLTitle($item)
            . $this->getPrice($deal)
            . $this->getDiscount($deal)
            . $this->getShipsFrom($deal)
            . $this->getShippingCost($deal)
            . $this->getSource($deal)
            . $description
            . '</td><td>'
            . $hotness
            . '</td></table>';

        // Check if a clock icon is displayed on the deal
        $clocks = $deal->find('svg[class*=icon--clock]');
        if (!empty($clocks)) {
            // The last clock is the one corresponding to the deal posting date;
            // its text is held by the element following the icon.
            $dateNode = end($clocks)->next_sibling();
            if ($dateNode !== null) {
                $itemDate = $dateNode->plaintext;
                // In some cases of a Local deal, there is no date, but this branch is also
                // used for other reasons (like the date not being in the last field)
                if ($this->contains($itemDate, $this->i8n('localdeal'))) {
                    $item['timestamp'] = time();
                } elseif ($this->contains($itemDate, $this->i8n('relative-date-indicator'))) {
                    $item['timestamp'] = $this->relativeDateToTimestamp($itemDate);
                } else {
                    $item['timestamp'] = $this->parseDate($itemDate);
                }
            }
        }

        $this->items[] = $item;
    }
}
|
|
|
|
/**
 * Collect the latest comments of a Talk thread through the website GraphQL endpoint.
 *
 * The thread ID is read from the trailing "-<digits>" part of the `url` input. When
 * the `only_with_url` input is set, a comment is kept only if it contains at least
 * one link and more links than quote-source links.
 *
 * @throws \UnexpectedValueException when the response does not contain a comment list
 */
protected function collectDataTalk()
{
    $threadURL = $this->getInput('url');
    $onlyWithUrl = $this->getInput('only_with_url');

    // Get Thread ID from url passed in parameter.
    // Show an error message if we can't find the thread ID in the URL sent by the user.
    if (preg_match('/-([0-9]{1,20})$/', $threadURL, $matches) !== 1) {
        returnClientError($this->i8n('thread-error'));
    }
    $threadID = $matches[1];

    $url = $this->i8n('bridge-uri') . 'graphql';

    // Get Cookies header to do the query
    $cookiesHeaderValue = $this->getCookiesHeaderValue($url);

    // GraphQL String
    // This was extracted from https://www.dealabs.com/assets/js/modern/common_211b99.js
    // This string was extracted during a Website visit, and minified using this neat tool :
    // https://codepen.io/dangodev/pen/Baoqmoy
    $graphqlString = <<<'HEREDOC'
query comments($filter:CommentFilter!,$limit:Int,$page:Int){comments(filter:$filter,limit:$limit,page:$page){
items{...commentFields}pagination{...paginationFields}}}fragment commentFields on Comment{commentId threadId url
preparedHtmlContent user{...userMediumAvatarFields...userNameFields...userPersonaFields bestBadge{...badgeFields}}
reactionCounts{type count}deletable currentUserReaction{type}reported reportable source status createdAt updatedAt
ignored popular deletedBy{username}notes{content createdAt user{username}}lastEdit{reason timeAgo userId}}fragment
userMediumAvatarFields on User{userId isDeletedOrPendingDeletion imageUrls(slot:"default",variations:
["user_small_avatar"])}fragment userNameFields on User{userId username isUserProfileHidden isDeletedOrPendingDeletion}
fragment userPersonaFields on User{persona{type text}}fragment badgeFields on Badge{badgeId level{...badgeLevelFields}}
fragment badgeLevelFields on BadgeLevel{key name description}fragment paginationFields on Pagination{count current last
next previous size order}
HEREDOC;

    // Construct the JSON object to send to the Website
    $queryArray = [
        'query' => $graphqlString,
        'variables' => [
            'filter' => [
                'threadId' => [
                    'eq' => $threadID,
                ],
                'order' => [
                    'direction' => 'Descending',
                ],
            ],
            'page' => 1,
        ],
    ];
    $queryJSON = json_encode($queryArray);

    // HTTP headers
    $header = [
        'Content-Type: application/json',
        'Accept: application/json, text/plain, */*',
        'X-Pepper-Txn: threads.show',
        'X-Request-Type: application/vnd.pepper.v1+json',
        'X-Requested-With: XMLHttpRequest',
        "Cookie: $cookiesHeaderValue",
    ];

    // CURL Options
    $opts = [
        CURLOPT_POST => 1,
        CURLOPT_POSTFIELDS => $queryJSON,
    ];

    $json = getContents($url, $header, $opts);
    $objects = json_decode($json);

    // json_decode() gives null for an undecodable body, and an error response may lack
    // the comment list: fail with an explicit message rather than iterating over null.
    $comments = $objects->data->comments->items ?? null;
    if (!is_array($comments)) {
        throw new \UnexpectedValueException('Unexpected GraphQL response: the comment list is missing');
    }

    foreach ($comments as $comment) {
        $item = [];
        $item['uri'] = $comment->url;
        $item['title'] = $comment->user->username . ' - ' . $comment->createdAt;
        $item['author'] = $comment->user->username;
        $item['content'] = $comment->preparedHtmlContent;
        $item['uid'] = $comment->commentId;
        // Timestamp handling needs a new parsing function

        if ($onlyWithUrl) {
            // str_get_html() may return false instead of a DOM (e.g. for empty content);
            // such a comment has no link, so it is skipped.
            $content = str_get_html($item['content']);
            if (!$content) {
                continue;
            }

            // Count Links and Quote Links
            $countLinks = count($content->find('a[href]'));
            $countQuoteLinks = count($content->find('a[href][class=userHtml-quote-source]'));

            // Only keep the comment if there are links, and more links than quote links
            if ($countLinks <= 0 || $countLinks <= $countQuoteLinks) {
                continue;
            }
        }

        $this->items[] = $item;
    }
}
|
|
|
|
/**
 * Request the URL and build a Cookie header value from the cookies it sets.
 *
 * For each `set-cookie` response header, only the part before the first ';'
 * is kept; the kept parts are joined with '; '.
 *
 * @param string $url URL to request
 * @return string value usable in a "Cookie:" request header (empty when no cookie is set)
 */
private function getCookiesHeaderValue($url)
{
    $response = getContents($url, [], [], true);
    $setCookieHeaders = $response['headers']['set-cookie'] ?? [];

    $keepFirstSegment = static function ($header): string {
        return explode(';', $header, 2)[0];
    };

    return implode('; ', array_map($keepFirstSegment, $setCookieHeaders));
}
|
|
|
|
/**
 * Tell whether the string contains at least one of the given strings.
 *
 * The comparison is case-insensitive and stops at the first match.
 *
 * @param string $str string to search in
 * @param array $arr list of strings to look for
 * @return bool true if any string of $arr was found in $str, false otherwise
 */
private function contains($str, array $arr)
{
    foreach ($arr as $needle) {
        if (stripos($str, $needle) === false) {
            continue;
        }

        return true;
    }

    return false;
}
|
|
|
|
/**
 * Build the HTML line showing the price of a deal.
 *
 * @param object $deal DOM node of the deal
 * @return string "<div>" holding the localized price label and the price text,
 *                or an empty string when the deal shows no price
 */
private function getPrice($deal)
{
    $priceNode = $deal->find('span[class*=thread-price]', 0);

    if ($priceNode === null) {
        return '';
    }

    return '<div>' . $this->i8n('price') . ' : ' . $priceNode->plaintext . '</div>';
}
|
|
|
|
/**
 * Get the title of a deal.
 *
 * The title is normally the text of the thread link found in the title container.
 * In some cases expired deals use a different format, so the first span of the
 * container is used as a fallback.
 *
 * @param object $deal DOM node of the deal
 * @return string the deal title, or an empty string when no title node can be found
 */
private function getTitle($deal)
{
    $titleRoot = $deal->find('div[class*=threadGrid-title]', 0);
    if ($titleRoot === null) {
        // No title container at all: nothing to read from
        return '';
    }

    $titleNode = $titleRoot->find('a[class*=thread-link]', 0)
        ?? $titleRoot->find('span', 0);

    return $titleNode !== null ? $titleNode->plaintext : '';
}
|
|
|
|
/**
 * Get the title of the Talk thread designated by the `url` input.
 *
 * The thread page is loaded through the cached DOM loader and the text of the
 * first `.thread-title` element is returned.
 *
 * @return string the Talk title, or an empty string when the page has no title element
 */
private function getTalkTitle()
{
    $html = getSimpleHTMLDOMCached($this->getInput('url'));
    $titleNode = $html->find('.thread-title', 0);

    // The element is absent when the page layout differs; do not dereference null
    return $titleNode !== null ? $titleNode->plaintext : '';
}
|
|
|
|
/**
 * Build the HTML heading of an item.
 *
 * @param array $item feed item; its 'title' and 'uri' entries are read
 * @return string "<h2>" heading holding the title, wrapped in a link to the
 *                item URI unless that URI is empty
 */
private function getHTMLTitle($item)
{
    $title = $item['title'];

    if ($item['uri'] == '') {
        return '<h2>' . $title . '</h2>';
    }

    return '<h2><a href="' . $item['uri'] . '">' . $title . '</a></h2>';
}
|
|
|
|
/**
 * Build the URI of a deal from the `id` attribute of its DOM node.
 *
 * The URI is the localized bridge URI followed by the localized deal path and
 * the deal id, in which every '_' is replaced by '-'.
 *
 * @param object $deal DOM node of the deal
 * @return string the deal URI
 */
private function getDealURI($deal)
{
    $slug = str_replace('_', '-', $deal->attr['id']);

    return $this->i8n('bridge-uri') . $this->i8n('uri-deal') . $slug;
}
|
|
|
|
/**
 * Build the HTML line showing the shipping cost of a deal.
 *
 * The cost is read from the second child of the shipping span when it exists,
 * otherwise from a fallback span. HTML tags are stripped from the text because
 * the content can embed markup (such as an SVG icon).
 *
 * @param object $deal DOM node of the deal
 * @return string "<div>" holding the localized shipping label and the cost,
 *                or an empty string when no shipping information is found
 */
private function getShippingCost($deal)
{
    $shippingNode = $deal->find('span[class*=space--ml-2 size--all-s overflow--wrap-off]', 0);
    if ($shippingNode === null) {
        return '';
    }

    $costNode = $shippingNode->children(1);
    if ($costNode === null) {
        $costNode = $deal->find('span[class*=text--color-greyShade flex--inline]', 0);
    }

    // The fallback span can be missing too: return nothing rather than dereferencing null
    if ($costNode === null) {
        return '';
    }

    return '<div>' . $this->i8n('shipping') . ' : '
        . strip_tags($costNode->innertext)
        . '</div>';
}
|
|
|
|
/**
 * Build the HTML line linking to the origin (source) of a deal.
 *
 * The link path comes from the 'path' entry of the JSON stored in the
 * `data-cloak-link` attribute of the origin button; spaces in the trimmed
 * path are replaced by '/'.
 *
 * @param object $deal DOM node of the deal
 * @return string "<div>" holding the localized origin label and the link,
 *                or an empty string when the deal has no origin button
 */
private function getSource($deal)
{
    $origin = $deal->find('button[class*=text--color-greyShade]', 0);
    if ($origin === null) {
        return '';
    }

    $cloakLink = Json::decode($origin->{'data-cloak-link'});
    $path = str_replace(' ', '/', trim($cloakLink['path']));

    // $text is a DOM node (or null) converted to a string by the concatenation below
    $text = $origin->find('span[class*=link]', 0);

    return '<div>' . $this->i8n('origin') . ' : <a href="' . static::URI . $path . '">' . $text . '</a></div>';
}
|
|
|
|
/**
 * Build the HTML line showing the original price and the discount of a deal.
 *
 * @param object $deal DOM node of the deal
 * @return string "<div>" holding the localized discount label, the struck-through
 *                original price and the discount text (when present), or an empty
 *                string when the deal shows no original price
 */
private function getDiscount($deal)
{
    $originalPrice = $deal->find('span[class*=mute--text text--lineThrough]', 0);
    if ($originalPrice === null) {
        return '';
    }

    $discountNode = $deal->find('span[class=space--ml-1 size--all-l size--fromW3-xl]', 0);
    $discount = $discountNode !== null ? $discountNode->plaintext : '';

    return '<div>' . $this->i8n('discount') . ' : <span style="text-decoration: line-through;">'
        . $originalPrice->plaintext
        . '</span> '
        . $discount
        . '</div>';
}
|
|
|
|
/**
 * Get the picture URL of a deal.
 *
 * A lazy-loaded image (matched on its exact class list) stores its data as
 * HTML-encoded JSON in the `data-lazy-img` attribute; the 'src' entry of that
 * JSON is returned. Otherwise the `src` attribute of the plain image is used.
 *
 * @param object $deal DOM node of the deal
 * @return string the picture URL, or an empty string when none can be determined
 */
private function getImage($deal)
{
    $selectorLazy = implode(
        ' ', /* Notice this is a space! */
        [
            'thread-image',
            'width--all-auto',
            'height--all-auto',
            'imgFrame-img',
            'img--dummy',
            'js-lazy-img',
        ]
    );

    $selectorPlain = implode(
        ' ', /* Notice this is a space! */
        [
            'thread-image',
            'width--all-auto',
            'height--all-auto',
            'imgFrame-img',
        ]
    );

    $lazyImage = $deal->find('img[class=' . $selectorLazy . ']', 0);
    if ($lazyImage !== null) {
        $lazyData = json_decode(
            html_entity_decode((string) $lazyImage->getAttribute('data-lazy-img'))
        );

        // json_decode() yields null when the attribute is missing or is not valid JSON;
        // always return a string instead of reading a property on null.
        return $lazyData->src ?? '';
    }

    return $deal->find('img[class*=' . $selectorPlain . ']', 0)->src ?? '';
}
|
|
|
|
/**
 * Build the HTML line showing where a deal ships from.
 *
 * The text is read from the third child of the deal's meta ribbon span.
 *
 * @param object $deal DOM node of the deal
 * @return string "<div>" holding the text, or an empty string when the ribbon
 *                or its third child does not exist
 */
private function getShipsFrom($deal)
{
    $ribbon = $deal->find('span[class*=hide--toW2 metaRibbon]', 0);
    if ($ribbon === null) {
        return '';
    }

    $origin = $ribbon->children(2);

    return $origin ? '<div>' . $origin->plaintext . '</div>' : '';
}
|
|
|
|
/**
 * Transform a localized date into a timestamp.
 *
 * Known prefixes are removed, localized month names are translated to English,
 * the current year is appended when the text holds no 4-digit number, and the
 * time is set to 00:00. The result is parsed with the 'j F Y H:i' format.
 *
 * @param string $string localized date text
 * @return int timestamp of the date; the current time when the date is not recognized
 */
private function parseDate($string)
{
    $englishMonths = [
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December'
    ];
    $localMonths = $this->i8n('local-months');

    // A date can be prefixed with some words: remove them first
    $withoutPrefix = $this->removeDatePrefixes($string);

    // Translate the local month names into the English ones
    $normalized = trim(str_replace($localMonths, $englishMonths, $withoutPrefix));

    // If the date does not contain any year, use the current one
    if (preg_match('/[0-9]{4}/', $withoutPrefix) !== 1) {
        $normalized .= ' ' . date('Y');
    }

    // Add the hour and minutes expected by the format
    $parsed = DateTime::createFromFormat('j F Y H:i', $normalized . ' 00:00');

    // In some cases the date is not recognized: as a workaround the current date is taken
    if ($parsed === false) {
        $parsed = new DateTime();
    }

    return $parsed->getTimestamp();
}
|
|
|
|
/**
 * Remove the localized prefixes from a date, if it has any.
 *
 * @param string $string date text, possibly prefixed
 * @return string the date without the prefixes
 */
private function removeDatePrefixes($string)
{
    // An empty-string replacement works whether the prefixes are configured as an array
    // or a single string (an array replacement is rejected for a non-array search), and
    // a missing configuration entry leaves the date untouched.
    return str_replace($this->i8n('date-prefixes') ?? [], '', $string);
}
|
|
|
|
/**
 * Remove the localized suffixes from a relative date, if it has any.
 *
 * The suffixes are configured as regular expression pattern(s).
 *
 * @param string $string relative date text, possibly suffixed
 * @return string the relative date without the suffixes
 */
private function removeRelativeDateSuffixes($string)
{
    $patterns = $this->i8n('relative-date-ignore-suffix');

    // empty() accepts a missing (null) entry as well as an empty list, whereas
    // count() throws on a non-countable value under PHP 8.
    if (empty($patterns)) {
        return $string;
    }

    // preg_replace() returns null on a regex error: keep the original text in that case
    return preg_replace($patterns, '', $string) ?? $string;
}
|
|
|
|
/**
 * Transform a localized relative date into a timestamp.
 *
 * The localized words are translated, position by position, into a relative
 * expression applied to the current date with DateTime::modify().
 *
 * @param string $str localized relative date text
 * @return int timestamp of the date
 */
private function relativeDateToTimestamp($str)
{
    $localWords = $this->i8n('local-time-relative');
    $modifierWords = ['-', 'minute', 'hour', 'day', 'month', 'year', ''];

    // In case of an update date, replace its prefix by the first word of a regular relative date
    $relative = str_replace($this->i8n('relative-date-alt-prefixes'), $localWords[0], $str);
    $relative = $this->removeRelativeDateSuffixes($relative);

    $date = new DateTime();
    $date->modify(str_replace($localWords, $modifierWords, $relative));

    return $date->getTimestamp();
}
|
|
|
|
/**
 * Return the feed title matching the queried context and its parameters.
 *
 * @return string the localized bridge name followed by the context title and its
 *                parameter (keywords, group key or Talk title); the bridge NAME
 *                constant when no known context is queried
 */
public function getName()
{
    $context = $this->queriedContext;

    if ($context == $this->i8n('context-keyword')) {
        return $this->i8n('bridge-name') . ' - ' . $this->i8n('title-keyword') . ' : ' . $this->getInput('q');
    }

    if ($context == $this->i8n('context-group')) {
        return $this->i8n('bridge-name') . ' - ' . $this->i8n('title-group') . ' : ' . $this->getKey('group');
    }

    if ($context == $this->i8n('context-talk')) {
        return $this->i8n('bridge-name') . ' - ' . $this->i8n('title-talk') . ' : ' . $this->getTalkTitle();
    }

    // Default value
    return static::NAME;
}
|
|
|
|
/**
 * Return the feed URI matching the queried context and its parameters.
 *
 * @return string the search, group or Talk URI; the bridge URI constant when no
 *                known context is queried
 */
public function getURI()
{
    $context = $this->queriedContext;

    if ($context == $this->i8n('context-keyword')) {
        return $this->getSearchURI();
    }

    if ($context == $this->i8n('context-group')) {
        return $this->getGroupURI();
    }

    if ($context == $this->i8n('context-talk')) {
        return $this->getTalkURI();
    }

    // Default value
    return static::URI;
}
|
|
|
|
/**
 * Build the URI of the search page for a keyword feed.
 *
 * The `q`, `hide_expired`, `hide_local`, `priceFrom` and `priceTo` inputs are
 * placed in the query string (only `q` is URL-encoded), followed by fixed parameters.
 *
 * @return string the search page URI
 */
private function getSearchURI()
{
    /* Some default parameters
     * search_fields : Search in Titles & Descriptions & Codes
     * sort_by : Sort the search by new deals
     * time_frame : Search will not be on a limited timeframe
     */
    $defaultParameters = '&search_fields[]=1&search_fields[]=2&search_fields[]=3&sort_by=new&time_frame=0';

    return $this->i8n('bridge-uri')
        . 'search/advanced?q=' . urlencode($this->getInput('q'))
        . '&hide_expired=' . $this->getInput('hide_expired')
        . '&hide_local=' . $this->getInput('hide_local')
        . '&priceFrom=' . $this->getInput('priceFrom')
        . '&priceTo=' . $this->getInput('priceTo')
        . $defaultParameters;
}
|
|
|
|
/**
 * Build the URI of the group page for a group feed.
 *
 * @return string the localized bridge URI followed by the localized group path,
 *                the `group` input and the `order` input
 */
private function getGroupURI()
{
    return $this->i8n('bridge-uri')
        . $this->i8n('uri-group')
        . $this->getInput('group')
        . $this->getInput('order');
}
|
|
|
|
/**
 * Return the URI of a Talk feed, which is the `url` input as given by the user.
 *
 * @return string the Talk thread URI
 */
private function getTalkURI()
{
    return $this->getInput('url');
}
|
|
|
|
/**
 * "Localisation" helper: look up a key in the `$lang` property of the object.
 *
 * NOTE(review): `$lang` is not declared in this class; presumably each concrete
 * bridge defines it - confirm.
 *
 * @param string $key name of the localized entry
 * @return mixed the entry stored under $key, or null when the key does not exist
 */
protected function i8n($key)
{
    return array_key_exists($key, $this->lang) ? $this->lang[$key] : null;
}
|
|
}
|