rss-bridge/bridges/PepperBridgeAbstract.php
sysadminstory 55ffac5bae
[PepperBridgeAbstract, DealabsBridge, HotUKDealsBridge, MydealsBridge] (#3876)
Fix the Deal source link

The HTML does not contain the link to the "Deal source anymore", now only an
attribute does contain the information about the Deal Source.

The JSON data is now extraced for each Deal, and used to get the
Temperature and Deal Source.
2024-01-05 07:23:40 +01:00

699 lines
23 KiB
PHP

<?php
class PepperBridgeAbstract extends BridgeAbstract
{
const CACHE_TIMEOUT = 3600;
public function collectData()
{
switch ($this->queriedContext) {
case $this->i8n('context-keyword'):
return $this->collectDataKeywords();
break;
case $this->i8n('context-group'):
return $this->collectDataGroup();
break;
case $this->i8n('context-talk'):
return $this->collectDataTalk();
break;
}
}
/**
* Get the Deal data from the choosen group in the choosed order
*/
protected function collectDataGroup()
{
$url = $this->getGroupURI();
$this->collectDeals($url);
}
/**
* Get the Deal data from the choosen keywords and parameters
*/
protected function collectDataKeywords()
{
/* Even if the original website uses POST with the search page, GET works too */
$url = $this->getSearchURI();
$this->collectDeals($url);
}
/**
* Get the Deal data using the given URL
*/
protected function collectDeals($url)
{
$html = getSimpleHTMLDOM($url);
$list = $html->find('article[id]');
// Deal Image Link CSS Selector
$selectorImageLink = implode(
' ', /* Notice this is a space! */
[
'cept-thread-image-link',
'imgFrame',
'imgFrame--noBorder',
'thread-listImgCell',
]
);
// Deal Link CSS Selector
$selectorLink = implode(
' ', /* Notice this is a space! */
[
'cept-tt',
'thread-link',
'linkPlain',
]
);
// Deal Hotness CSS Selector
$selectorHot = implode(
' ', /* Notice this is a space! */
[
'vote-box'
]
);
// Deal Description CSS Selector
$selectorDescription = implode(
' ', /* Notice this is a space! */
[
'overflow--wrap-break'
]
);
// Deal Date CSS Selector
$selectorDate = implode(
' ', /* Notice this is a space! */
[
'size--all-s',
'flex',
'boxAlign-jc--all-fe'
]
);
// If there is no results, we don't parse the content because it display some random deals
$noresult = $html->find('h3[class*=text--b]', 0);
if ($noresult != null && strpos($noresult->plaintext, $this->i8n('no-results')) !== false) {
$this->items = [];
} else {
foreach ($list as $deal) {
$item = [];
$item['uri'] = $this->getDealURI($deal);
$item['title'] = $this->getTitle($deal);
$item['author'] = $deal->find('span.thread-username', 0)->plaintext;
// Get the JSON Data stored as vue
$jsonDealData = $this->getDealJsonData($deal);
$item['content'] = '<table><tr><td><a href="'
. $item['uri']
. '"><img src="'
. $this->getImage($deal)
. '"/></td><td>'
. $this->getHTMLTitle($item)
. $this->getPrice($deal)
. $this->getDiscount($deal)
. $this->getShipsFrom($deal)
. $this->getShippingCost($deal)
. $this->getSource($jsonDealData)
. $deal->find('div[class*=' . $selectorDescription . ']', 0)->innertext
. '</td><td>'
. $this->getTemperature($jsonDealData)
. '</td></table>';
// Check if a clock icon is displayed on the deal
$clocks = $deal->find('svg[class*=icon--clock]');
if ($clocks !== null && count($clocks) > 0) {
// Get the last clock, corresponding to the deal posting date
$clock = end($clocks);
// Find the text corresponding to the clock
$spanDateDiv = $clock->next_sibling();
$itemDate = $spanDateDiv->plaintext;
// In some case of a Local deal, there is no date, but we can use
// this case for other reason (like date not in the last field)
if ($this->contains($itemDate, $this->i8n('localdeal'))) {
$item['timestamp'] = time();
} elseif ($this->contains($itemDate, $this->i8n('relative-date-indicator'))) {
$item['timestamp'] = $this->relativeDateToTimestamp($itemDate);
} else {
$item['timestamp'] = $this->parseDate($itemDate);
}
}
$this->items[] = $item;
}
}
}
/**
* Get the Talk lastest comments
*/
protected function collectDataTalk()
{
$threadURL = $this->getInput('url');
$onlyWithUrl = $this->getInput('only_with_url');
// Get Thread ID from url passed in parameter
$threadSearch = preg_match('/-([0-9]{1,20})$/', $threadURL, $matches);
// Show an error message if we can't find the thread ID in the URL sent by the user
if ($threadSearch !== 1) {
returnClientError($this->i8n('thread-error'));
}
$threadID = $matches[1];
$url = $this->i8n('bridge-uri') . 'graphql';
// Get Cookies header to do the query
$cookiesHeaderValue = $this->getCookiesHeaderValue($url);
// GraphQL String
// This was extracted from https://www.dealabs.com/assets/js/modern/common_211b99.js
// This string was extracted during a Website visit, and minified using this neat tool :
// https://codepen.io/dangodev/pen/Baoqmoy
$graphqlString = <<<'HEREDOC'
query comments($filter:CommentFilter!,$limit:Int,$page:Int){comments(filter:$filter,limit:$limit,page:$page){
items{...commentFields}pagination{...paginationFields}}}fragment commentFields on Comment{commentId threadId url
preparedHtmlContent user{...userMediumAvatarFields...userNameFields...userPersonaFields bestBadge{...badgeFields}}
reactionCounts{type count}deletable currentUserReaction{type}reported reportable source status createdAt updatedAt
ignored popular deletedBy{username}notes{content createdAt user{username}}lastEdit{reason timeAgo userId}}fragment
userMediumAvatarFields on User{userId isDeletedOrPendingDeletion imageUrls(slot:"default",variations:
["user_small_avatar"])}fragment userNameFields on User{userId username isUserProfileHidden isDeletedOrPendingDeletion}
fragment userPersonaFields on User{persona{type text}}fragment badgeFields on Badge{badgeId level{...badgeLevelFields}}
fragment badgeLevelFields on BadgeLevel{key name description}fragment paginationFields on Pagination{count current last
next previous size order}
HEREDOC;
// Construct the JSON object to send to the Website
$queryArray = [
'query' => $graphqlString,
'variables' => [
'filter' => [
'threadId' => [
'eq' => $threadID,
],
'order' => [
'direction' => 'Descending',
],
],
'page' => 1,
],
];
$queryJSON = json_encode($queryArray);
// HTTP headers
$header = [
'Content-Type: application/json',
'Accept: application/json, text/plain, */*',
'X-Pepper-Txn: threads.show',
'X-Request-Type: application/vnd.pepper.v1+json',
'X-Requested-With: XMLHttpRequest',
"Cookie: $cookiesHeaderValue",
];
// CURL Options
$opts = [
CURLOPT_POST => 1,
CURLOPT_POSTFIELDS => $queryJSON
];
$json = getContents($url, $header, $opts);
$objects = json_decode($json);
foreach ($objects->data->comments->items as $comment) {
$item = [];
$item['uri'] = $comment->url;
$item['title'] = $comment->user->username . ' - ' . $comment->createdAt;
$item['author'] = $comment->user->username;
$item['content'] = $comment->preparedHtmlContent;
$item['uid'] = $comment->commentId;
// Timestamp handling needs a new parsing function
if ($onlyWithUrl == true) {
// Count Links and Quote Links
$content = str_get_html($item['content']);
$countLinks = count($content->find('a[href]'));
$countQuoteLinks = count($content->find('a[href][class=userHtml-quote-source]'));
// Only add element if there are Links ans more links tant Quote links
if ($countLinks > 0 && $countLinks > $countQuoteLinks) {
$this->items[] = $item;
}
} else {
$this->items[] = $item;
}
}
}
/**
* Extract the cookies obtained from the URL
* @return array the array containing the cookies set by the URL
*/
private function getCookiesHeaderValue($url)
{
$response = getContents($url, [], [], true);
$setCookieHeaders = $response['headers']['set-cookie'] ?? [];
$cookies = array_map(fn($c): string => explode(';', $c)[0], $setCookieHeaders);
return implode('; ', $cookies);
}
/**
* Check if the string $str contains any of the string of the array $arr
* @return boolean true if the string matched anything otherwise false
*/
private function contains($str, array $arr)
{
foreach ($arr as $a) {
if (stripos($str, $a) !== false) {
return true;
}
}
return false;
}
/**
* Get the Price from a Deal if it exists
* @return string String of the deal price
*/
private function getPrice($deal)
{
if (
$deal->find(
'span[class*=thread-price]',
0
) != null
) {
return '<div>' . $this->i8n('price') . ' : '
. $deal->find(
'span[class*=thread-price]',
0
)->plaintext
. '</div>';
} else {
return '';
}
}
/**
* Get the Title from a Deal if it exists
* @return string String of the deal title
*/
private function getTitle($deal)
{
$titleRoot = $deal->find('div[class*=threadGrid-title]', 0);
$titleA = $titleRoot->find('a[class*=thread-link]', 0);
$titleFirstChild = $titleRoot->first_child();
if ($titleA !== null) {
$title = $titleA->plaintext;
} else {
// In some case, expired deals have a different format
$title = $titleRoot->find('span', 0)->plaintext;
}
return $title;
}
/**
* Get the Title from a Talk if it exists
* @return string String of the Talk title
*/
private function getTalkTitle()
{
$html = getSimpleHTMLDOMCached($this->getInput('url'));
$title = $html->find('.thread-title', 0)->plaintext;
return $title;
}
/**
* Get the HTML Title code from an item
* @return string String of the deal title
*/
private function getHTMLTitle($item)
{
if ($item['uri'] == '') {
$html = '<h2>' . $item['title'] . '</h2>';
} else {
$html = '<h2><a href="' . $item['uri'] . '">'
. $item['title'] . '</a></h2>';
}
return $html;
}
/**
* Get the URI from a Deal if it exists
* @return string String of the deal URI
*/
private function getDealURI($deal)
{
$dealId = $deal->attr['id'];
$uri = $this->i8n('bridge-uri') . $this->i8n('uri-deal') . str_replace('_', '-', $dealId);
return $uri;
}
/**
* Get the Shipping costs from a Deal if it exists
* @return string String of the deal shipping Cost
*/
private function getShippingCost($deal)
{
if ($deal->find('span[class*=space--ml-2 size--all-s overflow--wrap-off]', 0) != null) {
if ($deal->find('span[class*=space--ml-2 size--all-s overflow--wrap-off]', 0)->children(1) != null) {
return '<div>' . $this->i8n('shipping') . ' : '
. strip_tags($deal->find('span[class*=space--ml-2 size--all-s overflow--wrap-off]', 0)->children(1)->innertext)
. '</div>';
} else {
return '<div>' . $this->i8n('shipping') . ' : '
. strip_tags($deal->find('span[class*=text--color-greyShade flex--inline]', 0)->innertext)
. '</div>';
}
} else {
return '';
}
}
/**
* Get the temperature from a Deal if it exists
* @return string String of the deal temperature
*/
private function getTemperature($data)
{
return $data['props']['thread']['temperature'] . '°';
}
/**
* Get the Deal data from the "data-vue2" JSON attribute
* @return array Array containg the deal properties contained in the "data-vue2" attribute
*/
private function getDealJsonData($deal)
{
$data = Json::decode($deal->find('div[class=js-vue2]', 0)->getAttribute('data-vue2'));
return $data;
}
/**
* Get the source of a Deal if it exists
* @return string String of the deal source
*/
private function getSource($jsonData)
{
if ($jsonData['props']['thread']['merchant'] != null) {
$path = $this->i8n('uri-merchant') . $jsonData['props']['thread']['merchant']['merchantId'];
$text = $jsonData['props']['thread']['merchant']['merchantName'];
return '<div>' . $this->i8n('origin') . ' : <a href="' . static::URI . $path . '">' . $text . '</a></div>';
} else {
return '';
}
}
/**
* Get the original Price and discout from a Deal if it exists
* @return string String of the deal original price and discount
*/
private function getDiscount($deal)
{
if ($deal->find('span[class*=mute--text text--lineThrough]', 0) != null) {
$discountHtml = $deal->find('span[class=space--ml-1 size--all-l size--fromW3-xl]', 0);
if ($discountHtml != null) {
$discount = $discountHtml->plaintext;
} else {
$discount = '';
}
return '<div>' . $this->i8n('discount') . ' : <span style="text-decoration: line-through;">'
. $deal->find(
'span[class*=mute--text text--lineThrough]',
0
)->plaintext
. '</span>&nbsp;'
. $discount
. '</div>';
} else {
return '';
}
}
/**
* Get the Picture URL from a Deal if it exists
* @return string String of the deal Picture URL
*/
private function getImage($deal)
{
$selectorLazy = implode(
' ', /* Notice this is a space! */
[
'thread-image',
'width--all-auto',
'height--all-auto',
'imgFrame-img',
'img--dummy',
'js-lazy-img'
]
);
$selectorPlain = implode(
' ', /* Notice this is a space! */
[
'thread-image',
'width--all-auto',
'height--all-auto',
'imgFrame-img',
]
);
if ($deal->find('img[class=' . $selectorLazy . ']', 0) != null) {
return json_decode(
html_entity_decode(
$deal->find('img[class=' . $selectorLazy . ']', 0)
->getAttribute('data-lazy-img')
)
)->{'src'};
} else {
return $deal->find('img[class*=' . $selectorPlain . ']', 0)->src ?? '';
}
}
/**
* Get the originating country from a Deal if it exists
* @return string String of the deal originating country
*/
private function getShipsFrom($deal)
{
$selector = implode(
' ', /* Notice this is a space! */
[
'hide--toW2',
'metaRibbon',
]
);
if ($deal->find('span[class*=' . $selector . ']', 0) != null) {
$children = $deal->find('span[class*=' . $selector . ']', 0)->children(2);
if ($children) {
return '<div>' . $children->plaintext . '</div>';
}
}
return '';
}
/**
* Transforms a local date into a timestamp
* @return int timestamp of the input date
*/
private function parseDate($string)
{
$month_local = $this->i8n('local-months');
$month_en = [
'January',
'February',
'March',
'April',
'May',
'June',
'July',
'August',
'September',
'October',
'November',
'December'
];
// A date can be prfixed with some words, we remove theme
$string = $this->removeDatePrefixes($string);
// We translate the local months name in the english one
$date_str = trim(str_replace($month_local, $month_en, $string));
// If the date does not contain any year, we add the current year
if (!preg_match('/[0-9]{4}/', $string)) {
$date_str .= ' ' . date('Y');
}
// Add the Hour and minutes
$date_str .= ' 00:00';
$date = DateTime::createFromFormat('j F Y H:i', $date_str);
// In some case, the date is not recognized : as a workaround the actual date is taken
if ($date === false) {
$date = new DateTime();
}
return $date->getTimestamp();
}
/**
* Remove the prefix of a date if it has one
* @return the date without prefiux
*/
private function removeDatePrefixes($string)
{
$string = str_replace($this->i8n('date-prefixes'), [], $string);
return $string;
}
/**
* Remove the suffix of a relative date if it has one
* @return the relative date without suffixes
*/
private function removeRelativeDateSuffixes($string)
{
if (count($this->i8n('relative-date-ignore-suffix')) > 0) {
$string = preg_replace($this->i8n('relative-date-ignore-suffix'), '', $string);
}
return $string;
}
/**
* Transforms a relative local date into a timestamp
* @return int timestamp of the input date
*/
private function relativeDateToTimestamp($str)
{
$date = new DateTime();
// The minimal amount of time substracted is a minute : the seconds in the resulting date would be related to the execution time of the script.
// This make no sense, so we set the seconds manually to "00".
$date->setTime($date->format('H'), $date->format('i'), 0);
// In case of update date, replace it by the regular relative date first word
$str = str_replace($this->i8n('relative-date-alt-prefixes'), $this->i8n('local-time-relative')[0], $str);
$str = $this->removeRelativeDateSuffixes($str);
$search = $this->i8n('local-time-relative');
$replace = [
'-',
'minute',
'hour',
'day',
'month',
'year',
''
];
$date->modify(str_replace($search, $replace, $str));
return $date->getTimestamp();
}
/**
* Returns the RSS Feed title according to the parameters
* @return string the RSS feed Tiyle
*/
public function getName()
{
switch ($this->queriedContext) {
case $this->i8n('context-keyword'):
return $this->i8n('bridge-name') . ' - ' . $this->i8n('title-keyword') . ' : ' . $this->getInput('q');
break;
case $this->i8n('context-group'):
return $this->i8n('bridge-name') . ' - ' . $this->i8n('title-group') . ' : ' . $this->getKey('group');
break;
case $this->i8n('context-talk'):
return $this->i8n('bridge-name') . ' - ' . $this->i8n('title-talk') . ' : ' . $this->getTalkTitle();
break;
default: // Return default value
return static::NAME;
}
}
/**
* Returns the RSS Feed URI according to the parameters
* @return string the RSS feed Title
*/
public function getURI()
{
switch ($this->queriedContext) {
case $this->i8n('context-keyword'):
return $this->getSearchURI();
break;
case $this->i8n('context-group'):
return $this->getGroupURI();
break;
case $this->i8n('context-talk'):
return $this->getTalkURI();
break;
default: // Return default value
return static::URI;
}
}
/**
* Returns the RSS Feed URI for a keyword Feed
* @return string the RSS feed URI
*/
private function getSearchURI()
{
$q = $this->getInput('q');
$hide_expired = $this->getInput('hide_expired');
$hide_local = $this->getInput('hide_local');
$priceFrom = $this->getInput('priceFrom');
$priceTo = $this->getInput('priceTo');
$url = $this->i8n('bridge-uri')
. 'search/advanced?q='
. urlencode($q)
. '&hide_expired=' . $hide_expired
. '&hide_local=' . $hide_local
. '&priceFrom=' . $priceFrom
. '&priceTo=' . $priceTo
/* Some default parameters
* search_fields : Search in Titres & Descriptions & Codes
* sort_by : Sort the search by new deals
* time_frame : Search will not be on a limited timeframe
*/
. '&search_fields[]=1&search_fields[]=2&search_fields[]=3&sort_by=new&time_frame=0';
return $url;
}
/**
* Returns the RSS Feed URI for a group Feed
* @return string the RSS feed URI
*/
private function getGroupURI()
{
$group = $this->getInput('group');
$order = $this->getInput('order');
$url = $this->i8n('bridge-uri')
. $this->i8n('uri-group') . $group . $order;
return $url;
}
/**
* Returns the RSS Feed URI for a Talk Feed
* @return string the RSS feed URI
*/
private function getTalkURI()
{
$url = $this->getInput('url');
return $url;
}
/**
* This is some "localisation" function that returns the needed content using
* the "$lang" class variable in the local class
* @return various the local content needed
*/
protected function i8n($key)
{
if (array_key_exists($key, $this->lang)) {
return $this->lang[$key];
} else {
return null;
}
}
}