2018-07-16 18:24:52 +05:30
|
|
|
<?php
|
|
|
|
|
|
|
|
/**
 * Bridge that tracks the price of a single Amazon product.
 *
 * The product is identified by its ASIN and the Amazon storefront (TLD).
 * Each run fetches the product page, scrapes title, image and price, and
 * emits exactly one feed item whose uid is derived from the scraped price.
 */
class AmazonPriceTrackerBridge extends BridgeAbstract
{
    const MAINTAINER = 'captn3m0, sal0max';
    const NAME = 'Amazon Price Tracker';
    const URI = 'https://www.amazon.com/';
    const CACHE_TIMEOUT = 3600; // 1h
    const DESCRIPTION = 'Tracks price for a single product on Amazon';

    const PARAMETERS = [
        [
            'asin' => [
                'name' => 'ASIN',
                'required' => true,
                'exampleValue' => 'B071GB1VMQ',
                // https://stackoverflow.com/a/12827734
                'pattern' => 'B[\dA-Z]{9}|\d{9}(X|\d)',
            ],
            'tld' => [
                'name' => 'Country',
                'type' => 'list',
                'values' => [
                    'Australia' => 'com.au',
                    'Brazil' => 'com.br',
                    'Canada' => 'ca',
                    'China' => 'cn',
                    'France' => 'fr',
                    'Germany' => 'de',
                    'India' => 'in',
                    'Italy' => 'it',
                    'Japan' => 'co.jp',
                    'Mexico' => 'com.mx',
                    'Netherlands' => 'nl',
                    'Poland' => 'pl',
                    'Spain' => 'es',
                    'Sweden' => 'se',
                    'Turkey' => 'com.tr',
                    'United Kingdom' => 'co.uk',
                    'United States' => 'com',
                ],
                'defaultValue' => 'com',
            ],
        ],
    ];

    /**
     * CSS selectors probed, in order, for the element holding the price.
     * The first selector that matches an element wins.
     */
    const PRICE_SELECTORS = [
        '#priceblock_ourprice',
        '.priceBlockBuyingPriceString',
        '#newBuyBoxPrice',
        '#tp_price_block_total_price_ww',
        'span.offer-price',
        '.a-color-price',
    ];

    /**
     * Characters removed from the price text before parsing: ASCII
     * whitespace, NUL, vertical tab and the UTF-8 non-breaking space.
     *
     * The non-breaking space is two bytes (C2 A0), so this string must be
     * split per UTF-8 character, never per byte.
     */
    const WHITESPACE = " \t\n\r\0\x0B\xC2\xA0";

    /** Product title scraped by collectData(); unset until then. */
    protected $title;

    /**
     * Generates the storefront base URL from the selected Amazon TLD.
     *
     * @return string e.g. "https://www.amazon.co.uk"
     */
    private function getDomainName()
    {
        return 'https://www.amazon.' . $this->getInput('tld');
    }

    /**
     * Generates the URI of the Amazon product page.
     *
     * Falls back to the parent implementation when no ASIN input is set.
     */
    public function getURI()
    {
        if (!is_null($this->getInput('asin'))) {
            return $this->getDomainName() . '/dp/' . $this->getInput('asin');
        }

        return parent::getURI();
    }

    /**
     * Scrapes the product title from the html page.
     * Returns the default title if the "#productTitle" element is missing.
     *
     * @param object $html parsed product page (must provide find())
     * @return string entity-decoded, trimmed title
     */
    private function getTitle($html)
    {
        $titleTag = $html->find('#productTitle', 0);

        if (!$titleTag) {
            return $this->getDefaultTitle();
        }

        return trim(html_entity_decode($titleTag->innertext, ENT_QUOTES));
    }

    /**
     * Title used by the feed if none could be found.
     *
     * @return string "Amazon.<tld>: <asin>"
     */
    private function getDefaultTitle()
    {
        return 'Amazon.' . $this->getInput('tld') . ': ' . $this->getInput('asin');
    }

    /**
     * Returns the name of the feed.
     * Uses the title (already scraped) if it has one, otherwise the parent's name.
     */
    public function getName()
    {
        if (isset($this->title)) {
            return $this->title;
        }

        return parent::getName();
    }

    /**
     * Extracts the first key of the JSON object stored in an image's
     * "data-a-dynamic-image" attribute.
     *
     * @param mixed $attribute raw attribute value (may be entity-encoded)
     * @return string|int|null first key of the decoded object, or null when
     *                         the value is missing, not valid JSON, not an
     *                         array/object, or empty
     */
    private function parseDynamicImage($attribute)
    {
        if (!is_string($attribute) || $attribute === '') {
            return null;
        }

        $json = json_decode(html_entity_decode($attribute), true);

        // A JSON scalar (e.g. "5") decodes to a non-array; count() on it
        // would throw a TypeError on PHP 8.
        if (!is_array($json) || count($json) === 0) {
            return null;
        }

        return array_keys($json)[0];
    }

    /**
     * Returns a generated image tag for the product.
     *
     * Prefers the "data-old-hires" attribute, then the first key of
     * "data-a-dynamic-image"; keeps the placeholder when neither yields a
     * usable value.
     *
     * @param object $html parsed product page (must provide find())
     * @return string an <img> element
     */
    private function getImage($html)
    {
        $image = 'https://placekitten.com/200/300';
        $imageSrc = $html->find('#main-image-container img', 0);

        if ($imageSrc) {
            $hiresImage = $imageSrc->getAttribute('data-old-hires');
            $dynamicImageAttribute = $imageSrc->getAttribute('data-a-dynamic-image');
            $candidate = $hiresImage ?: $this->parseDynamicImage($dynamicImageAttribute);

            // Only replace the placeholder when something usable was found.
            if ($candidate) {
                $image = $candidate;
            }
        }

        // The title has been entity-decoded, so it may contain raw quotes
        // (e.g. 27" Monitor) that would otherwise terminate the attribute.
        $alt = htmlspecialchars((string) $this->title, ENT_QUOTES | ENT_SUBSTITUTE);

        return sprintf(
            '<img width="300" style="max-width:300px;max-height:300px" src="%s" alt="%s" />',
            $image,
            $alt
        );
    }

    /**
     * Returns the result of getSimpleHTMLDOM() for the product page URI,
     * i.e. the parsed html of the entire product page.
     */
    private function getHtml()
    {
        $uri = $this->getURI();

        return getSimpleHTMLDOM($uri);
    }

    /**
     * Reads price data from the hidden "#cerberus-data-metrics" element.
     *
     * Not called by collectData() at present.
     *
     * @param object $html parsed product page (must provide find())
     * @return array|false price data, or false when the element is missing
     */
    private function scrapePriceFromMetrics($html)
    {
        $asinData = $html->find('#cerberus-data-metrics', 0);

        // <div id="cerberus-data-metrics" style="display: none;"
        // data-asin="B00WTHJ5SU" data-asin-price="14.99" data-asin-shipping="0"
        // data-asin-currency-code="USD" data-substitute-count="-1" ... />
        if (!$asinData) {
            return false;
        }

        return [
            'price' => $asinData->getAttribute('data-asin-price'),
            // Same key set as scrapePriceGeneric(); no display string here.
            'displayPrice' => null,
            'currency' => $asinData->getAttribute('data-asin-currency-code'),
            'shipping' => $asinData->getAttribute('data-asin-shipping'),
        ];
    }

    /**
     * Reads price data from the JSON held by the
     * ".twister-plus-buying-options-price-data" element.
     *
     * Not called by collectData() at present.
     *
     * @param object $html parsed product page (must provide find())
     * @return array|false price data when the JSON holds exactly one entry
     *                     at index 0; false otherwise
     */
    private function scrapePriceTwister($html)
    {
        $str = $html->find('.twister-plus-buying-options-price-data', 0);
        if (!$str) {
            return false;
        }

        $data = json_decode((string) $str->innertext, true);

        // json_decode() yields null on invalid JSON; count(null) throws on PHP 8.
        if (!is_array($data) || count($data) !== 1 || !isset($data[0]) || !is_array($data[0])) {
            return false;
        }

        $entry = $data[0];

        return [
            // Same key set as scrapePriceGeneric(); no bare amount is extracted here.
            'price' => null,
            'displayPrice' => $entry['displayPrice'] ?? null,
            'currency' => $entry['currency'] ?? null,
            'shipping' => '0',
        ];
    }

    /**
     * Scrapes the price from the first element matching PRICE_SELECTORS.
     *
     * The element text is stripped of whitespace, the first number (digits
     * with optional "." / "," separators) becomes the price, and whatever
     * text remains becomes the currency.
     *
     * @param object $html parsed product page (must provide find())
     * @return array{price: ?string, displayPrice: ?string, currency: ?string, shipping: ?string}
     *         all values null when no price element, number or currency
     *         text is found
     */
    private function scrapePriceGeneric($html)
    {
        $default = [
            'price' => null,
            'displayPrice' => null,
            'currency' => null,
            'shipping' => null,
        ];

        $priceDiv = null;
        foreach (self::PRICE_SELECTORS as $sel) {
            $priceDiv = $html->find($sel, 0);
            if ($priceDiv) {
                break;
            }
        }

        if (!$priceDiv) {
            return $default;
        }

        // Split per UTF-8 character, not per byte: removing the bytes C2 and
        // A0 separately would corrupt characters such as "£" (C2 A3).
        $whitespace = preg_split('//u', self::WHITESPACE, -1, PREG_SPLIT_NO_EMPTY);
        $priceString = str_replace($whitespace, '', (string) $priceDiv->plaintext);

        // Accept both "1,299.00" and "1.299,00" style amounts as well as
        // amounts without decimals ("1,980").
        if (!preg_match('/\d+(?:[.,]\d+)*/', $priceString, $matches)) {
            return $default;
        }

        $price = $matches[0];
        $currency = str_replace($price, '', $priceString);

        if ($currency === '') {
            return $default;
        }

        return [
            'price' => $price,
            'displayPrice' => null,
            'currency' => $currency,
            'shipping' => '0',
        ];
    }

    /**
     * Fetches the product page and appends one item describing the current
     * price to $this->items.
     */
    public function collectData()
    {
        $html = $this->getHtml();
        $this->title = $this->getTitle($html);
        $image = $this->getImage($html);
        $data = $this->scrapePriceGeneric($html);

        // render
        $price = $data['displayPrice'];
        if (!$price) {
            $price = sprintf('%s %s', $data['price'], $data['currency']);
        }
        $content = sprintf('%s<br>Price: %s', $image, $price);

        // Shipping is null when no price was scraped; only show real costs.
        $shipping = $data['shipping'];
        if ($shipping !== null && $shipping !== '0') {
            $content .= sprintf('<br>Shipping: %s %s', $shipping, $data['currency']);
        }

        $item = [
            'title' => $this->title,
            'uri' => $this->getURI(),
            'content' => $content,
            // This is to ensure that feed readers notice the price change
            'uid' => md5((string) $data['price']),
        ];

        $this->items[] = $item;
    }
}
|