From e1e9a12440be621417dde4cdf1c96613e3f84a65 Mon Sep 17 00:00:00 2001 From: Nemo Date: Mon, 4 Apr 2022 23:11:40 +0530 Subject: [PATCH] [AmazonPriceTrackerBridge] Minor fix for parser, and new strategy (#2603) --- bridges/AmazonPriceTrackerBridge.php | 48 ++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/bridges/AmazonPriceTrackerBridge.php b/bridges/AmazonPriceTrackerBridge.php index 4efd441c..98f8be9f 100644 --- a/bridges/AmazonPriceTrackerBridge.php +++ b/bridges/AmazonPriceTrackerBridge.php @@ -49,6 +49,8 @@ class AmazonPriceTrackerBridge extends BridgeAbstract { '.a-color-price', ); + const WHITESPACE = " \t\n\r\0\x0B\xC2\xA0"; + protected $title; /** @@ -154,6 +156,22 @@ EOT; return false; } + private function scrapePriceTwister($html) { + $str = $html->find('.twister-plus-buying-options-price-data', 0); + + $data = json_decode($str->innertext, true); + if(count($data) === 1) { + $data = $data[0]; + return array( + 'displayPrice' => $data['displayPrice'], + 'currency' => $data['currency'], + 'shipping' => '0', + ); + } + + return false; + } + private function scrapePriceGeneric($html) { $priceDiv = null; @@ -168,12 +186,11 @@ EOT; return false; } - $priceString = $priceDiv->plaintext; - - preg_match('/[\d.,]+/', $priceString, $matches); + $priceString = str_replace(str_split(self::WHITESPACE), '', $priceDiv->plaintext); + preg_match('/(\d+\.\d{0,2})/', $priceString, $matches); $price = $matches[0]; - $currency = trim(str_replace($price, '', $priceString), " \t\n\r\0\x0B\xC2\xA0"); + $currency = str_replace($price, '', $priceString); if ($price != null && $currency != null) { return array( @@ -186,6 +203,21 @@ EOT; return false; } + private function renderContent($image, $data) { + $price = $data['displayPrice']; + if (!$price) { + $price = "{$data['price']} {$data['currency']}"; + } + + $html = "$image
Price: $price"; + + if ($data['shipping'] !== '0') { + $html .= "
Shipping: {$data['shipping']} {$data['currency']}
"; + } + + return $html; + } + /** * Scrape method for Amazon product page * @return [type] [description] @@ -195,20 +227,16 @@ EOT; $this->title = $this->getTitle($html); $imageTag = $this->getImage($html); - $data = $this->scrapePriceFromMetrics($html) ?: $this->scrapePriceGeneric($html); + $data = $this->scrapePriceGeneric($html); $item = array( 'title' => $this->title, 'uri' => $this->getURI(), - 'content' => "$imageTag
Price: {$data['price']} {$data['currency']}", + 'content' => $this->renderContent($imageTag, $data), // This is to ensure that feed readers notice the price change 'uid' => md5($data['price']) ); - if ($data['shipping'] !== '0') { - $item['content'] .= "
Shipping: {$data['shipping']} {$data['currency']}
"; - } - $this->items[] = $item; } }