<?php

/**
 * Bridge that turns an Allegro.pl search-results page into feed items.
 *
 * The user supplies the URL of a search they ran in the browser; the bridge
 * re-requests it ordered with "order=n" and emits one item per offer article
 * found on the page.
 */
class AllegroBridge extends BridgeAbstract
{
    const NAME = 'Allegro';
    const URI = 'https://www.allegro.pl';
    const DESCRIPTION = 'Returns the search results from the Allegro.pl shopping and bidding portal';
    const MAINTAINER = 'wrobelda';
    const PARAMETERS = [[
        'url' => [
            'name' => 'Search URL',
            'title' => 'Copy the URL from your browser\'s address bar after searching for your items and paste it here',
            'exampleValue' => 'https://allegro.pl/kategoria/swieze-warzywa-cebula-318660',
            'required' => true,
        ],
        'cookie' => [
            'name' => 'The complete cookie value',
            'title' => 'Paste the value of the cookie value from your browser if you want to prevent Allegro imposing rate limits',
            'required' => false,
        ],
        'includeSponsoredOffers' => [
            'type' => 'checkbox',
            'name' => 'Include Sponsored Offers',
            'defaultValue' => 'checked'
        ],
        'includePromotedOffers' => [
            'type' => 'checkbox',
            'name' => 'Include Promoted Offers',
            'defaultValue' => 'checked'
        ]
    ]];

    /**
     * Name the feed after the search phrase found in the configured URL.
     *
     * Returns the value of the "string" query parameter when it is a
     * non-empty string; otherwise defers to parent::getName() (no URL given,
     * no query string, or no usable "string" parameter).
     */
    public function getName()
    {
        $url = $this->getInput('url');
        if (!$url) {
            return parent::getName();
        }

        $query = parse_url($url, PHP_URL_QUERY);
        if (!$query) {
            return parent::getName();
        }

        // parse_str() already URL-decodes every value. Decoding a second time
        // would turn a literal "+" into a space and mangle "%" sequences.
        parse_str($query, $fields);
        $phrase = $fields['string'] ?? null;

        // "string[]=..." yields an array, which cannot serve as a name.
        if (is_string($phrase) && $phrase !== '') {
            return $phrase;
        }

        return parent::getName();
    }

    /**
     * Return the configured search URL, or parent::getURI() when none is set.
     */
    public function getURI()
    {
        return $this->getInput('url') ?? parent::getURI();
    }

    /**
     * Fetch the search page and append one feed item per offer to $this->items.
     *
     * Regular offers are always included; sponsored and promoted offers are
     * added when the corresponding checkbox inputs are set. Offers that carry
     * no link are skipped.
     */
    public function collectData()
    {
        // make sure we order by the most recently listed offers
        $url = self::withNewestFirstOrder((string) $this->getInput('url'));

        $opts = [];

        // If a cookie is provided
        $cookie = $this->getInput('cookie');
        if ($cookie) {
            $opts[CURLOPT_COOKIE] = $cookie;
        }

        $html = getSimpleHTMLDOM($url, [], $opts);

        // if no results found
        if ($html->find('.mzmg_6m.m9qz_yo._6a66d_-fJr5')) {
            return;
        }

        $contexts = ['REGULAR'];
        if ($this->getInput('includeSponsoredOffers')) {
            $contexts[] = 'SPONSORED';
        }
        if ($this->getInput('includePromotedOffers')) {
            $contexts[] = 'PROMOTED';
        }

        $results = [];
        foreach ($contexts as $context) {
            $results = array_merge(
                $results,
                $html->find('article[data-analytics-view-custom-context="' . $context . '"]')
            );
        }

        foreach ($results as $post) {
            $item = $this->parseOffer($post);
            if ($item !== null) {
                $this->items[] = $item;
            }
        }
    }

    /**
     * Replace any existing "order" query parameter of $url with "order=n".
     *
     * @param string $url Search URL as entered by the user.
     * @return string The URL with exactly one trailing "order=n" parameter.
     */
    private static function withNewestFirstOrder(string $url): string
    {
        // The lookbehind keeps the "?" or "&" separator out of the match, so
        // removal never fuses neighbours and repeated "order=" pairs all go.
        $stripped = preg_replace('/(?<=[?&])order=[^&]*(?:&|$)/', '', $url) ?? $url;

        // Removing the last (or only) parameter leaves a dangling separator.
        $stripped = rtrim($stripped, '?&');

        return $stripped . (str_contains($stripped, '?') ? '&' : '?') . 'order=n';
    }

    /**
     * Build a feed item from a single offer <article> element.
     *
     * @param object $post Offer node returned by the HTML parser's find().
     * @return array|null The item, or null when the offer has no usable link.
     */
    private function parseOffer($post): ?array
    {
        $item = [];
        $item['uid'] = $post->{'data-analytics-view-value'};

        $itemLink = $post->find('a[href*="' . $item['uid'] . '"], a[href*="allegrolokalnie"]', 0);
        if (!$itemLink) {
            // Nothing to link to; calling find() on a missing node would be fatal.
            return null;
        }
        $item['uri'] = $itemLink->href;

        $thumbnail = $itemLink->find('img', 0);
        if ($thumbnail) {
            $item['title'] = $thumbnail->alt;

            $image = $thumbnail->{'data-src'} ?: ($thumbnail->src ?? false);
            if ($image) {
                $item['enclosures'] = [$image . '#.image'];
            }
        } else {
            // No thumbnail to read the title from: fall back to the link text.
            $item['title'] = trim((string) $itemLink->plaintext);
        }

        $price = self::formatPrice($post->{'data-analytics-view-json-custom-price'});

        $descriptionPatterns = ['/<\s*dt[^>]*>\b/', '/<\/dt>/', '/<\s*dd[^>]*>\b/', '/<\/dd>/'];
        $descriptionReplacements = ['<span>', ':</span> ', '<strong>', ' </strong> '];
        $descriptionNode = $post->find('.m7er_k4.mpof_5r.mpof_z0_s', 0);
        $description = $descriptionNode ? (string) $descriptionNode->innertext : '';
        $descriptionPretty = preg_replace($descriptionPatterns, $descriptionReplacements, $description);

        // array_filter() keeps the original keys, so re-index before reading
        // the first surviving node.
        $pricingNodes = array_values(array_filter(
            $post->find('.mqu1_g3.mgn2_12'),
            static function ($node) {
                return empty($node->find('.mvrt_0'));
            }
        ));
        $pricingExtraInfo = isset($pricingNodes[0]) ? (string) $pricingNodes[0]->plaintext : '';

        // Drop the "zapłać później" (Polish: "pay later") entries entirely
        // instead of emitting empty <div> pairs for them.
        $offerExtraInfo = [];
        foreach ($post->find('div.mpof_ki.mwdn_1.mj7a_4.mgn2_12') as $node) {
            if (!str_contains($node->plaintext, 'zapłać później')) {
                $offerExtraInfo[] = $node->outertext;
            }
        }

        $smartBadge = $post->find('img[alt="Smart!"]', 0);
        if ($smartBadge) {
            $pricingExtraInfo .= $smartBadge->outertext;
        }

        // Every <dd> value becomes a category, except the negative
        // placeholders "brak" and "nie".
        $item['categories'] = [];
        foreach ($post->find('dd') as $parameter) {
            if (in_array(strtolower($parameter->innertext), ['brak', 'nie'], true)) {
                continue;
            }

            $item['categories'][] = $parameter->innertext;
        }

        $item['content'] = $descriptionPretty
            . '<div><strong>'
            . $price
            . '</strong></div><div>'
            . implode('</div><div>', $offerExtraInfo)
            . '</div><dl>'
            . $pricingExtraInfo
            . '</dl><hr>';

        return $item;
    }

    /**
     * Render the offer's JSON price attribute as "<amount> <currency>".
     *
     * @param mixed $rawPrice Attribute value; falsy when the attribute is absent.
     * @return string Formatted price, or '' when absent or not decodable.
     */
    private static function formatPrice($rawPrice): string
    {
        if (!$rawPrice) {
            return '';
        }

        $decoded = json_decode(html_entity_decode((string) $rawPrice));
        if (!isset($decoded->amount, $decoded->currency)) {
            return '';
        }

        return $decoded->amount . ' ' . $decoded->currency;
    }
}
|