2013-08-11 15:30:41 +04:00
|
|
|
<?php
|
2022-06-22 19:32:54 +03:00
|
|
|
|
2016-12-07 00:45:52 +03:00
|
|
|
/**
 * Bridge that exposes the results of a Google web search as feed items.
 *
 * The search is restricted to the past year, asks for up to 100 results and
 * requests date ordering from Google; the collected items are additionally
 * sorted newest-first locally.
 */
class GoogleSearchBridge extends BridgeAbstract
{
    const MAINTAINER = 'sebsauvage';
    const NAME = 'Google search';
    const URI = 'https://www.google.com/';
    const CACHE_TIMEOUT = 60 * 30; // 30m
    const DESCRIPTION = 'Returns max 100 results from the past year.';

    const PARAMETERS = [[
        'q' => [
            'name' => 'keyword',
            'required' => true,
            'exampleValue' => 'rss-bridge',
        ],
        'verbatim' => [
            'name' => 'Verbatim',
            'type' => 'checkbox',
            'title' => 'Use literal keyword(s) without making improvements',
        ],
    ]];

    /**
     * Fetch the search result page and fill $this->items.
     *
     * Each item carries 'uri', 'title', 'timestamp' (int, 0 when no date
     * could be determined) and, when a snippet is available, 'content'.
     *
     * @return void
     */
    public function collectData()
    {
        // todo: wrap this in try..catch because 429 too many requests happens a lot
        $dom = getSimpleHTMLDOM($this->getURI(), ['Accept-language: en-US']);
        if (!$dom) {
            returnServerError('No results for this query.');
        }

        $result = $dom->find('div[id=res]', 0);
        if (!$result) {
            // No result container on the page: leave the feed empty.
            return;
        }

        foreach ($result->find('div[class~=g]') as $element) {
            $link = $element->find('a[href]', 0);
            $heading = $element->find('h3', 0);
            if (!$link || !$heading) {
                // Not every matched container is a regular result; without a
                // link and a heading there is nothing usable to emit.
                continue;
            }

            $item = [
                'uri' => htmlspecialchars_decode($link->href),
                'title' => $heading->plaintext,
                'timestamp' => 0,
            ];

            // Some search results don't have this particular dom identifier
            $snippet = $element->find('div[data-content-feature=1]', 0);
            if ($snippet) {
                // Split by — or ·, once only: the text before the first
                // separator is the date candidate, everything after it is
                // the description (which may itself contain separators).
                $parts = preg_split('/( — | · )/', $snippet->plaintext, 2) ?: [''];
                $dateText = trim($parts[0]);
                $content = trim($parts[1] ?? '');

                $timestamp = $this->parseResultDate($dateText);
                if ($timestamp !== null) {
                    $item['timestamp'] = $timestamp;
                } elseif ($content === '') {
                    // No separator and not a date: the whole snippet is the
                    // description rather than a date prefix.
                    $content = $dateText;
                }

                if ($content !== '') {
                    $item['content'] = $content;
                }
            }

            $this->items[] = $item;
        }

        // Sort by descending date
        usort($this->items, static function ($a, $b) {
            return $b['timestamp'] <=> $a['timestamp'];
        });
    }

    /**
     * Build the search URL for the configured keyword.
     *
     * Falls back to the parent implementation when no keyword is set.
     *
     * @return string
     */
    public function getURI()
    {
        $query = (string) $this->getInput('q');
        if ($query === '') {
            return parent::getURI();
        }

        // in past year, sort by date, optionally verbatim
        $tbs = 'qdr:y,sbd:1';
        if ($this->getInput('verbatim')) {
            $tbs .= ',li:1';
        }

        $queryParameters = [
            'q' => $query,
            'hl' => 'en',
            'num' => '100', // get 100 results
            'complete' => '0',
            'tbs' => $tbs,
        ];

        return sprintf('https://www.google.com/search?%s', http_build_query($queryParameters));
    }

    /**
     * Feed name: "<keyword> - Google search", or the parent's name when no
     * keyword is set.
     *
     * @return string
     */
    public function getName()
    {
        $query = (string) $this->getInput('q');
        if ($query === '') {
            return parent::getName();
        }

        return $query . ' - Google search';
    }

    /**
     * Convert the date prefix of a result snippet into a Unix timestamp.
     *
     * The time of day is set to midnight so the same date text always maps
     * to the same timestamp.
     *
     * @param string $text Trimmed text found before the first separator.
     * @return int|null Timestamp, or null when $text is empty or not parseable.
     */
    private function parseResultDate($text)
    {
        if ($text === '') {
            return null;
        }

        try {
            $date = new \DateTimeImmutable($text);
        } catch (\Exception $e) {
            return null;
        }

        return $date->setTime(0, 0)->getTimestamp();
    }
}
|