2019-06-22 19:50:06 +03:00
|
|
|
<?php
|
2022-07-01 16:10:30 +03:00
|
|
|
|
2019-06-22 19:50:06 +03:00
|
|
|
/**
 * Bridge that turns the company review page of an Indeed site into feed items.
 *
 * The company is identified by the "c" input and the regional site is picked
 * through the "language" input (see SITES for the code => site mapping).
 */
class IndeedBridge extends BridgeAbstract
{
    const NAME = 'Indeed';
    const URI = 'https://www.indeed.com/';
    const DESCRIPTION = 'Returns reviews and comments for a company of your choice';
    const MAINTAINER = 'logmanoriginal';
    const CACHE_TIMEOUT = 14400; // 4 hours

    /**
     * Input definitions: the unnamed context carries the company ("c"), the
     * "global" section carries the language code and the item limit.
     *
     * The keys of 'language' => 'values' mirror the keys of SITES; keep both
     * lists in sync when adding a site.
     */
    const PARAMETERS = [
        [
            'c' => [
                'name' => 'Company',
                'type' => 'text',
                'required' => true,
                'title' => 'Company name',
                'exampleValue' => 'GitHub',
            ]
        ],
        'global' => [
            'language' => [
                'name' => 'Language Code',
                'type' => 'list',
                'title' => 'Choose your language code',
                'defaultValue' => 'en-US',
                'values' => [
                    'es-AR' => 'es-AR',
                    'de-AT' => 'de-AT',
                    'en-AU' => 'en-AU',
                    'nl-BE' => 'nl-BE',
                    'fr-BE' => 'fr-BE',
                    'pt-BR' => 'pt-BR',
                    'en-CA' => 'en-CA',
                    'fr-CA' => 'fr-CA',
                    'de-CH' => 'de-CH',
                    'fr-CH' => 'fr-CH',
                    'es-CL' => 'es-CL',
                    'zh-CN' => 'zh-CN',
                    'es-CO' => 'es-CO',
                    'de-DE' => 'de-DE',
                    'es-ES' => 'es-ES',
                    'fr-FR' => 'fr-FR',
                    'en-GB' => 'en-GB',
                    'en-HK' => 'en-HK',
                    'en-IE' => 'en-IE',
                    'en-IN' => 'en-IN',
                    'it-IT' => 'it-IT',
                    'ja-JP' => 'ja-JP',
                    'ko-KR' => 'ko-KR',
                    'es-MX' => 'es-MX',
                    'nl-NL' => 'nl-NL',
                    'pl-PL' => 'pl-PL',
                    'en-SG' => 'en-SG',
                    'en-US' => 'en-US',
                    'en-ZA' => 'en-ZA',
                    'en-AE' => 'en-AE',
                    'da-DK' => 'da-DK',
                    'in-ID' => 'in-ID',
                    'en-MY' => 'en-MY',
                    'es-PE' => 'es-PE',
                    'en-PH' => 'en-PH',
                    'en-PK' => 'en-PK',
                    'ro-RO' => 'ro-RO',
                    'ru-RU' => 'ru-RU',
                    'tr-TR' => 'tr-TR',
                    'zh-TW' => 'zh-TW',
                    'vi-VN' => 'vi-VN',
                    'en-VN' => 'en-VN',
                    'ar-EG' => 'ar-EG',
                    'fr-MA' => 'fr-MA',
                    'en-NG' => 'en-NG',
                ]
            ],
            'limit' => [
                'name' => 'Limit',
                'type' => 'number',
                'required' => true,
                'title' => 'Maximum number of items to return',
                'exampleValue' => 20,
            ]
        ]
    ];

    /**
     * Language code => base URL (with trailing slash) of the matching site.
     *
     * Used by getURI() to build the request URL and by detectParameters() to
     * map a host name back to a language code.
     */
    const SITES = [
        'es-AR' => 'https://ar.indeed.com/',
        'de-AT' => 'https://at.indeed.com/',
        'en-AU' => 'https://au.indeed.com/',
        'nl-BE' => 'https://be.indeed.com/',
        'fr-BE' => 'https://emplois.be.indeed.com/',
        'pt-BR' => 'https://www.indeed.com.br/',
        'en-CA' => 'https://ca.indeed.com/',
        'fr-CA' => 'https://emplois.ca.indeed.com/',
        'de-CH' => 'https://www.indeed.ch/',
        'fr-CH' => 'https://emplois.indeed.ch/',
        'es-CL' => 'https://www.indeed.cl/',
        'zh-CN' => 'https://cn.indeed.com/',
        'es-CO' => 'https://co.indeed.com/',
        'de-DE' => 'https://de.indeed.com/',
        'es-ES' => 'https://www.indeed.es/',
        'fr-FR' => 'https://www.indeed.fr/',
        'en-GB' => 'https://www.indeed.co.uk/',
        'en-HK' => 'https://www.indeed.hk/',
        'en-IE' => 'https://ie.indeed.com/',
        'en-IN' => 'https://www.indeed.co.in/',
        'it-IT' => 'https://it.indeed.com/',
        'ja-JP' => 'https://jp.indeed.com/',
        'ko-KR' => 'https://kr.indeed.com/',
        'es-MX' => 'https://www.indeed.com.mx/',
        'nl-NL' => 'https://www.indeed.nl/',
        'pl-PL' => 'https://pl.indeed.com/',
        'en-SG' => 'https://www.indeed.com.sg/',
        'en-US' => 'https://www.indeed.com/',
        'en-ZA' => 'https://www.indeed.co.za/',
        'en-AE' => 'https://www.indeed.ae/',
        'da-DK' => 'https://dk.indeed.com/',
        'in-ID' => 'https://id.indeed.com/',
        'en-MY' => 'https://www.indeed.com.my/',
        'es-PE' => 'https://www.indeed.com.pe/',
        'en-PH' => 'https://www.indeed.com.ph/',
        'en-PK' => 'https://www.indeed.com.pk/',
        'ro-RO' => 'https://ro.indeed.com/',
        'ru-RU' => 'https://ru.indeed.com/',
        'tr-TR' => 'https://tr.indeed.com/',
        'zh-TW' => 'https://tw.indeed.com/',
        'vi-VN' => 'https://vn.indeed.com/',
        'en-VN' => 'https://jobs.vn.indeed.com/',
        'ar-EG' => 'https://eg.indeed.com/',
        'fr-MA' => 'https://ma.indeed.com/',
        'en-NG' => 'https://ng.indeed.com/',
    ];

    /** Heading (first <h1>) of the fetched page; used as the feed name once set. */
    private $title;

    /**
     * Fetches the review page and appends one item per review to $this->items.
     *
     * At most "limit" items are collected (20 when the input is empty or 0).
     *
     * The page is requested exactly once. The previous implementation wrapped
     * the fetch in a do/while that re-requested the very same URL until the
     * limit was reached; because the URL never advanced, a page with fewer
     * reviews than the limit produced duplicated items, and a page without
     * any review never terminated.
     * NOTE(review): if more than one page of reviews is wanted, real
     * pagination (following the site's "next page" link) has to be added.
     */
    public function collectData()
    {
        $url = $this->getURI();
        $limit = $this->getInput('limit') ?: 20;

        $html = getSimpleHTMLDOM($url);
        $html = defaultLinkTo($html, $url);

        $this->title = $html->find('h1', 0)->innertext;

        foreach ($html->find('.cmp-ReviewsList div[itemprop="review"]') as $review) {
            $title = $review->find('h2[data-testid="title"]', 0)->innertext;
            $rating = $review->find('meta[itemprop="ratingValue"]', 0)->getAttribute('content');
            $comment = $review->find('span[itemprop="reviewBody"]', 0)->innertext;

            $item = [];
            $item['uri'] = $review->find('a[data-tn-element="individualReviewLink"]', 0)->href;
            $item['title'] = "$title | ($rating)";
            $item['author'] = $review->find('span > meta[itemprop="name"]', 0)->getAttribute('content');
            $item['content'] = $comment;

            $this->items[] = $item;

            if (count($this->items) >= $limit) {
                break;
            }
        }
    }

    /**
     * Builds "<site>cmp/<company>/reviews" from the current inputs.
     *
     * Falls back to the parent implementation when the company or language
     * input is missing, or when the language code has no entry in SITES.
     *
     * @return string
     */
    public function getURI()
    {
        $language = $this->getInput('language');
        $company = $this->getInput('c');

        // isset() keeps an unknown language code from triggering an
        // undefined-index diagnostic on the SITES lookup.
        if ($language && $company && isset(self::SITES[$language])) {
            return self::SITES[$language] . 'cmp/' . urlencode($company) . '/reviews';
        }

        return parent::getURI();
    }

    /**
     * Returns the page heading captured by collectData(), or the parent's
     * name while no heading is available.
     *
     * @return string
     */
    public function getName()
    {
        return $this->title ?: parent::getName();
    }

    /**
     * Derives the bridge inputs from a company URL.
     *
     * Expected: https://<...>.indeed.<...>/cmp/<company>[/reviews][/...]
     *
     * Note that most users will be redirected to their localized version
     * of the page, which adds the language code to the host. For example,
     * "en.indeed.com" or "www.indeed.fr" (see link[rel="alternate"]). At
     * least each of the sites have ".indeed." in the name.
     *
     * @param string $url URL to inspect.
     * @return array|null Inputs 'c', 'language' and 'limit', or null when the
     *                    URL is not a company page of a site listed in SITES.
     */
    public function detectParameters($url)
    {
        if (
            filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_PATH_REQUIRED) === false
            || stristr($url, '.indeed.') === false
        ) {
            return null;
        }

        $url_components = parse_url($url);
        $path_segments = array_values(array_filter(explode('/', $url_components['path'])));

        if (count($path_segments) < 2 || $path_segments[0] !== 'cmp') {
            return null;
        }

        // Hosts are matched against the https form of the known sites.
        $language = array_search('https://' . $url_components['host'] . '/', self::SITES, true);
        if ($language === false) {
            return null;
        }

        // The 'limit' input declares no 'defaultValue' (only 'exampleValue'),
        // so the lookup is guarded to avoid an undefined-key diagnostic while
        // still honouring a default should one be added later.
        $limit = (self::PARAMETERS['global']['limit']['defaultValue'] ?? null) ?: 20;

        // parse_url() leaves the path percent-encoded and getURI() encodes the
        // company again, so decode here to avoid double encoding.
        $company = rawurldecode($path_segments[1]);

        return [
            'c' => $company,
            'language' => $language,
            'limit' => $limit,
        ];
    }
}
|