[Kununu] Fix source layout changed

This commit is contained in:
logmanoriginal 2016-12-17 18:04:21 +01:00
parent 5ad3198d71
commit d0c9397613

View file

@ -42,9 +42,12 @@ class KununuBridge extends BridgeAbstract {
private $companyName = ''; private $companyName = '';
public function getURI(){ public function getURI(){
$company = $this->encode_umlauts(strtolower(str_replace(' ', '-', trim($this->getInput('company'))))); if(!is_null($this->getInput('company')) && !is_null($this->getInput('site'))){
$company = $this->fix_company_name($this->getInput('company'));
$site = $this->getInput('site'); $site = $this->getInput('site');
$section = ''; $section = '';
switch($site){ switch($site){
case 'at': case 'at':
case 'de': case 'de':
@ -59,16 +62,23 @@ class KununuBridge extends BridgeAbstract {
return self::URI . $site . '/' . $company . '/' . $section; return self::URI . $site . '/' . $company . '/' . $section;
} }
return parent::getURI();
}
/**
 * Returns the feed name for the current request.
 *
 * When a 'company' input is present the name is "<company> - <bridge name>",
 * using $this->companyName if it is set and otherwise the normalized input
 * value. Without a company input the parent implementation is used.
 */
function getName(){
	if(!is_null($this->getInput('company'))){
		$company = $this->fix_company_name($this->getInput('company'));
		return ($this->companyName?:$company).' - '.self::NAME;
	}

	// Previously "paren::getName()", which references a non-existent class
	// and fails at runtime whenever no company input is set.
	return parent::getName();
}
public function collectData(){ public function collectData(){
$full = $this->getInput('full'); $full = $this->getInput('full');
// Load page // Load page
$html = getSimpleHTMLDOM($this->getURI()); $html = getSimpleHTMLDOMCached($this->getURI());
if(!$html) if(!$html)
returnServerError('Unable to receive data from ' . $this->getURI() . '!'); returnServerError('Unable to receive data from ' . $this->getURI() . '!');
// Update name for this request // Update name for this request
@ -109,6 +119,16 @@ class KununuBridge extends BridgeAbstract {
return preg_replace('/href=(\'|\")\//i', 'href="'.self::URI, $text); return preg_replace('/href=(\'|\")\//i', 'href="'.self::URI, $text);
} }
/*
 * Builds the normalized form of a company name: surrounding whitespace is
 * trimmed, spaces become hyphens, the text is lowercased and the result is
 * passed through encode_umlauts().
 */
private function fix_company_name($company){
	return $this->encode_umlauts(
		strtolower(
			str_replace(' ', '-', trim($company))
		)
	);
}
/** /**
* Encodes umlauts in the given text * Encodes umlauts in the given text
*/ */
@ -123,11 +143,7 @@ class KununuBridge extends BridgeAbstract {
* Returns the company name from the review html * Returns the company name from the review html
*/ */
private function extract_company_name($html){ private function extract_company_name($html){
$panel = $html->find('div.panel', 0); $company_name = $html->find('h1[itemprop=name]', 0);
if(is_null($panel))
returnServerError('Cannot find panel for company name!');
$company_name = $panel->find('h1', 0);
if(is_null($company_name)) if(is_null($company_name))
returnServerError('Cannot find company name!'); returnServerError('Cannot find company name!');
@ -139,11 +155,11 @@ class KununuBridge extends BridgeAbstract {
*/ */
private function extract_article_date($article){
	// The creation date is read from the content attribute of a meta element
	$meta = $article->find('meta[itemprop=dateCreated]', 0);

	if($meta === null){
		returnServerError('Cannot find article date!');
	}

	return strtotime($meta->content);
}
/** /**
@ -161,7 +177,7 @@ class KununuBridge extends BridgeAbstract {
* Returns the summary from a given article * Returns the summary from a given article
*/ */
private function extract_article_summary($article){ private function extract_article_summary($article){
$summary = $article->find('[itemprop=summary]', 0); $summary = $article->find('[itemprop=name]', 0);
if(is_null($summary)) if(is_null($summary))
returnServerError('Cannot find article summary!'); returnServerError('Cannot find article summary!');
@ -172,32 +188,27 @@ class KununuBridge extends BridgeAbstract {
* Returns the URI from a given article * Returns the URI from a given article
*/ */
private function extract_article_uri($article){
	// The review link is read from the review-url attribute of the
	// ku-company-review-more element
	$more = $article->find('ku-company-review-more', 0);

	if($more === null){
		returnServerError('Cannot find article URI!');
	}

	$path = $more->{'review-url'};
	return self::URI . $path;
}
/** /**
* Returns the position of the author from a given article * Returns the position of the author from a given article
*/ */
private function extract_article_author_position($article){ private function extract_article_author_position($article){
// We need to parse the aside manually // We need to parse the user-content manually
$aside = $article->find('aside', 0); $user_content = $article->find('div.user-content', 0);
if(is_null($aside)) if(is_null($user_content))
returnServerError('Cannot find article author information!'); returnServerError('Cannot find user content!');
// Go through all h2 elements to find index of required span (I know... it's stupid) // Go through all h2 elements to find index of required span (I know... it's stupid)
$author_position = 'Unknown'; $author_position = 'Unknown';
foreach($aside->find('h2') as $subject){ foreach($user_content->find('div') as $content){
if(stristr(strtolower($subject->plaintext), 'position')){ /* This works for at, ch, de, us */ if(stristr(strtolower($content->plaintext), 'position')){ /* This works for at, ch, de, us */
$author_position = $subject->next_sibling()->plaintext; $author_position = $content->next_sibling()->plaintext;
break; break;
} }
} }
@ -209,7 +220,7 @@ class KununuBridge extends BridgeAbstract {
* Returns the description from a given article * Returns the description from a given article
*/ */
private function extract_article_description($article){ private function extract_article_description($article){
$description = $article->find('div[itemprop=description]', 0); $description = $article->find('[itemprop=reviewBody]', 0);
if(is_null($description)) if(is_null($description))
returnServerError('Cannot find article description!'); returnServerError('Cannot find article description!');