forked from blallo/rss-bridge
[Kununu] Fix source layout changed
This commit is contained in:
parent
5ad3198d71
commit
d0c9397613
1 changed files with 51 additions and 40 deletions
|
@ -42,9 +42,12 @@ class KununuBridge extends BridgeAbstract {
|
||||||
private $companyName = '';
|
private $companyName = '';
|
||||||
|
|
||||||
public function getURI(){
|
public function getURI(){
|
||||||
$company = $this->encode_umlauts(strtolower(str_replace(' ', '-', trim($this->getInput('company')))));
|
if(!is_null($this->getInput('company')) && !is_null($this->getInput('site'))){
|
||||||
|
|
||||||
|
$company = $this->fix_company_name($this->getInput('company'));
|
||||||
$site = $this->getInput('site');
|
$site = $this->getInput('site');
|
||||||
$section = '';
|
$section = '';
|
||||||
|
|
||||||
switch($site){
|
switch($site){
|
||||||
case 'at':
|
case 'at':
|
||||||
case 'de':
|
case 'de':
|
||||||
|
@ -59,16 +62,23 @@ class KununuBridge extends BridgeAbstract {
|
||||||
return self::URI . $site . '/' . $company . '/' . $section;
|
return self::URI . $site . '/' . $company . '/' . $section;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return parent::getURI();
|
||||||
|
}
|
||||||
|
|
||||||
function getName(){
|
function getName(){
|
||||||
$company = $this->encode_umlauts(strtolower(str_replace(' ', '-', trim($this->getInput('company')))));
|
if(!is_null($this->getInput('company'))){
|
||||||
|
$company = $this->fix_company_name($this->getInput('company'));
|
||||||
return ($this->companyName?:$company).' - '.self::NAME;
|
return ($this->companyName?:$company).' - '.self::NAME;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return parent::getName();
|
||||||
|
}
|
||||||
|
|
||||||
public function collectData(){
|
public function collectData(){
|
||||||
$full = $this->getInput('full');
|
$full = $this->getInput('full');
|
||||||
|
|
||||||
// Load page
|
// Load page
|
||||||
$html = getSimpleHTMLDOM($this->getURI());
|
$html = getSimpleHTMLDOMCached($this->getURI());
|
||||||
if(!$html)
|
if(!$html)
|
||||||
returnServerError('Unable to receive data from ' . $this->getURI() . '!');
|
returnServerError('Unable to receive data from ' . $this->getURI() . '!');
|
||||||
// Update name for this request
|
// Update name for this request
|
||||||
|
@ -109,6 +119,16 @@ class KununuBridge extends BridgeAbstract {
|
||||||
return preg_replace('/href=(\'|\")\//i', 'href="'.self::URI, $text);
|
return preg_replace('/href=(\'|\")\//i', 'href="'.self::URI, $text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns a fixed version of the provided company name
|
||||||
|
*/
|
||||||
|
private function fix_company_name($company){
|
||||||
|
$company = trim($company);
|
||||||
|
$company = str_replace(' ', '-', $company);
|
||||||
|
$company = strtolower($company);
|
||||||
|
return $this->encode_umlauts($company);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Encodes umlauts in the given text
|
* Encodes umlauts in the given text
|
||||||
*/
|
*/
|
||||||
|
@ -123,11 +143,7 @@ class KununuBridge extends BridgeAbstract {
|
||||||
* Returns the company name from the review html
|
* Returns the company name from the review html
|
||||||
*/
|
*/
|
||||||
private function extract_company_name($html){
|
private function extract_company_name($html){
|
||||||
$panel = $html->find('div.panel', 0);
|
$company_name = $html->find('h1[itemprop=name]', 0);
|
||||||
if(is_null($panel))
|
|
||||||
returnServerError('Cannot find panel for company name!');
|
|
||||||
|
|
||||||
$company_name = $panel->find('h1', 0);
|
|
||||||
if(is_null($company_name))
|
if(is_null($company_name))
|
||||||
returnServerError('Cannot find company name!');
|
returnServerError('Cannot find company name!');
|
||||||
|
|
||||||
|
@ -139,11 +155,11 @@ class KununuBridge extends BridgeAbstract {
|
||||||
*/
|
*/
|
||||||
private function extract_article_date($article){
|
private function extract_article_date($article){
|
||||||
// They conveniently provide a time attribute for us :)
|
// They conveniently provide a time attribute for us :)
|
||||||
$date = $article->find('time[itemprop=dtreviewed]', 0);
|
$date = $article->find('meta[itemprop=dateCreated]', 0);
|
||||||
if(is_null($date))
|
if(is_null($date))
|
||||||
returnServerError('Cannot find article date!');
|
returnServerError('Cannot find article date!');
|
||||||
|
|
||||||
return strtotime($date->datetime);
|
return strtotime($date->content);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -161,7 +177,7 @@ class KununuBridge extends BridgeAbstract {
|
||||||
* Returns the summary from a given article
|
* Returns the summary from a given article
|
||||||
*/
|
*/
|
||||||
private function extract_article_summary($article){
|
private function extract_article_summary($article){
|
||||||
$summary = $article->find('[itemprop=summary]', 0);
|
$summary = $article->find('[itemprop=name]', 0);
|
||||||
if(is_null($summary))
|
if(is_null($summary))
|
||||||
returnServerError('Cannot find article summary!');
|
returnServerError('Cannot find article summary!');
|
||||||
|
|
||||||
|
@ -172,32 +188,27 @@ class KununuBridge extends BridgeAbstract {
|
||||||
* Returns the URI from a given article
|
* Returns the URI from a given article
|
||||||
*/
|
*/
|
||||||
private function extract_article_uri($article){
|
private function extract_article_uri($article){
|
||||||
// Notice: This first part is the same as in extract_article_summary!
|
$anchor = $article->find('ku-company-review-more', 0);
|
||||||
$summary = $article->find('[itemprop=summary]', 0);
|
|
||||||
if(is_null($summary))
|
|
||||||
returnServerError('Cannot find article summary!');
|
|
||||||
|
|
||||||
$anchor = $summary->find('a', 0);
|
|
||||||
if(is_null($anchor))
|
if(is_null($anchor))
|
||||||
returnServerError('Cannot find article URI!');
|
returnServerError('Cannot find article URI!');
|
||||||
|
|
||||||
return self::URI . $anchor->href;
|
return self::URI . $anchor->{'review-url'};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the position of the author from a given article
|
* Returns the position of the author from a given article
|
||||||
*/
|
*/
|
||||||
private function extract_article_author_position($article){
|
private function extract_article_author_position($article){
|
||||||
// We need to parse the aside manually
|
// We need to parse the user-content manually
|
||||||
$aside = $article->find('aside', 0);
|
$user_content = $article->find('div.user-content', 0);
|
||||||
if(is_null($aside))
|
if(is_null($user_content))
|
||||||
returnServerError('Cannot find article author information!');
|
returnServerError('Cannot find user content!');
|
||||||
|
|
||||||
// Go through all h2 elements to find index of required span (I know... it's stupid)
|
// Go through all h2 elements to find index of required span (I know... it's stupid)
|
||||||
$author_position = 'Unknown';
|
$author_position = 'Unknown';
|
||||||
foreach($aside->find('h2') as $subject){
|
foreach($user_content->find('div') as $content){
|
||||||
if(stristr(strtolower($subject->plaintext), 'position')){ /* This works for at, ch, de, us */
|
if(stristr(strtolower($content->plaintext), 'position')){ /* This works for at, ch, de, us */
|
||||||
$author_position = $subject->next_sibling()->plaintext;
|
$author_position = $content->next_sibling()->plaintext;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -209,7 +220,7 @@ class KununuBridge extends BridgeAbstract {
|
||||||
* Returns the description from a given article
|
* Returns the description from a given article
|
||||||
*/
|
*/
|
||||||
private function extract_article_description($article){
|
private function extract_article_description($article){
|
||||||
$description = $article->find('div[itemprop=description]', 0);
|
$description = $article->find('[itemprop=reviewBody]', 0);
|
||||||
if(is_null($description))
|
if(is_null($description))
|
||||||
returnServerError('Cannot find article description!');
|
returnServerError('Cannot find article description!');
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue