KununuBridge.php 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. <?php
  2. class KununuBridge extends BridgeAbstract {
  // Bridge metadata.
  const MAINTAINER = 'logmanoriginal';
  const NAME = 'Kununu Bridge';
  const URI = 'https://www.kununu.com/';
  const CACHE_TIMEOUT = 86400; // 24h
  const DESCRIPTION = 'Returns the latest reviews for a company and site of your choice.';

  // Input definitions: the 'global' group holds the site selector and the
  // "full article" switch, the unnamed group holds the company input.
  const PARAMETERS = [
      'global' => [
          'site' => [
              'name' => 'Site',
              'type' => 'list',
              'required' => true,
              'title' => 'Select your site',
              // Display label => site code used as the first URI path segment
              'values' => [
                  'Austria' => 'at',
                  'Germany' => 'de',
                  'Switzerland' => 'ch',
                  'United States' => 'us'
              ]
          ],
          'full' => [
              'name' => 'Load full article',
              'type' => 'checkbox',
              'required' => false,
              'exampleValue' => 'checked',
              'title' => 'Activate to load full article'
          ]
      ],
      [
          'company' => [
              'name' => 'Company',
              'required' => true,
              'exampleValue' => 'kununu-us',
              'title' => 'Insert company name (i.e. Kununu US) or URI path (i.e. kununu-us)'
          ]
      ]
  ];

  // Company name as shown on the review page; filled in by collectData().
  private $companyName = '';
  /**
   * Builds the URI of the review overview page for the chosen site and company.
   *
   * The company input is converted into a URI slug: surrounding whitespace
   * is trimmed, spaces become dashes, umlauts are transliterated and the
   * result is lower-cased.
   *
   * @return string The review page URI, or the plain site URI when the
   *                company or the site input is missing.
   */
  public function getURI(){
      $company = trim((string)$this->getInput('company'));
      $site = (string)$this->getInput('site');

      // Nothing to build without both inputs; avoids returning ".com///".
      if($company === '' || $site === '')
          return self::URI;

      // Transliterate BEFORE lower-casing: the replacements for upper-case
      // umlauts ("Ae", "Oe", "Ue") contain capitals, and a byte-wise
      // strtolower() must not see the multi-byte characters at all.
      $slug = strtolower($this->encode_umlauts(str_replace(' ', '-', $company)));

      // The review section is named differently on the German-language sites.
      $section = '';
      switch($site){
          case 'at':
          case 'de':
          case 'ch':
              $section = 'kommentare';
              break;
          case 'us':
              $section = 'reviews';
              break;
      }

      return self::URI . $site . '/' . $slug . '/' . $section;
  }
  /**
   * Returns the feed name in the form "<company> - Kununu Bridge".
   *
   * The company name extracted by collectData() is preferred; before that
   * the slug derived from the company input is used.
   *
   * @return string The feed name, or just the bridge name when no company
   *                is known yet.
   */
  public function getName(){
      if($this->companyName)
          return $this->companyName . ' - ' . self::NAME;

      $company = trim((string)$this->getInput('company'));

      // No company given (e.g. no request parameters): avoid " - Kununu Bridge".
      if($company === '')
          return self::NAME;

      // Transliterate before lower-casing so "Ae"/"Oe"/"Ue" end up lower-case.
      $slug = strtolower($this->encode_umlauts(str_replace(' ', '-', $company)));

      return $slug . ' - ' . self::NAME;
  }
  /**
   * Loads the review overview page and appends one item per review article
   * to $this->items.
   *
   * Each item carries 'author', 'timestamp', 'title', 'uri' and 'content'.
   * When the 'full' input is set, the content is fetched from the review's
   * own page instead of the overview.
   *
   * Failures (page not loadable, expected elements missing) are reported
   * through returnServerError().
   */
  public function collectData(){
      $full = $this->getInput('full');
      $uri = $this->getURI();

      // Load page
      $html = getSimpleHTMLDOM($uri);
      if(!$html)
          returnServerError('Unable to receive data from ' . $uri . '!');

      // Update name for this request
      $this->companyName = $this->extract_company_name($html);

      // Find the section with all the panels (reviews).
      // find() with an index does not return false on a miss, so a falsy
      // check is required to catch a missing element.
      $section = $html->find('section.kununu-scroll-element', 0);
      if(!$section)
          returnServerError('Unable to find panel section!');

      // Find all articles (within the panels)
      $articles = $section->find('article');
      if(empty($articles))
          returnServerError('Unable to find articles!');

      // Go through all articles
      foreach($articles as $article){
          $item = array();

          $item['author'] = $this->extract_article_author_position($article);
          $item['timestamp'] = $this->extract_article_date($article);
          $item['title'] = $this->extract_article_rating($article)
              . ' : '
              . $this->extract_article_summary($article);
          $item['uri'] = $this->extract_article_uri($article);

          if($full)
              $item['content'] = $this->extract_full_description($item['uri']);
          else
              $item['content'] = $this->extract_article_description($article);

          $this->items[] = $item;
      }
  }
  /**
   * Turns site-relative links (href="/...") in the given markup into
   * absolute links below self::URI.
   *
   * The original quote character is kept, so single-quoted attributes stay
   * well-formed. Protocol-relative links (href="//host/...") are left alone.
   *
   * @param string $text HTML fragment
   * @return string HTML fragment with absolute links
   */
  private function fix_url($text){
      // (?!\/) excludes "//host/..." links; ${1} re-emits the matched quote.
      return preg_replace('/href=(\'|")\/(?!\/)/i', 'href=${1}' . self::URI, $text);
  }
  /**
   * Transliterates German umlauts and sharp s in the given text
   * (ä => ae, ö => oe, ü => ue, Ä => Ae, Ö => Oe, Ü => Ue, ß => ss).
   *
   * @param string $text Text to transliterate
   * @return string Text without umlauts
   */
  private function encode_umlauts($text){
      $search = array('ä', 'ö', 'ü', 'Ä', 'Ö', 'Ü', 'ß');
      $replacement = array('ae', 'oe', 'ue', 'Ae', 'Oe', 'Ue', 'ss');

      // Plain literal substitution; no regular expression needed.
      return str_replace($search, $replacement, $text);
  }
  /**
   * Returns the company name from the review html.
   *
   * The name is read from the first h1 inside the first div.panel.
   *
   * @param object $html Parsed review page (must provide find())
   * @return string Plain-text company name
   */
  private function extract_company_name($html){
      // find() with an index does not return false on a miss, hence the
      // falsy checks instead of "=== false".
      $panel = $html->find('div.panel', 0);
      if(!$panel)
          returnServerError('Cannot find panel for company name!');

      $company_name = $panel->find('h1', 0);
      if(!$company_name)
          returnServerError('Cannot find company name!');

      return $company_name->plaintext;
  }
  /**
   * Returns the date from a given article.
   *
   * @param object $article Article DOM node (must provide find())
   * @return int|false Timestamp parsed from the time element's datetime
   *                   attribute; false if strtotime() cannot parse it
   */
  private function extract_article_date($article){
      // They conveniently provide a time attribute for us :)
      $date = $article->find('time[itemprop=dtreviewed]', 0);

      // A falsy check is needed: a missing element is not reported as false.
      if(!$date)
          returnServerError('Cannot find article date!');

      return strtotime($date->datetime);
  }
  /**
   * Returns the rating from a given article.
   *
   * @param object $article Article DOM node (must provide find())
   * @return string Value of the aria-label attribute of the rating element
   */
  private function extract_article_rating($article){
      $rating = $article->find('span.rating', 0);

      // A falsy check is needed: a missing element is not reported as false.
      if(!$rating)
          returnServerError('Cannot find article rating!');

      return $rating->getAttribute('aria-label');
  }
  /**
   * Returns the summary from a given article.
   *
   * @param object $article Article DOM node (must provide find())
   * @return string Summary text with all tags stripped
   */
  private function extract_article_summary($article){
      $summary = $article->find('[itemprop=summary]', 0);

      // A falsy check is needed: a missing element is not reported as false.
      if(!$summary)
          returnServerError('Cannot find article summary!');

      return strip_tags($summary->innertext);
  }
  /**
   * Returns the URI from a given article.
   *
   * The link is taken from the first anchor inside the summary element.
   * Relative links are resolved against self::URI without producing a
   * double slash; links that are already absolute are returned unchanged.
   *
   * @param object $article Article DOM node (must provide find())
   * @return string Absolute URI of the review
   */
  private function extract_article_uri($article){
      // Notice: This first part is the same as in extract_article_summary!
      $summary = $article->find('[itemprop=summary]', 0);
      if(!$summary)
          returnServerError('Cannot find article summary!');

      $anchor = $summary->find('a', 0);
      if(!$anchor || !$anchor->href)
          returnServerError('Cannot find article URI!');

      $href = $anchor->href;

      // Already absolute: do not prefix the site URI a second time.
      if(preg_match('#^https?://#i', $href))
          return $href;

      // self::URI ends with "/" and relative links usually start with one.
      return rtrim(self::URI, '/') . '/' . ltrim($href, '/');
  }
  /**
   * Returns the position of the author from a given article.
   *
   * The aside of the article is scanned for the first h2 whose text contains
   * "position" (case-insensitive); the text of the element following that
   * heading is returned.
   *
   * @param object $article Article DOM node (must provide find())
   * @return string The author's position, or 'Unknown' when no matching
   *                heading (or no element after it) exists
   */
  private function extract_article_author_position($article){
      // We need to parse the aside manually
      $aside = $article->find('aside', 0);
      if(!$aside)
          returnServerError('Cannot find article author information!');

      $author_position = 'Unknown';

      foreach($aside->find('h2') as $subject){
          // Case-insensitive match; this works for at, ch, de, us
          if(stripos($subject->plaintext, 'position') === false)
              continue;

          // The heading may be the last element, so guard the sibling access.
          $value = $subject->next_sibling();
          if($value)
              $author_position = $value->plaintext;

          break;
      }

      return $author_position;
  }
  /**
   * Returns the description from a given article.
   *
   * @param object $article Article DOM node (must provide find())
   * @return string Inner HTML of the description element, passed through
   *                fix_url()
   */
  private function extract_article_description($article){
      $description = $article->find('div[itemprop=description]', 0);

      // A falsy check is needed: a missing element is not reported as false.
      if(!$description)
          returnServerError('Cannot find article description!');

      return $this->fix_url($description->innertext);
  }
  /**
   * Returns the full description from a given uri.
   *
   * The page is loaded through getSimpleHTMLDOMCached() and the description
   * is taken from its first article element.
   *
   * @param string $uri URI of the full review page
   * @return string Description HTML as returned by
   *                extract_article_description()
   */
  private function extract_full_description($uri){
      // Load full article
      $html = getSimpleHTMLDOMCached($uri);
      if(!$html)
          returnServerError('Could not load full description!');

      // Find the article; a missing element is not reported as false.
      $article = $html->find('article', 0);
      if(!$article)
          returnServerError('Cannot find article!');

      // Luckily they use the same layout for the review overview and full article pages :)
      return $this->extract_article_description($article);
  }
  199. }