KununuBridge.php 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. <?php
  /**
   * Bridge that collects the latest kununu reviews of a company as feed items.
   *
   * The review listing page for the selected site and company is loaded, every
   * <article> inside the review section is turned into one item (author
   * position, timestamp, title, uri, content) and appended to $this->items.
   *
   * NOTE(review): all error paths call $this->returnServerError() and then rely
   * on it aborting the request (presumably by throwing) -- confirm against
   * BridgeAbstract.
   */
  class KununuBridge extends BridgeAbstract {
    const MAINTAINER = 'logmanoriginal';
    const NAME = 'Kununu Bridge';
    const URI = 'https://www.kununu.com/';
    const DESCRIPTION = 'Returns the latest reviews for a company and site of your choice.';

    // Input definitions: the 'global' group carries the site selector and the
    // "full article" switch, the unnamed group carries the company.
    const PARAMETERS = array(
      'global' => array(
        'site' => array(
          'name' => 'Site',
          'type' => 'list',
          'required' => true,
          'title' => 'Select your site',
          'values' => array(
            'Austria' => 'at',
            'Germany' => 'de',
            'Switzerland' => 'ch',
            'United States' => 'us'
          )
        ),
        'full' => array(
          'name' => 'Load full article',
          'type' => 'checkbox',
          'required' => false,
          'exampleValue' => 'checked',
          'title' => 'Activate to load full article'
        )
      ),
      array(
        'company' => array(
          'name' => 'Company',
          'required' => true,
          'exampleValue' => 'kununu-us',
          'title' => 'Insert company name (i.e. Kununu US) or URI path (i.e. kununu-us)'
        )
      )
    );

    // Company name as displayed on the page; filled in by collectData().
    private $companyName = '';

    /**
     * Returns the URI of the review listing for the requested site and company.
     *
     * Falls back to the plain site URI while the 'site' or 'company' input is
     * not available, instead of producing a path made of empty segments.
     *
     * @return string
     */
    public function getURI(){
      $site = $this->getInput('site');
      $company = $this->company_slug();

      if(empty($site) || $company === '')
        return self::URI;

      // The review section is named differently on the German-language sites.
      switch($site){
        case 'at':
        case 'de':
        case 'ch':
          $section = 'kommentare';
          break;
        case 'us':
          $section = 'reviews';
          break;
        default:
          $section = '';
      }

      return self::URI . $site . '/' . $company . '/' . $section;
    }

    /**
     * Returns the feed name: "<company> - Kununu Bridge".
     *
     * The company name extracted by collectData() is preferred; before that
     * the slug built from the 'company' input is used. Without either, only
     * the bridge name is returned.
     *
     * @return string
     */
    public function getName(){
      $label = $this->companyName ?: $this->company_slug();

      if((string)$label === '')
        return self::NAME;

      return $label . ' - ' . self::NAME;
    }

    /**
     * Loads the review listing and appends one item per review to $this->items.
     */
    public function collectData(){
      $full = $this->getInput('full');
      $uri = $this->getURI();

      // Load page
      $html = $this->getSimpleHTMLDOM($uri);
      if(!$html)
        $this->returnServerError('Unable to receive data from ' . $uri . '!');

      // Update name for this request
      $this->companyName = $this->extract_company_name($html);

      // Find the section with all the panels (reviews)
      $section = $this->find_or_fail(
        $html,
        'section.kununu-scroll-element',
        'Unable to find panel section!'
      );

      // Find all articles (within the panels); empty() also covers a false/null result
      $articles = $section->find('article');
      if(empty($articles))
        $this->returnServerError('Unable to find articles!');

      foreach($articles as $article){
        $item = array();

        $item['author'] = $this->extract_article_author_position($article);
        $item['timestamp'] = $this->extract_article_date($article);
        $item['title'] = $this->extract_article_rating($article)
          . ' : '
          . $this->extract_article_summary($article);
        $item['uri'] = $this->extract_article_uri($article);

        // The full text requires one additional (cached) page load per review.
        if($full)
          $item['content'] = $this->extract_full_description($item['uri']);
        else
          $item['content'] = $this->extract_article_description($article);

        $this->items[] = $item;
      }
    }

    /**
     * Returns the cache duration.
     *
     * @return int 86400, i.e. one day
     */
    public function getCacheDuration(){
      return 86400; // 1 day
    }

    /**
     * Builds the URI path segment for the 'company' input: trimmed, spaces
     * replaced by dashes, umlauts transliterated, then lower-cased.
     *
     * Transliteration happens BEFORE lower-casing on purpose: strtolower() does
     * not fold multibyte capitals such as "Ä", so the reverse order would leave
     * an upper-case "Ae" in the slug.
     *
     * @return string empty string when no company was provided
     */
    private function company_slug(){
      $company = trim((string)$this->getInput('company'));
      $company = str_replace(' ', '-', $company);

      return strtolower($this->encode_umlauts($company));
    }

    /**
     * Returns the first element matching $selector below $node and reports a
     * server error with $message if there is none.
     *
     * A falsy check is used instead of "=== false": the DOM objects come from
     * getSimpleHTMLDOM(), and Simple HTML DOM's find($selector, 0) yields null
     * (not false) when nothing matches.
     *
     * @param object $node     DOM document or element offering find()
     * @param string $selector CSS-like selector
     * @param string $message  error text passed to returnServerError()
     * @return object the matching element
     */
    private function find_or_fail($node, $selector, $message){
      $found = $node->find($selector, 0);

      if(!$found)
        $this->returnServerError($message);

      return $found;
    }

    /**
     * Fixes relative URLs in the given text
     *
     * Root-relative href values ("/path") are prefixed with the site URI. The
     * original quote character is kept, and protocol-relative links ("//host")
     * are left untouched.
     *
     * @param string $text HTML fragment
     * @return string
     */
    private function fix_url($text){
      return preg_replace(
        '/href=([\'"])\/(?!\/)/i',
        'href=${1}' . self::URI,
        $text
      );
    }

    /**
     * Encodes umlauts in the given text
     *
     * @param string $text
     * @return string text with ä/ö/ü/Ä/Ö/Ü/ß replaced by ae/oe/ue/Ae/Oe/Ue/ss
     */
    private function encode_umlauts($text){
      return strtr($text, array(
        'ä' => 'ae',
        'ö' => 'oe',
        'ü' => 'ue',
        'Ä' => 'Ae',
        'Ö' => 'Oe',
        'Ü' => 'Ue',
        'ß' => 'ss'
      ));
    }

    /**
     * Returns the company name from the review html
     * (text of the first <h1> inside the first div.panel).
     */
    private function extract_company_name($html){
      $panel = $this->find_or_fail($html, 'div.panel', 'Cannot find panel for company name!');
      $company_name = $this->find_or_fail($panel, 'h1', 'Cannot find company name!');

      return $company_name->plaintext;
    }

    /**
     * Returns the date from a given article
     *
     * @return int|false result of strtotime() on the element's datetime attribute
     */
    private function extract_article_date($article){
      // They conveniently provide a time attribute for us :)
      $date = $this->find_or_fail(
        $article,
        'time[itemprop=dtreviewed]',
        'Cannot find article date!'
      );

      return strtotime($date->datetime);
    }

    /**
     * Returns the rating from a given article
     * (aria-label attribute of the first span.rating).
     */
    private function extract_article_rating($article){
      $rating = $this->find_or_fail($article, 'span.rating', 'Cannot find article rating!');

      return $rating->getAttribute('aria-label');
    }

    /**
     * Returns the summary from a given article (markup stripped)
     */
    private function extract_article_summary($article){
      $summary = $this->find_or_fail(
        $article,
        '[itemprop=summary]',
        'Cannot find article summary!'
      );

      return strip_tags($summary->innertext);
    }

    /**
     * Returns the URI from a given article
     *
     * The link is taken from the first anchor inside the summary element.
     * Absolute http(s) links are returned as they are; anything else is
     * appended to the site URI without doubling the separating slash.
     */
    private function extract_article_uri($article){
      $summary = $this->find_or_fail(
        $article,
        '[itemprop=summary]',
        'Cannot find article summary!'
      );
      $anchor = $this->find_or_fail($summary, 'a', 'Cannot find article URI!');

      $href = (string)$anchor->href;
      if(preg_match('/^https?:\/\//i', $href))
        return $href;

      // self::URI already ends with a slash
      return self::URI . ltrim($href, '/');
    }

    /**
     * Returns the position of the author from a given article
     *
     * @return string text of the element following the "position" heading,
     *                or 'Unknown' if there is no such heading or element
     */
    private function extract_article_author_position($article){
      // We need to parse the aside manually
      $aside = $this->find_or_fail(
        $article,
        'aside',
        'Cannot find article author information!'
      );

      // The value sits in the element right after the h2 mentioning "position"
      foreach($aside->find('h2') as $subject){
        /* This works for at, ch, de, us */
        if(stripos($subject->plaintext, 'position') === false)
          continue;

        $value = $subject->next_sibling();
        if($value)
          return $value->plaintext;

        // Heading without a following element: only the first match is considered
        break;
      }

      return 'Unknown';
    }

    /**
     * Returns the description from a given article (with relative links fixed)
     */
    private function extract_article_description($article){
      $description = $this->find_or_fail(
        $article,
        'div[itemprop=description]',
        'Cannot find article description!'
      );

      return $this->fix_url($description->innertext);
    }

    /**
     * Returns the full description from a given uri
     */
    private function extract_full_description($uri){
      // Load full article
      $html = $this->getSimpleHTMLDOMCached($uri);
      if(!$html)
        $this->returnServerError('Could not load full description!');

      // Find the article
      $article = $this->find_or_fail($html, 'article', 'Cannot find article!');

      // Luckily they use the same layout for the review overview and full article pages :)
      return $this->extract_article_description($article);
    }
  }