KununuBridge.php 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. <?php
  /**
   * Bridge that converts the review listing of a company on kununu.com into
   * feed items.
   *
   * The listing page is selected from the 'site' and 'company' inputs. Each
   * <article> found on that page becomes one item; when the 'full' input is
   * set, the content of each item is loaded from the review's own page instead
   * of being taken from the listing.
   */
  class KununuBridge extends BridgeAbstract {

      const MAINTAINER = 'logmanoriginal';
      const NAME = 'Kununu Bridge';
      const URI = 'https://www.kununu.com/';
      const CACHE_TIMEOUT = 86400; // 24h
      const DESCRIPTION = 'Returns the latest reviews for a company and site of your choice.';

      const PARAMETERS = array(
          'global' => array(
              'site' => array(
                  'name' => 'Site',
                  'type' => 'list',
                  'required' => true,
                  'title' => 'Select your site',
                  'values' => array(
                      'Austria' => 'at',
                      'Germany' => 'de',
                      'Switzerland' => 'ch',
                      'United States' => 'us'
                  )
              ),
              'full' => array(
                  'name' => 'Load full article',
                  'type' => 'checkbox',
                  'required' => false,
                  'exampleValue' => 'checked',
                  'title' => 'Activate to load full article'
              )
          ),
          array(
              'company' => array(
                  'name' => 'Company',
                  'required' => true,
                  'exampleValue' => 'kununu-us',
                  'title' => 'Insert company name (i.e. Kununu US) or URI path (i.e. kununu-us)'
              )
          )
      );

      /**
       * Company name as shown on the loaded page. Stays empty until
       * collectData() has run; getName() then falls back to the input value.
       */
      private $companyName = '';

      /**
       * Returns the URI of the review listing for the requested company and
       * site, or the parent's URI when either input is missing.
       *
       * @return string
       */
      public function getURI(){
          $company = $this->getInput('company');
          $site = $this->getInput('site');

          if(is_null($company) || is_null($site)) {
              return parent::getURI();
          }

          // The path segment for the review listing depends on the site
          switch($site) {
              case 'at':
              case 'de':
              case 'ch':
                  $section = 'kommentare';
                  break;
              case 'us':
                  $section = 'reviews';
                  break;
              default:
                  $section = '';
          }

          return self::URI
              . $site
              . '/'
              . $this->fixCompanyName($company)
              . '/'
              . $section
              . '?sort=update_time_desc';
      }

      /**
       * Returns "<company> - Kununu Bridge". The company part is the name
       * extracted from the page if available, otherwise the normalized input.
       * Falls back to the parent's name when no company was provided.
       *
       * @return string
       */
      public function getName(){
          $company = $this->getInput('company');

          if(is_null($company)) {
              return parent::getName();
          }

          return ($this->companyName ?: $this->fixCompanyName($company)) . ' - ' . self::NAME;
      }

      /**
       * Loads the review listing and appends one item per <article> to
       * $this->items.
       */
      public function collectData(){
          $full = $this->getInput('full');

          // Load page
          $html = getSimpleHTMLDOMCached($this->getURI());
          if(!$html)
              returnServerError('Unable to receive data from ' . $this->getURI() . '!');

          // Update name for this request
          $this->companyName = $this->extractCompanyName($html);

          // Find the section with all the panels (reviews).
          // find() with an index yields null (not false) when nothing matches,
          // so test for any falsy value before dereferencing.
          $section = $html->find('section.kununu-scroll-element', 0);
          if(!$section)
              returnServerError('Unable to find panel section!');

          // Find all articles (within the panels)
          $articles = $section->find('article');
          if(empty($articles))
              returnServerError('Unable to find articles!');

          // Go through all articles
          foreach($articles as $article) {
              $item = array();

              $item['author'] = $this->extractArticleAuthorPosition($article);
              $item['timestamp'] = $this->extractArticleDate($article);
              $item['title'] = $this->extractArticleRating($article)
                  . ' : '
                  . $this->extractArticleSummary($article);
              $item['uri'] = $this->extractArticleUri($article);

              if($full)
                  $item['content'] = $this->extractFullDescription($item['uri']);
              else
                  $item['content'] = $this->extractArticleDescription($article);

              $this->items[] = $item;
          }
      }

      /**
       * Fixes site-relative URLs in the given text by prefixing them with the
       * bridge URI.
       *
       * The original quote character is kept (so single-quoted attributes stay
       * well-formed) and protocol-relative links ("//host/...") are left alone.
       *
       * @param string $text HTML fragment
       * @return string
       */
      private function fixUrl($text){
          return preg_replace('/href=(\'|")\/(?!\/)/i', 'href=${1}' . self::URI, $text);
      }

      /**
       * Returns a fixed version of the provided company name: trimmed, spaces
       * replaced by dashes, umlauts transliterated and lowercased.
       *
       * @param string $company Company name or URI path as entered by the user
       * @return string
       */
      private function fixCompanyName($company){
          $company = trim($company);
          $company = str_replace(' ', '-', $company);

          // Transliterate first: strtolower() does not lowercase multi-byte
          // characters, so 'Ö' must become 'Oe' before it can become 'oe'.
          $company = $this->encodeUmlauts($company);

          return strtolower($company);
      }

      /**
       * Encodes umlauts in the given text (ä => ae, ß => ss, ...)
       *
       * @param string $text
       * @return string
       */
      private function encodeUmlauts($text){
          $map = array(
              'ä' => 'ae',
              'ö' => 'oe',
              'ü' => 'ue',
              'Ä' => 'Ae',
              'Ö' => 'Oe',
              'Ü' => 'Ue',
              'ß' => 'ss'
          );

          return strtr($text, $map);
      }

      /**
       * Returns the company name from the review html
       *
       * @param object $html Parsed listing page
       * @return string
       */
      private function extractCompanyName($html){
          $company_name = $html->find('h1[itemprop=name]', 0);
          if(is_null($company_name))
              returnServerError('Cannot find company name!');

          return trim($company_name->plaintext);
      }

      /**
       * Returns the date from a given article as the result of strtotime()
       *
       * @param object $article Article element
       * @return int|false
       */
      private function extractArticleDate($article){
          // They conveniently provide a time attribute for us :)
          $date = $article->find('meta[itemprop=dateCreated]', 0);
          if(is_null($date))
              returnServerError('Cannot find article date!');

          return strtotime($date->content);
      }

      /**
       * Returns the rating from a given article (the 'aria-label' attribute of
       * the rating element)
       *
       * @param object $article Article element
       */
      private function extractArticleRating($article){
          $rating = $article->find('span.rating', 0);
          if(is_null($rating))
              returnServerError('Cannot find article rating!');

          return $rating->getAttribute('aria-label');
      }

      /**
       * Returns the summary from a given article with all tags stripped
       *
       * @param object $article Article element
       * @return string
       */
      private function extractArticleSummary($article){
          $summary = $article->find('[itemprop=name]', 0);
          if(is_null($summary))
              returnServerError('Cannot find article summary!');

          return strip_tags($summary->innertext);
      }

      /**
       * Returns the absolute URI from a given article
       *
       * @param object $article Article element
       * @return string
       */
      private function extractArticleUri($article){
          $anchor = $article->find('h1.review-title a', 0);
          if(is_null($anchor))
              returnServerError('Cannot find article URI!');

          $href = trim((string)$anchor->href);
          if($href === '')
              returnServerError('Cannot find article URI!');

          // Already absolute: use as is
          if(preg_match('/^https?:\/\//i', $href))
              return $href;

          // Protocol-relative: only the scheme is missing
          if(strpos($href, '//') === 0)
              return 'https:' . $href;

          // self::URI already ends with a slash, avoid doubling it
          return self::URI . ltrim($href, '/');
      }

      /**
       * Returns the position of the author from a given article, or 'Unknown'
       * if no position label is present.
       *
       * @param object $article Article element
       * @return string
       */
      private function extractArticleAuthorPosition($article){
          // We need to parse the user-content manually
          $user_content = $article->find('div.user-content', 0);
          if(is_null($user_content))
              returnServerError('Cannot find user content!');

          // Go through all div elements to find the label; the value is the
          // element right after it.
          $author_position = 'Unknown';

          foreach($user_content->find('div') as $content) {
              // 'position' works for at, ch, de, us
              if(stripos($content->plaintext, 'position') === false)
                  continue;

              // A match without a following element cannot be the label
              $value = $content->next_sibling();
              if(is_null($value))
                  continue;

              $author_position = $value->plaintext;
              break;
          }

          return $author_position;
      }

      /**
       * Returns the description from a given article with site-relative links
       * made absolute
       *
       * @param object $article Article element
       * @return string
       */
      private function extractArticleDescription($article){
          $description = $article->find('[itemprop=reviewBody]', 0);
          if(is_null($description))
              returnServerError('Cannot find article description!');

          return $this->fixUrl($description->innertext);
      }

      /**
       * Returns the full description from a given uri
       *
       * @param string $uri URI of the page of a single review
       * @return string
       */
      private function extractFullDescription($uri){
          // Load full article
          $html = getSimpleHTMLDOMCached($uri);
          if(!$html)
              returnServerError('Could not load full description!');

          // Find the article
          $article = $html->find('article', 0);
          if(is_null($article))
              returnServerError('Cannot find article!');

          // Luckily they use the same layout for the review overview and full article pages :)
          return $this->extractArticleDescription($article);
      }
  }