VkBridge.php 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. <?php
  /**
   * Bridge that scrapes the public wall of a VK.com group or user page and
   * converts every post found on it into a feed item.
   *
   * Each item carries: 'content' (post text plus links/images collected from
   * the attachments), 'post_id', 'uri', 'timestamp', 'title' and 'author'.
   */
  class VkBridge extends BridgeAbstract
  {
      const MAINTAINER = 'ahiles3005';
      const NAME = 'VK.com';
      const URI = 'https://vk.com/';
      const CACHE_TIMEOUT = 300; // 5min
      const DESCRIPTION = 'Working with open pages';
      const PARAMETERS = array(
          array(
              'u' => array(
                  'name' => 'Group or user name',
                  'required' => true
              )
          )
      );

      /** @var string|null Page title read from the fetched page; set by collectData(). */
      protected $pageName;

      /**
       * URI of the requested page, or the bridge's default URI when the 'u'
       * input has not been provided.
       *
       * @return string
       */
      public function getURI()
      {
          if (!is_null($this->getInput('u'))) {
              return static::URI . urlencode($this->getInput('u'));
          }

          return parent::getURI();
      }

      /**
       * Name of the feed: the scraped page title once collectData() has found
       * one, otherwise the parent's default name.
       *
       * @return string
       */
      public function getName()
      {
          if ($this->pageName) {
              return $this->pageName;
          }

          return parent::getName();
      }

      /**
       * Downloads the page, walks over every '.post' element and appends one
       * item per post to $this->items.
       *
       * Attachments (external links, articles, videos, photos, albums,
       * documents) are removed from the post markup and re-added to the item
       * content as plain links/images. Posts that lack a text container or a
       * recognisable permalink are skipped.
       *
       * @return void
       */
      public function collectData()
      {
          $text_html = $this->getContents()
              or returnServerError('No results for group or user name "' . $this->getInput('u') . '".');

          $text_html = iconv('windows-1251', 'utf-8', $text_html);
          // makes album link generating work correctly
          $text_html = str_replace('"class="page_album_link">', '" class="page_album_link">', $text_html);

          $html = str_get_html($text_html);
          if (!is_object($html)) {
              // Charset conversion or parsing failed; without a DOM there is nothing to scrape.
              returnServerError('Unable to parse the page of group or user name "' . $this->getInput('u') . '".');
              return; // NOTE(review): returnServerError() presumably aborts; kept as a safety net
          }

          $pageName = null;
          $page_name_element = $html->find('.page_name', 0);
          if (is_object($page_name_element)) {
              $pageName = $page_name_element->plaintext;
              $this->pageName = htmlspecialchars_decode($pageName);
          }

          $pinned_post_item = null;
          $last_post_id = 0;

          foreach ($html->find('.post') as $post) {
              // A post needs a text container and a permalink with a post id;
              // anything else cannot be turned into an item.
              $wall_text = $post->find('div.wall_text', 0);
              $post_link_element = $post->find('a.post_link', 0);
              if (!is_object($wall_text) || !is_object($post_link_element)) {
                  continue;
              }
              $post_link = $post_link_element->getAttribute('href');
              if (!preg_match("/wall-?\d+_(\d+)/", $post_link, $preg_match_result)) {
                  continue;
              }

              $is_pinned_post = strpos($post->getAttribute('class'), 'post_fixed') !== false;

              $show_more_link = $post->find('a.wall_post_more', 0);
              if (is_object($show_more_link)) {
                  //delete link "show full" in content
                  $show_more_link->outertext = '';
              }

              $content_suffix = "";

              // looking for external links
              $external_link_selectors = array(
                  'a.page_media_link_title',
                  'div.page_media_link_title > a',
                  'div.media_desc > a.lnk',
              );

              foreach ($external_link_selectors as $sel) {
                  $a = $post->find($sel, 0);
                  if (!is_object($a)) {
                      continue;
                  }

                  $innertext = $a->innertext;
                  $parsed_url = parse_url($a->getAttribute('href'));
                  if (!is_array($parsed_url) || !isset($parsed_url['path'], $parsed_url['query'])) {
                      continue;
                  }
                  // Only redirect links (/away.php?to=<target>) carry an external target.
                  if (strpos($parsed_url['path'], '/away.php') !== 0) {
                      continue;
                  }

                  parse_str($parsed_url['query'], $parsed_query);
                  if (!isset($parsed_query['to']) || !is_string($parsed_query['to'])) {
                      continue;
                  }

                  // parse_str() URL-decodes the target, so it must be escaped
                  // again before it is placed inside a quoted attribute.
                  $external_url = htmlspecialchars($parsed_query['to'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
                  $content_suffix .= "<br>External link: <a href='" . $external_url . "'>$innertext</a>";
              }

              // remove external link from content
              $external_link_selectors_to_remove = array(
                  'div.page_media_thumbed_link',
                  'div.page_media_link_desc_wrap',
                  'div.media_desc > a.lnk',
              );

              foreach ($external_link_selectors_to_remove as $sel) {
                  $element_to_remove = $post->find($sel, 0);
                  if (is_object($element_to_remove)) {
                      $element_to_remove->outertext = '';
                  }
              }

              // looking for article
              $article = $post->find("a.article_snippet", 0);
              if (is_object($article)) {
                  $article_title_element = $article->find("div.article_snippet__title", 0);
                  $article_author_element = $article->find("div.article_snippet__author", 0);
                  $article_img_element = $article->find("div.article_snippet__image", 0);

                  $article_title = is_object($article_title_element) ? $article_title_element->innertext : '';
                  $article_author = is_object($article_author_element) ? $article_author_element->innertext : '';
                  $article_link = self::URI . ltrim($article->getAttribute('href'), '/');

                  if (is_object($article_img_element)) {
                      // The preview image is only available as a CSS background.
                      $article_img_element_style = $article_img_element->getAttribute('style');
                      if (preg_match('/background-image: url\((.*)\)/', $article_img_element_style, $matches)) {
                          $content_suffix .= "<br><img src='" . $matches[1] . "'>";
                      }
                  }

                  $article_label = $article_title;
                  if ($article_author !== '') {
                      $article_label .= " ($article_author)";
                  }
                  $content_suffix .= "<br>Article: <a href='$article_link'>$article_label</a>";
                  $article->outertext = '';
              }

              // get video on post
              $video = $post->find('div.post_video_desc', 0);
              if (is_object($video)) {
                  $video_title_element = $video->find('div.post_video_title', 0);
                  $video_link_element = $video->find('a.lnk', 0);
                  // Leave the markup untouched when it does not have the expected shape.
                  if (is_object($video_title_element) && is_object($video_link_element)) {
                      $video_title = $video_title_element->plaintext;
                      $video_link = self::URI . ltrim($video_link_element->getAttribute('href'), '/');
                      $content_suffix .= "<br>Video: <a href='$video_link'>$video_title</a>";
                      $video->outertext = '';
                  }
              }

              // get all photos
              foreach ($post->find('div.wall_text > a.page_post_thumb_wrap') as $a) {
                  $result = $this->getPhoto($a);
                  if ($result == null) {
                      continue;
                  }
                  $a->outertext = '';
                  $content_suffix .= "<br>$result";
              }

              // get albums
              foreach ($post->find('.page_album_wrap') as $el) {
                  $a = $el->find('.page_album_link', 0);
                  if (!is_object($a)) {
                      continue;
                  }
                  $album_title_element = $a->find('.page_album_title_text', 0);
                  if (!is_object($album_title_element)) {
                      continue;
                  }
                  $album_title = $album_title_element->getAttribute('title');
                  $album_link = self::URI . ltrim($a->getAttribute('href'), '/');
                  $el->outertext = '';
                  $content_suffix .= "<br>Album: <a href='$album_link'>$album_title</a>";
              }

              // get photo documents
              foreach ($post->find('a.page_doc_photo_href') as $a) {
                  $doc_link = self::URI . ltrim($a->getAttribute('href'), '/');
                  $doc_gif_label_element = $a->find(".page_gif_label", 0);
                  $doc_title_element = $a->find(".doc_label", 0);

                  if (is_object($doc_gif_label_element)) {
                      $gif_preview_img = backgroundToImg($a->find('.page_doc_photo', 0));
                      $content_suffix .= "<br>Gif: <a href='$doc_link'>$gif_preview_img</a>";
                  } elseif (is_object($doc_title_element)) {
                      $doc_title = $doc_title_element->innertext;
                      $content_suffix .= "<br>Doc: <a href='$doc_link'>$doc_title</a>";
                  } else {
                      continue;
                  }

                  $a->outertext = '';
              }

              // get other documents
              foreach ($post->find('div.page_doc_row') as $div) {
                  $doc_title_element = $div->find("a.page_doc_title", 0);
                  if (!is_object($doc_title_element)) {
                      continue;
                  }

                  $doc_title = $doc_title_element->innertext;
                  $doc_link = self::URI . ltrim($doc_title_element->getAttribute('href'), '/');
                  $content_suffix .= "<br>Doc: <a href='$doc_link'>$doc_title</a>";
                  $div->outertext = '';
              }

              // get sign: a signed post is attributed to the signer instead of the page
              $post_author = $pageName;
              foreach ($post->find('a.wall_signed_by') as $a) {
                  $post_author = $a->innertext;
                  $a->outertext = '';
              }

              // Reposts: drop the header of the quoted post and mark the quote.
              $copy_quote = $post->find('div.copy_quote', 0);
              if (is_object($copy_quote)) {
                  if ($copy_post_header = $copy_quote->find('div.copy_post_header', 0)) {
                      $copy_post_header->outertext = '';
                  }
                  $copy_quote_content = $copy_quote->innertext;
                  $copy_quote->outertext = "<br>Reposted: <br>$copy_quote_content";
              }

              $item = array();
              // Read the text only now, after the attachments above were blanked out.
              $item['content'] = strip_tags(backgroundToImg($wall_text->innertext), '<br><img>');
              $item['content'] .= $content_suffix;

              // get post link
              $item['post_id'] = intval($preg_match_result[1]);
              $item['uri'] = rtrim(self::URI, '/') . '/' . ltrim($post_link, '/');
              $item['timestamp'] = $this->getTime($post);
              $item['title'] = $this->getTitle($item['content']);
              $item['author'] = $post_author;

              if ($is_pinned_post) {
                  // do not append it now
                  $pinned_post_item = $item;
              } else {
                  $last_post_id = $item['post_id'];
                  $this->items[] = $item;
              }
          }

          // The pinned post is only added when there are no other items, or when
          // its id is greater than the id of the last regular post that was read;
          // otherwise it is dropped.
          if (is_null($pinned_post_item)) {
              return;
          } elseif (count($this->items) == 0) {
              $this->items[] = $pinned_post_item;
          } elseif ($last_post_id < $pinned_post_item['post_id']) {
              $this->items[] = $pinned_post_item;
              // Re-sort by post id, highest first, so the pinned post takes its place.
              usort($this->items, function ($item1, $item2) {
                  return $item2['post_id'] - $item1['post_id'];
              });
          }
      }

      /**
       * Builds the HTML for one photo thumbnail link.
       *
       * The photo data is taken from the JSON object passed to showPhoto() in
       * the link's onclick attribute. The image is linked to the last available
       * variant among 'y_', 'z_' and 'w_' when one exists.
       *
       * @param object $a DOM node of the photo link.
       * @return string|null Image HTML, or null when the photo data cannot be read.
       */
      private function getPhoto($a)
      {
          $onclick = $a->getAttribute('onclick');
          preg_match('/return showPhoto\(.+?({.*})/', $onclick, $preg_match_result);
          if (count($preg_match_result) == 0) {
              return null;
          }

          // 'queue:1' has an unquoted key, which json_decode() would reject.
          $arg = htmlspecialchars_decode(str_replace('queue:1', '"queue":1', $preg_match_result[1]));
          $data = json_decode($arg, true);
          if (!is_array($data) || !isset($data['temp']['base'], $data['temp']['x_'][0])) {
              return null;
          }

          $thumb = $data['temp']['base'] . $data['temp']['x_'][0] . ".jpg";

          $original = '';
          foreach (array('y_', 'z_', 'w_') as $key) {
              if (!isset($data['temp'][$key][0])) {
                  continue;
              }
              $original = $data['temp']['base'] . $data['temp'][$key][0] . ".jpg";
          }

          if ($original) {
              return "<a href='$original'><img src='$thumb'></a>";
          }

          return "<img src='$thumb'>";
      }

      /**
       * Derives an item title from the item content: the first run of word
       * characters, spaces, Cyrillic letters and a few punctuation marks found
       * at the start of a line.
       *
       * @param string $content Item content (HTML).
       * @return string The matched text, or "untitled" when nothing matches.
       */
      private function getTitle($content)
      {
          preg_match('/^["\w\ \p{Cyrillic}\(\)\?#«»-]+/mu', htmlspecialchars_decode($content), $result);
          if (count($result) == 0) {
              return "untitled";
          }

          return $result[0];
      }

      /**
       * Determines the publication time of a post from its 'span.rel_date'
       * element.
       *
       * The element's 'time' attribute is returned as is when present.
       * Otherwise the visible text is parsed; texts without a year are
       * completed with today's date, yesterday's date or the current year.
       *
       * @param object $post DOM node of the post.
       * @return mixed Value of the 'time' attribute, a Unix timestamp, or false
       *               when the date is missing or cannot be parsed.
       */
      private function getTime($post)
      {
          $date_element = $post->find('span.rel_date', 0);
          if (!is_object($date_element)) {
              return false;
          }

          if ($time = $date_element->getAttribute('time')) {
              return $time;
          }

          $strdate = $date_element->plaintext;
          $date = date_parse($strdate);

          if (!$date['year']) {
              if (strstr($strdate, 'today') !== false) {
                  $strdate = date('d-m-Y') . ' ' . $strdate;
              } elseif (strstr($strdate, 'yesterday ') !== false) {
                  $time = time() - 60 * 60 * 24;
                  $strdate = date('d-m-Y', $time) . ' ' . $strdate;
              } else {
                  $strdate = $strdate . ' ' . date('Y');
              }

              $date = date_parse($strdate);
          }

          return strtotime($date['day'] . '-' . $date['month'] . '-' . $date['year'] . ' ' .
              $date['hour'] . ':' . $date['minute']);
      }

      /**
       * Fetches the raw HTML of the requested page.
       *
       * getTime() looks for the English words 'today' and 'yesterday', so the
       * request asks for English content ('remixlang=3' presumably selects the
       * English interface - TODO confirm).
       *
       * @return mixed Whatever the global getContents() helper returns.
       */
      public function getContents()
      {
          // The ini directive is spelled 'user_agent'; 'user-agent' is not a
          // known setting and ini_set() silently ignores it.
          ini_set('user_agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0');

          $header = array('Accept-language: en', 'Cookie: remixlang=3');

          return getContents($this->getURI(), $header);
      }
  }