VkBridge.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. <?php
  2. class VkBridge extends BridgeAbstract
  3. {
  // Bridge metadata constants.
  const MAINTAINER = 'ahiles3005';
  const NAME = 'VK.com';
  const URI = 'https://vk.com/';
  // Cache lifetime: 300 (five minutes, presumably in seconds - confirm against the framework).
  const CACHE_TIMEOUT = 300;
  const DESCRIPTION = 'Working with open pages';

  // Input parameters: a single required field "u" holding the group or user name.
  const PARAMETERS = array(
      array(
          'u' => array(
              'name' => 'Group or user name',
              'required' => true,
          ),
      ),
  );

  // Videos collected while parsing posts, keyed by video id; each entry has 'url' and 'title'.
  protected $videos = array();

  // Page title taken from the fetched page; stays unset when the page has no ".page_name" element.
  protected $pageName;
  /**
   * Returns the access token that api() sends with every API request.
   *
   * NOTE(review): this credential is committed in source code. Consider
   * loading it from configuration instead and rotating the exposed value.
   *
   * @return string
   */
  protected function getAccessToken()
  {
      $token = 'c8071613517c155c6cfbd2a059b2718e9c37b89094c4766834969dda75f657a2c1cbb49bab4c5e649f1db';

      return $token;
  }
  /**
   * Builds the address of the requested page.
   *
   * @return string The base URI followed by the URL-encoded "u" input, or
   *                the parent's URI when that input is not set.
   */
  public function getURI()
  {
      $page = $this->getInput('u');

      if (is_null($page)) {
          return parent::getURI();
      }

      return static::URI . urlencode($page);
  }
  /**
   * Returns the feed name.
   *
   * @return string The page name stored by collectData() when it is
   *                non-empty, otherwise the parent's name.
   */
  public function getName()
  {
      return $this->pageName ? $this->pageName : parent::getName();
  }
  /**
   * Fetches the page, parses every ".post" element and fills $this->items.
   *
   * For each post the attachments (external links, article snippet, videos,
   * photos, albums, documents, polls) are cut out of the post markup and
   * re-added as simple HTML after the stripped post text. The statement
   * order matters: elements are blanked via "outertext" before the remaining
   * post text is read.
   *
   * A pinned post (class "post_fixed") is held back and only added when it
   * is the sole post or is newer than the last regular post on the page.
   *
   * Posts that lack a "a.post_link" or "div.wall_text" element are skipped,
   * and attachments whose expected child elements are missing are left
   * untouched instead of causing a fatal error.
   *
   * @return void
   */
  public function collectData()
  {
      $text_html = $this->getContents();
      if (!$text_html) {
          returnServerError('No results for group or user name "' . $this->getInput('u') . '".');
      }

      // iconv() returns false when the input cannot be converted.
      $text_html = iconv('windows-1251', 'utf-8', $text_html);
      if ($text_html === false) {
          returnServerError('Unable to convert the page for "' . $this->getInput('u') . '" to UTF-8.');
      }

      // makes album link generating work correctly
      $text_html = str_replace('"class="page_album_link">', '" class="page_album_link">', $text_html);

      $html = str_get_html($text_html);
      if (!is_object($html)) {
          returnServerError('Unable to parse the page for "' . $this->getInput('u') . '".');
      }

      // Stays null when the page has no ".page_name" element; also used as the default author.
      $pageName = null;
      $page_name_element = $html->find('.page_name', 0);
      if (is_object($page_name_element)) {
          $pageName = $page_name_element->plaintext;
          $this->pageName = htmlspecialchars_decode($pageName);
      }

      // Drop comment blocks, then re-parse so that they are gone from the tree.
      foreach ($html->find('div.replies') as $comment_block) {
          $comment_block->outertext = '';
      }
      $html->load($html->save());

      $pinned_post_item = null;
      $last_post_id = 0;

      foreach ($html->find('.post') as $post) {
          defaultLinkTo($post, self::URI);

          // A feed item needs both a permalink and a text container.
          $post_link_element = $post->find('a.post_link', 0);
          $wall_text = $post->find('div.wall_text', 0);
          if (!is_object($post_link_element) || !is_object($wall_text)) {
              continue;
          }

          $post_videos = array();
          $is_pinned_post = strpos($post->getAttribute('class'), 'post_fixed') !== false;

          // delete link "show full" in content
          $show_more = $post->find('a.wall_post_more', 0);
          if (is_object($show_more)) {
              $show_more->outertext = '';
          }

          $content_suffix = '';

          // looking for external links
          $external_link_selectors = array(
              'a.page_media_link_title',
              'div.page_media_link_title > a',
              'div.media_desc > a.lnk',
          );
          foreach ($external_link_selectors as $sel) {
              $a = $post->find($sel, 0);
              if (!is_object($a)) {
                  continue;
              }
              $innertext = $a->innertext;
              $parsed_url = parse_url($a->getAttribute('href'));
              // parse_url() returns false for malformed URLs and omits absent components.
              if (!is_array($parsed_url) || !isset($parsed_url['path'], $parsed_url['query'])) {
                  continue;
              }
              if (strpos($parsed_url['path'], '/away.php') !== 0) {
                  continue;
              }
              parse_str($parsed_url['query'], $parsed_query);
              if (!isset($parsed_query['to']) || !is_string($parsed_query['to'])) {
                  continue;
              }
              // The target comes from the page; escape it for the single-quoted attribute.
              $target = htmlspecialchars($parsed_query['to'], ENT_QUOTES);
              $content_suffix .= "<br>External link: <a href='" . $target . "'>$innertext</a>";
          }

          // remove external link from content
          $external_link_selectors_to_remove = array(
              'div.page_media_thumbed_link',
              'div.page_media_link_desc_wrap',
              'div.media_desc > a.lnk',
          );
          foreach ($external_link_selectors_to_remove as $sel) {
              $element = $post->find($sel, 0);
              if (is_object($element)) {
                  $element->outertext = '';
              }
          }

          // looking for article
          $article = $post->find('a.article_snippet', 0);
          if (is_object($article)) {
              if (strpos($article->getAttribute('class'), 'article_snippet_mini') !== false) {
                  $article_title_selector = 'div.article_snippet_mini_title';
                  $article_author_selector = 'div.article_snippet_mini_info > .mem_link,
                  div.article_snippet_mini_info > .group_link';
                  $article_thumb_selector = 'div.article_snippet_mini_thumb';
              } else {
                  $article_title_selector = 'div.article_snippet__title';
                  $article_author_selector = 'div.article_snippet__author';
                  $article_thumb_selector = 'div.article_snippet__image';
              }

              $title_element = $article->find($article_title_selector, 0);
              $author_element = $article->find($article_author_selector, 0);
              $thumb_element = $article->find($article_thumb_selector, 0);

              // Missing parts degrade to empty strings instead of a fatal error.
              $article_title = is_object($title_element) ? $title_element->innertext : '';
              $article_author = is_object($author_element) ? $author_element->innertext : '';
              $article_link = $article->getAttribute('href');

              if (is_object($thumb_element)) {
                  $article_img_element_style = $thumb_element->getAttribute('style');
                  if (preg_match('/background-image: url\((.*)\)/', $article_img_element_style, $matches)) {
                      $content_suffix .= "<br><img src='" . $matches[1] . "'>";
                  }
              }

              $content_suffix .= "<br>Article: <a href='$article_link'>$article_title ($article_author)</a>";
              $article->outertext = '';
          }

          // get video on post
          $video = $post->find('div.post_video_desc', 0);
          $main_video_link = '';
          if (is_object($video)) {
              $video_title_element = $video->find('div.post_video_title', 0);
              $video_link_element = $video->find('a.lnk', 0);
              if (is_object($video_link_element)) {
                  // appendVideo() substitutes "(empty)" for an empty title.
                  $video_title = is_object($video_title_element) ? $video_title_element->plaintext : '';
                  $video_link = $video_link_element->getAttribute('href');
                  $this->appendVideo($video_title, $video_link, $content_suffix, $post_videos);
                  $video->outertext = '';
                  $main_video_link = $video_link;
              }
          }

          // get all other videos
          foreach ($post->find('a.page_post_thumb_video') as $a) {
              $video_title = htmlspecialchars_decode($a->getAttribute('aria-label'));
              // Drop the first word of the label when there is more than one.
              $temp = explode(' ', $video_title, 2);
              if (count($temp) > 1) {
                  $video_title = $temp[1];
              }
              $video_link = $a->getAttribute('href');
              if ($video_link != $main_video_link) {
                  $this->appendVideo($video_title, $video_link, $content_suffix, $post_videos);
              }
              $a->outertext = '';
          }

          // get all photos
          foreach ($post->find('div.wall_text > a.page_post_thumb_wrap') as $a) {
              $result = $this->getPhoto($a);
              if ($result == null) {
                  continue;
              }
              $a->outertext = '';
              $content_suffix .= "<br>$result";
          }

          // get albums
          foreach ($post->find('.page_album_wrap') as $el) {
              $a = $el->find('.page_album_link', 0);
              if (!is_object($a)) {
                  continue;
              }
              $album_title_element = $a->find('.page_album_title_text', 0);
              if (!is_object($album_title_element)) {
                  continue;
              }
              $album_title = $album_title_element->getAttribute('title');
              $album_link = $a->getAttribute('href');
              $el->outertext = '';
              $content_suffix .= "<br>Album: <a href='$album_link'>$album_title</a>";
          }

          // get photo documents
          foreach ($post->find('a.page_doc_photo_href') as $a) {
              $doc_link = $a->getAttribute('href');
              $doc_gif_label_element = $a->find('.page_gif_label', 0);
              $doc_title_element = $a->find('.doc_label', 0);

              if (is_object($doc_gif_label_element)) {
                  $gif_preview_img = backgroundToImg($a->find('.page_doc_photo', 0));
                  $content_suffix .= "<br>Gif: <a href='$doc_link'>$gif_preview_img</a>";
              } elseif (is_object($doc_title_element)) {
                  $doc_title = $doc_title_element->innertext;
                  $content_suffix .= "<br>Doc: <a href='$doc_link'>$doc_title</a>";
              } else {
                  continue;
              }

              $a->outertext = '';
          }

          // get other documents
          foreach ($post->find('div.page_doc_row') as $div) {
              $doc_title_element = $div->find('a.page_doc_title', 0);
              if (!is_object($doc_title_element)) {
                  continue;
              }
              $doc_title = $doc_title_element->innertext;
              $doc_link = $doc_title_element->getAttribute('href');
              $content_suffix .= "<br>Doc: <a href='$doc_link'>$doc_title</a>";
              $div->outertext = '';
          }

          // get polls
          foreach ($post->find('div.page_media_poll_wrap') as $div) {
              $poll_title_element = $div->find('.page_media_poll_title', 0);
              if (!is_object($poll_title_element)) {
                  continue;
              }
              $poll_title = $poll_title_element->innertext;
              $content_suffix .= "<br>Poll: $poll_title";
              foreach ($div->find('div.page_poll_text') as $poll_stat_title) {
                  $content_suffix .= '<br>- ' . $poll_stat_title->innertext;
              }
              $div->outertext = '';
          }

          // get sign: the last "signed by" link wins, the page name is the default
          $post_author = $pageName;
          foreach ($post->find('a.wall_signed_by') as $a) {
              $post_author = $a->innertext;
              $a->outertext = '';
          }

          // Reposts: drop the header and prefix the quoted content.
          $copy_quote = $post->find('div.copy_quote', 0);
          if (is_object($copy_quote)) {
              if ($copy_post_header = $copy_quote->find('div.copy_post_header', 0)) {
                  $copy_post_header->outertext = '';
              }
              $copy_quote_content = $copy_quote->innertext;
              $copy_quote->outertext = "<br>Reposted: <br>$copy_quote_content";
          }

          $item = array();
          // Read the text only now, after all attachments were blanked out above.
          $item['content'] = strip_tags(backgroundToImg($wall_text->innertext), '<br><img>');
          $item['content'] .= $content_suffix;
          $item['categories'] = array();

          // get post hashtags
          // NOTE(review): links were rewritten by defaultLinkTo() above; confirm that the
          // relative prefix below can still match at this point.
          $prefix = '/feed?section=search&q=%23';
          foreach ($post->find('a') as $a) {
              $href = $a->getAttribute('href');
              $innertext = $a->innertext;
              if ($href && substr($href, 0, strlen($prefix)) === $prefix) {
                  $item['categories'][] = urldecode(substr($href, strlen($prefix)));
              } elseif (substr($innertext, 0, 1) == '#') {
                  $item['categories'][] = $innertext;
              }
          }

          // get post link
          $post_link = $post_link_element->getAttribute('href');
          // The id stays 0 when the link does not look like a wall post link.
          $post_id = 0;
          if (preg_match('/wall-?\d+_(\d+)/', $post_link, $preg_match_result)) {
              $post_id = intval($preg_match_result[1]);
          }

          $item['post_id'] = $post_id;
          $item['uri'] = $post_link;
          $item['timestamp'] = $this->getTime($post);
          $item['title'] = $this->getTitle($item['content']);
          $item['author'] = $post_author;
          $item['videos'] = $post_videos;

          if ($is_pinned_post) {
              // do not append it now
              $pinned_post_item = $item;
          } else {
              $last_post_id = $item['post_id'];
              $this->items[] = $item;
          }
      }

      if (!is_null($pinned_post_item)) {
          if (count($this->items) == 0) {
              $this->items[] = $pinned_post_item;
          } elseif ($last_post_id < $pinned_post_item['post_id']) {
              // The pinned post is newer than the last regular post: add it and
              // order all items by post id, newest first.
              $this->items[] = $pinned_post_item;
              usort($this->items, function ($item1, $item2) {
                  return $item2['post_id'] - $item1['post_id'];
              });
          }
      }

      $this->getCleanVideoLinks();
  }
  /**
   * Builds the HTML for one photo attachment from the JSON argument of the
   * showPhoto(...) call in the element's "onclick" attribute.
   *
   * The thumbnail comes from the "x_" entry of the "temp" data and the
   * full-size link from the last available of "y_", "z_" and "w_". A path
   * that already starts with "http" is used as is, any other path is
   * prefixed with the "base" value; ".jpg" is appended in both cases.
   *
   * @param object $a Anchor element exposing getAttribute().
   * @return string|null Image markup (wrapped in a link when a full-size
   *                     version exists), or null when the attribute does not
   *                     contain usable photo data.
   */
  private function getPhoto($a) {
      $onclick = (string)$a->getAttribute('onclick');
      if (!preg_match('/return showPhoto\(.+?({.*})/', $onclick, $preg_match_result)) {
          return null;
      }

      // The argument is almost JSON: entities are encoded and the "queue" key is unquoted.
      $arg = htmlspecialchars_decode(str_replace('queue:1', '"queue":1', $preg_match_result[1]));
      $data = json_decode($arg, true);
      if (!is_array($data) || !isset($data['temp']) || !is_array($data['temp'])) {
          return null;
      }

      $temp = $data['temp'];
      $base = isset($temp['base']) ? $temp['base'] : '';

      // Resolves one size key to a URL, or null when that size is absent.
      $build_url = function ($key) use ($temp, $base) {
          if (!isset($temp[$key]) || !is_array($temp[$key]) || !isset($temp[$key][0])) {
              return null;
          }
          $path = $temp[$key][0];
          $prefix = (substr($path, 0, 4) == 'http') ? '' : $base;
          return $prefix . $path . '.jpg';
      };

      $thumb = $build_url('x_');
      if ($thumb === null) {
          // Without a thumbnail there is nothing to show; the caller keeps the original markup.
          return null;
      }

      // Later keys override earlier ones, so the last available size wins.
      $original = '';
      foreach (array('y_', 'z_', 'w_') as $key) {
          $url = $build_url($key);
          if ($url !== null) {
              $original = $url;
          }
      }

      if ($original) {
          return "<a href='$original'><img src='$thumb'></a>";
      }

      return "<img src='$thumb'>";
  }
  /**
   * Derives an item title from the item content.
   *
   * The content is entity-decoded and the first run of allowed characters
   * (word characters, Cyrillic letters, spaces and a few punctuation marks)
   * found at the start of a line becomes the title.
   *
   * @param string $content Item content.
   * @return string The matched text, or 'untitled' when nothing matches.
   */
  private function getTitle($content)
  {
      $decoded = htmlspecialchars_decode($content);
      $pattern = '/^["\w\ \p{Cyrillic}\(\)\?#«»-]+/mu';

      if (preg_match($pattern, $decoded, $found)) {
          return $found[0];
      }

      return 'untitled';
  }
  /**
   * Determines the publication time of a post from its "span.rel_date"
   * element.
   *
   * A non-empty "time" attribute is returned unchanged. Otherwise the
   * visible text is parsed; when it contains no year, today's date,
   * yesterday's date or the current year is added (depending on the words
   * "today" / "yesterday ") and the text is parsed again.
   *
   * @param object $post Post element exposing find().
   * @return mixed The "time" attribute value, or the result of strtotime()
   *               on the rebuilt "day-month-year hour:minute" string.
   */
  private function getTime($post)
  {
      $date_element = $post->find('span.rel_date', 0);

      $time = $date_element->getAttribute('time');
      if ($time) {
          return $time;
      }

      $strdate = $date_element->plaintext;
      $date = date_parse($strdate);

      if (!$date['year']) {
          if (strpos($strdate, 'today') !== false) {
              $strdate = date('d-m-Y') . ' ' . $strdate;
          } elseif (strpos($strdate, 'yesterday ') !== false) {
              $one_day_ago = time() - 60 * 60 * 24;
              $strdate = date('d-m-Y', $one_day_ago) . ' ' . $strdate;
          } else {
              $strdate .= ' ' . date('Y');
          }
          $date = date_parse($strdate);
      }

      $rebuilt = sprintf(
          '%s-%s-%s %s:%s',
          $date['day'],
          $date['month'],
          $date['year'],
          $date['hour'],
          $date['minute']
      );

      return strtotime($rebuilt);
  }
  /**
   * Downloads the page addressed by getURI().
   *
   * The request asks for English content through the Accept-language header
   * and the "remixlang=3" cookie.
   *
   * @return mixed Whatever the global getContents() helper returns for the
   *               page.
   */
  public function getContents()
  {
      // PHP's directive is named "user_agent"; the previous "user-agent"
      // spelling is not a known directive, so the call had no effect.
      ini_set('user_agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0');

      $header = array('Accept-language: en', 'Cookie: remixlang=3');

      return getContents($this->getURI(), $header);
  }
  /**
   * Registers a video found in a post.
   *
   * When the link contains an id of the form "video<owner>_<id>", the video
   * is stored in $this->videos under that id and the id is added to
   * $post_videos. Otherwise a plain "Video:" link is appended to
   * $content_suffix right away.
   *
   * @param string $video_title    Title; an empty value becomes '(empty)'.
   * @param string $video_link     Link to the video.
   * @param string $content_suffix Extra item content, extended in place.
   * @param array  $post_videos    Video ids of the current post, extended in place.
   * @return void
   */
  protected function appendVideo($video_title, $video_link, &$content_suffix, array &$post_videos)
  {
      $title = $video_title ? $video_title : '(empty)';

      if (preg_match('/video([0-9-]+_[0-9]+)/', $video_link, $id_match)) {
          $video_id = $id_match[1];
          $this->videos[$video_id] = array(
              'url' => $video_link,
              'title' => $title,
          );
          $post_videos[] = $video_id;
          return;
      }

      $content_suffix .= '<br>Video: <a href="'.htmlspecialchars($video_link).'">'.$title.'</a>';
  }
  /**
   * Appends a "Video:" link for every collected video to its item and
   * removes the temporary 'videos' key from all items.
   *
   * When videos were collected, the "video.get" API method is asked for
   * them and each returned "player" value replaces the stored URL. If the
   * API reports an error or returns an unexpected structure, the links
   * found on the page are used instead, so videos are never dropped from
   * the content.
   *
   * @return void
   */
  protected function getCleanVideoLinks() {
      // Nothing to look up when no post contained a recognisable video id.
      if (count($this->videos) > 0) {
          $result = $this->api('video.get', array(
              'videos' => implode(',', array_keys($this->videos)),
              'count' => 200
          ));

          $usable = is_array($result)
              && !isset($result['error'])
              && isset($result['response']['items'])
              && is_array($result['response']['items']);

          if ($usable) {
              foreach ($result['response']['items'] as $video) {
                  if (!isset($video['owner_id'], $video['id'], $video['player'])) {
                      continue;
                  }
                  $video_id = strval($video['owner_id']).'_'.strval($video['id']);
                  // Only update videos that were actually collected from the page.
                  if (isset($this->videos[$video_id])) {
                      $this->videos[$video_id]['url'] = $video['player'];
                  }
              }
          }
      }

      foreach ($this->items as &$item) {
          if (isset($item['videos']) && is_array($item['videos'])) {
              foreach ($item['videos'] as $video_id) {
                  if (!isset($this->videos[$video_id])) {
                      continue;
                  }
                  $video_link = $this->videos[$video_id]['url'];
                  $video_title = $this->videos[$video_id]['title'];
                  $item['content'] .= '<br>Video: <a href="'.htmlspecialchars($video_link).'">'.$video_title.'</a>';
              }
          }
          unset($item['videos']);
      }
      // Break the reference so later writes to $item cannot alter the last element.
      unset($item);
  }
  /**
   * Calls an API method on api.vk.com and decodes the JSON answer.
   *
   * The API version ('5.80') and the access token are added to the given
   * parameters before the request URL is built.
   *
   * @param string $method API method name, appended to the endpoint URL.
   * @param array  $params Query parameters for the call.
   * @return mixed The decoded response (associative arrays), as returned by
   *               json_decode().
   */
  protected function api($method, array $params)
  {
      $params['v'] = '5.80';
      $params['access_token'] = $this->getAccessToken();

      $url = 'https://api.vk.com/method/'.$method.'?'.http_build_query($params);
      $raw = getContents($url);

      return json_decode($raw, true);
  }
  347. }