diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php
index e152956..8be4bc6 100644
--- a/bridges/VkBridge.php
+++ b/bridges/VkBridge.php
@@ -43,40 +43,225 @@ class VkBridge extends BridgeAbstract
or returnServerError('No results for group or user name "' . $this->getInput('u') . '".');
$text_html = iconv('windows-1251', 'utf-8', $text_html);
+ // insert the missing space before the class attribute so album links are parsed correctly
+ $text_html = str_replace('"class="page_album_link">', '" class="page_album_link">', $text_html);
$html = str_get_html($text_html);
- $pageName = $html->find('.page_name', 0)->plaintext;
- $this->pageName = $pageName;
+ $pageName = $html->find('.page_name', 0);
+ if (is_object($pageName)) {
+ $pageName = $pageName->plaintext;
+ $this->pageName = htmlspecialchars_decode($pageName);
+ }
+ $pinned_post_item = null;
+ $last_post_id = 0;
foreach ($html->find('.post') as $post) {
+ $is_pinned_post = false;
+ if (strpos($post->getAttribute('class'), 'post_fixed') !== false) {
+ $is_pinned_post = true;
+ }
+
if (is_object($post->find('a.wall_post_more', 0))) {
//delete link "show full" in content
$post->find('a.wall_post_more', 0)->outertext = '';
}
+
+ $content_suffix = "";
+
+ // looking for external links
+ $external_link_selectors = array(
+ 'a.page_media_link_title',
+ 'div.page_media_link_title > a',
+ 'div.media_desc > a.lnk',
+ );
+
+ foreach($external_link_selectors as $sel) {
+ if (is_object($post->find($sel, 0))) {
+ $a = $post->find($sel, 0);
+ $innertext = $a->innertext;
+ $parsed_url = parse_url($a->getAttribute('href'));
+ if (strpos($parsed_url['path'], '/away.php') !== 0) continue;
+ parse_str($parsed_url["query"], $parsed_query);
+ $content_suffix .= "
External link: $innertext";
+ }
+ }
+
+ // remove external link from content
+ $external_link_selectors_to_remove = array(
+ 'div.page_media_thumbed_link',
+ 'div.page_media_link_desc_wrap',
+ 'div.media_desc > a.lnk',
+ );
+
+ foreach($external_link_selectors_to_remove as $sel) {
+ if (is_object($post->find($sel, 0))) {
+ $post->find($sel, 0)->outertext = '';
+ }
+ }
+
+ // looking for article
+ $article = $post->find("a.article_snippet", 0);
+ if (is_object($article)) {
+ $article_title = $article->find("div.article_snippet__title", 0)->innertext;
+ $article_author = $article->find("div.article_snippet__author", 0)->innertext;
+ $article_link = self::URI . ltrim($article->getAttribute('href'), '/');
+ $article_img_element_style = $article->find("div.article_snippet__image", 0)->getAttribute('style');
+ preg_match('/background-image: url\((.*)\)/', $article_img_element_style, $matches);
+ if (count($matches) > 0) {
+ $content_suffix .= "
";
+ }
+ $content_suffix .= "
Article: $article_title ($article_author)";
+ $article->outertext = '';
+ }
+
+ // get video on post
+ $video = $post->find('div.post_video_desc', 0);
+ if (is_object($video)) {
+ $video_title = $video->find('div.post_video_title', 0)->plaintext;
+ $video_link = self::URI . ltrim( $video->find('a.lnk', 0)->getAttribute('href'), '/' );
+ $content_suffix .= "
Video: $video_title";
+ $video->outertext = '';
+ }
+
+ // get all photos
+ foreach($post->find('div.wall_text > a.page_post_thumb_wrap') as $a) {
+ $result = $this->getPhoto($a);
+ if ($result == null) continue;
+ $a->outertext = '';
+ $content_suffix .= "
$result";
+ }
+
+ // get albums
+ foreach($post->find('.page_album_wrap') as $el) {
+ $a = $el->find('.page_album_link', 0);
+ $album_title = $a->find('.page_album_title_text', 0)->getAttribute('title');
+ $album_link = self::URI . ltrim($a->getAttribute('href'), '/');
+ $el->outertext = '';
+ $content_suffix .= "
Album: $album_title";
+ }
+
+ // get photo documents
+ foreach($post->find('a.page_doc_photo_href') as $a) {
+ $doc_link = self::URI . ltrim($a->getAttribute('href'), '/');
+ $doc_gif_label_element = $a->find(".page_gif_label", 0);
+ $doc_title_element = $a->find(".doc_label", 0);
+
+ if (is_object($doc_gif_label_element)) {
+ $gif_preview_img = backgroundToImg($a->find('.page_doc_photo', 0));
+ $content_suffix .= "
Gif: $gif_preview_img";
+
+ } else if (is_object($doc_title_element)) {
+ $doc_title = $doc_title_element->innertext;
+ $content_suffix .= "
Doc: $doc_title";
+
+ } else {
+ continue;
+
+ }
+
+ $a->outertext = '';
+ }
+
+ // get other documents
+ foreach($post->find('div.page_doc_row') as $div) {
+ $doc_title_element = $div->find("a.page_doc_title", 0);
+
+ if (is_object($doc_title_element)) {
+ $doc_title = $doc_title_element->innertext;
+ $doc_link = self::URI . ltrim($doc_title_element->getAttribute('href'), '/');
+ $content_suffix .= "
Doc: $doc_title";
+
+ } else {
+ continue;
+
+ }
+
+ $div->outertext = '';
+ }
+
+ // get the post author: the page name by default, replaced by the signature link text when present
+ $post_author = $pageName;
+ foreach($post->find('a.wall_signed_by') as $a) {
+ $post_author = $a->innertext;
+ $a->outertext = '';
+ }
+
+ if (is_object($post->find('div.copy_quote', 0))) {
+ $copy_quote = $post->find('div.copy_quote', 0);
+ if ($copy_post_header = $copy_quote->find('div.copy_post_header', 0)) {
+ $copy_post_header->outertext = '';
+ }
+ $copy_quote_content = $copy_quote->innertext;
+ $copy_quote->outertext = "
Reposted:
$copy_quote_content";
+ }
+
$item = array();
$item['content'] = strip_tags(backgroundToImg($post->find('div.wall_text', 0)->innertext), '
');
-
- if (is_object($post->find('a.page_media_link_title', 0))) {
- $link = $post->find('a.page_media_link_title', 0)->getAttribute('href');
- //external link in the post
- $item['content'] .= "\n\rExternal link: "
- . str_replace('/away.php?to=', '', urldecode($link));
- }
-
- //get video on post
- if (is_object($post->find('span.post_video_title_content', 0))) {
- $titleVideo = $post->find('span.post_video_title_content', 0)->plaintext;
- $linkToVideo = self::URI . $post->find('a.page_post_thumb_video', 0)->getAttribute('href');
- $item['content'] .= "\n\r {$titleVideo}: {$linkToVideo}";
- }
+ $item['content'] .= $content_suffix;
// get post link
- $item['uri'] = self::URI . $post->find('a.post_link', 0)->getAttribute('href');
+ $post_link = $post->find('a.post_link', 0)->getAttribute('href');
+ preg_match("/wall-?\d+_(\d+)/", $post_link, $preg_match_result);
+ $item['post_id'] = intval($preg_match_result[1]);
+ if (substr(self::URI, -1) == '/') {
+ $post_link = self::URI . ltrim($post_link, "/");
+ } else {
+ $post_link = self::URI . $post_link;
+ }
+ $item['uri'] = $post_link;
$item['timestamp'] = $this->getTime($post);
- $item['author'] = $pageName;
- $this->items[] = $item;
+ $item['title'] = $this->getTitle($item['content']);
+ $item['author'] = $post_author;
+ if ($is_pinned_post) {
+ // do not append it now
+ $pinned_post_item = $item;
+ } else {
+ $last_post_id = $item['post_id'];
+ $this->items[] = $item;
+ }
}
+
+ if (is_null($pinned_post_item)) {
+ return;
+ } else if (count($this->items) == 0) {
+ $this->items[] = $pinned_post_item;
+ } else if ($last_post_id < $pinned_post_item['post_id']) {
+ $this->items[] = $pinned_post_item;
+ usort($this->items, function ($item1, $item2) {
+ return $item2['post_id'] - $item1['post_id'];
+ });
+ }
+ }
+
+ /**
+  * Builds the HTML snippet for a single photo attachment.
+  *
+  * The photo URLs are taken from the JSON object literal that is passed to
+  * showPhoto(...) inside the element's onclick attribute.
+  *
+  * @param object $a Element exposing getAttribute('onclick').
+  * @return string|null An <img> tag for the thumbnail, wrapped in a link to a
+  *                     larger variant when one is present; null when no
+  *                     usable photo data could be extracted.
+  */
+ private function getPhoto($a) {
+     $onclick = $a->getAttribute('onclick');
+     if (!is_string($onclick)
+         || preg_match('/return showPhoto\(.+?({.*})/', $onclick, $preg_match_result) !== 1) {
+         return null;
+     }
+
+     // The payload is HTML-escaped and carries an unquoted queue key;
+     // quote the key and decode the entities so json_decode() accepts it.
+     $arg = htmlspecialchars_decode(str_replace('queue:1', '"queue":1', $preg_match_result[1]));
+     $data = json_decode($arg, true);
+
+     // Bail out instead of emitting notices and a broken ".jpg" URL when the
+     // payload is not the expected shape.
+     if (!is_array($data) || !isset($data['temp']['base'], $data['temp']['x_'][0])) {
+         return null;
+     }
+
+     $temp = $data['temp'];
+     $thumb = $temp['base'] . $temp['x_'][0] . '.jpg';
+
+     // Later keys override earlier ones, so the last variant present wins.
+     // NOTE(review): presumably y_/z_/w_ are increasing sizes - confirm.
+     $original = '';
+     foreach (array('y_', 'z_', 'w_') as $key) {
+         if (isset($temp[$key][0])) {
+             $original = $temp['base'] . $temp[$key][0] . '.jpg';
+         }
+     }
+
+     // The URLs come from the remote page, so escape them for attribute context.
+     $thumb_attr = htmlspecialchars($thumb, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
+     if ($original === '') {
+         return "<img src='$thumb_attr'>";
+     }
+
+     $original_attr = htmlspecialchars($original, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
+     return "<a href='$original_attr'><img src='$thumb_attr'></a>";
+ }
+
+ /**
+  * Derives an item title from post content.
+  *
+  * HTML entities in the content are decoded first. The title is the first
+  * run of allowed characters (double quote, word characters, space, Cyrillic
+  * letters, parentheses, "?", "#", guillemets, hyphen) that begins at the
+  * start of a line.
+  *
+  * @param string $content Post content.
+  * @return string The matched run, or "untitled" when nothing matches.
+  */
+ private function getTitle($content)
+ {
+     $decoded = htmlspecialchars_decode($content);
+     $matched = preg_match('/^["\w\ \p{Cyrillic}\(\)\?#«»-]+/mu', $decoded, $hits);
+
+     // preg_match() leaves $hits empty unless it reports exactly one match
+     return ($matched === 1) ? $hits[0] : "untitled";
+ }
private function getTime($post)