array( 'name' => 'Username', 'required' => true ) ));
/**
 * Collects the posts of the Facebook page named by the 'u' input into
 * $this->items (one array per "article" node, with the keys uri, content,
 * title and author).
 *
 * NOTE(review): in this copy the method is collapsed onto a few physical
 * lines, so every `//` comment below runs to the end of its line and hides
 * the statements that follow it; the line breaks have to be restored before
 * this can run. Some text also appears to have been lost:
 *  - the statement starting with `echo <<` runs straight into
 *    `buildContent($fileContent)`, and neither $html nor $fileContent is
 *    assigned anywhere in the visible code;
 *  - the strip_tags() allow-list contains only line breaks, although the
 *    comment next to it says img, links and basic formatting are kept;
 *  - the replacement for the "closing a tag with attributes" pattern is an
 *    empty string, although the comment says closing tags are fixed;
 *  - the emoticon pattern ends with closing u and i tags but has no
 *    opening ones.
 * Presumably HTML-like fragments were stripped from the source; confirm
 * against the upstream file before relying on any of these literals.
 *
 * What the visible code does:
 *  - declares the named helper extractFromDelimiters($string, $start, $end),
 *    which returns the text after $start up to the first $end, or false
 *    when $start does not occur. NOTE(review): a named function declared
 *    inside a method is declared again on every call of that method, so a
 *    second collectData() call in one process would presumably fail with a
 *    redeclaration error - confirm.
 *  - $unescape_fb_link is the callback for href attributes: a link starting
 *    with '/' is prefixed with self::URI, and a facebook.com/l.php?u=
 *    redirect is replaced by its url-decoded target. NOTE(review): the
 *    relative-link branch appends a '"' and the return value appends
 *    another one, which looks like a doubled quote. The closure returns
 *    nothing when $matches is not an array with more than one element.
 *  - $unescape_fb_emote returns the ASCII emoticon for the first capture
 *    group that equals a known name, otherwise the whole match unchanged.
 *  - when the 'u' input is not null, the page URL is built on
 *    touch.facebook.com and getCookies() / getPageID() are called for it.
 *  - for each article: the uri is taken from the first link inside
 *    div._52jc, header and footer are blanked, tags and a fixed list of
 *    attributes are stripped, and hrefs and emoticons are rewritten through
 *    the two callbacks.
 *  - the title is the author (cut at the first wordwrap() break when longer
 *    than 24 bytes) followed by ' | ' and the tag-stripped content, the
 *    whole cut the same way at 64 bytes; '...' is appended after each cut.
 */
/*
 * Review notes for computeNextLink($string, $pageID), which is defined
 * further down in this collapsed section:
 *  - it searches $string for "timeline_unit" followed by five fields, each
 *    introduced by two literal backslashes and "u00253A" (the eight
 *    backslashes in each PHP literal become four in the pattern text, i.e.
 *    two escaped backslashes); the first field must be "1", the other four
 *    are captured digit runs;
 *  - the four captures and $pageID are spliced into a
 *    pages_reaction_units/more URL that asks for unit_count=3;
 *  - the preg_match() result is not checked, so $result[1] to $result[4]
 *    are read even when nothing matched.
 */
public function collectData(){ function extractFromDelimiters($string, $start, $end){ if(strpos($string, $start) !== false){ $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); return $section_retrieved; } return false; } //Utility function for cleaning a Facebook link $unescape_fb_link = function($matches){ if(is_array($matches) && count($matches) > 1){ $link = $matches[1]; if(strpos($link, '/') === 0) $link = self::URI . $link . '"'; if(strpos($link, 'facebook.com/l.php?u=') !== false) $link = urldecode(extractFromDelimiters($link, 'facebook.com/l.php?u=', '&')); return ' href="' . $link . '"'; } }; //Utility function for converting facebook emoticons $unescape_fb_emote = function($matches){ static $facebook_emoticons = array( 'smile' => ':)', 'frown' => ':(', 'tongue' => ':P', 'grin' => ':D', 'gasp' => ':O', 'wink' => ';)', 'pacman' => ':<', 'grumpy' => '>_<', 'unsure' => ':/', 'cry' => ':\'(', 'kiki' => '^_^', 'glasses' => '8-)', 'sunglasses' => 'B-)', 'heart' => '<3', 'devil' => ']:D', 'angel' => '0:)', 'squint' => '-_-', 'confused' => 'o_O', 'upset' => 'xD', 'colonthree' => ':3', 'like' => '👍'); $len = count($matches); if ($len > 1) for ($i = 1; $i < $len; $i++) foreach ($facebook_emoticons as $name => $emote) if ($matches[$i] === $name) return $emote; return $matches[0]; }; if($this->getInput('u') !== null){ $page = 'https://touch.facebook.com/' . $this->getInput('u'); $cookies = $this->getCookies($page); $pageID = $this->getPageID($page, $cookies); if($pageID === null){ echo <<buildContent($fileContent); $author = $this->getInput('u'); foreach($html->find("article") as $content){ $item = array(); $item['uri'] = "http://touch.facebook.com" . 
$content->find("div._52jc", 0)->find("a", 0)->getAttribute("href"); $content->find("header", 0)->innertext = ""; $content->find("footer", 0)->innertext = ""; //Remove html nodes, keep only img, links, basic formatting $content = strip_tags($content, '

'); //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content); //Clean useless html tag properties and fix link closing tags foreach (array( 'onmouseover', 'onclick', 'target', 'ajaxify', 'tabindex', 'class', 'style', 'data-[^=]*', 'aria-[^=]*', 'role', 'rel', 'id') as $property_name) $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content); $content = preg_replace('/<\/a [^>]+>/i', '', $content); //Convert textual representation of emoticons eg // "smile emoticon" back to ASCII emoticons eg ":)" $content = preg_replace_callback('/([^ <>]+) ([^<>]+)<\/u><\/i>/i', $unescape_fb_emote, $content); $item['content'] = $content; $title = $author; if (strlen($title) > 24) $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")) . '...'; $title = $title . ' | ' . strip_tags($content); if (strlen($title) > 64) $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...'; $item['title'] = $title; $item['author'] = $author; array_push($this->items, $item); } } // Currently not used. Is used to get more than only 3 elements, as they appear on another page. private function computeNextLink($string, $pageID){ $regex = implode( '', array( "/timeline_unit", "\\\\\\\\u00253A1", "\\\\\\\\u00253A([0-9]*)", "\\\\\\\\u00253A([0-9]*)", "\\\\\\\\u00253A([0-9]*)", "\\\\\\\\u00253A([0-9]*)/" ) ); preg_match($regex, $string, $result); return implode( '', array( "https://touch.facebook.com/pages_reaction_units/more/?page_id=", $pageID, "&cursor=%7B%22timeline_cursor%22%3A%22timeline_unit%3A1%3A", $result[1], "%3A", $result[2], "%3A", $result[3], "%3A", $result[4], "%22%2C%22timeline_section_cursor%22%3A%7B%7D%2C%22", "has_next_page%22%3Atrue%7D&surface=mobile_page_home&unit_count=3" ) ); } //Builds the HTML from the encoded JS that Facebook provides. 
/**
 * Builds the HTML document from the JSON-escaped markup embedded in a
 * Facebook response.
 *
 * The markup is captured from the `"html":"...","replace` fragment of
 * $pageContent, JSON-unescaped, entity-decoded and handed to
 * str_get_html(). When the fragment is missing or cannot be decoded, an
 * empty string is parsed instead of reading an undefined match offset.
 *
 * @param string $pageContent Raw response body.
 * @return mixed Whatever str_get_html() returns for the decoded markup.
 */
private function buildContent($pageContent){
    $regex = "/\\\"html\\\":\\\"(.*?)\\\",\\\"replace/";

    $encoded = '';
    if(preg_match($regex, $pageContent, $result) === 1){
        $encoded = $result[1];
    }

    // The capture is the inside of a JSON string literal; wrapping it in
    // quotes again lets json_decode() resolve its escape sequences.
    $decoded = json_decode('"' . $encoded . '"');
    if(!is_string($decoded)){
        $decoded = '';
    }

    return str_get_html(html_entity_decode($decoded));
}

/**
 * Creates the stream context shared by the requests of this bridge.
 *
 * @param array $headerLines Complete header lines ("Name: value") to send
 *                           in addition to the fixed user agent.
 * @return resource Stream context for file_get_contents().
 */
private function createHttpContext(array $headerLines = array()){
    return stream_context_create(array(
        'http' => array(
            'user_agent' => "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0",
            // Request headers must go through the 'header' option; unknown
            // option keys such as 'Accept' are not sent.
            'header' => $headerLines
        )
    ));
}

/**
 * Collects the cookies set by a first, cookie-less request to the page,
 * as Facebook sometimes refuses to give the page if no cookie is provided.
 *
 * @param string $pageURL URL to request.
 * @return string "name=value" pairs joined with "; ", or an empty string
 *                when no Set-Cookie header was received.
 */
private function getCookies($pageURL){
    $ctx = $this->createHttpContext(array(
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    ));

    // Only the response headers matter here; the body is discarded.
    file_get_contents($pageURL, false, $ctx);

    // The variable is not populated when the request never got a response.
    if(!isset($http_response_header)){
        return '';
    }

    $pairs = array();
    foreach($http_response_header as $hdr){
        if(stripos($hdr, 'Set-Cookie:') !== 0){
            continue;
        }

        // Split only on the first ':' so that a ':' inside the cookie
        // value is kept, then drop the attributes after the first ';'.
        $value = explode(':', $hdr, 2)[1];
        $pair = trim(explode(';', $value, 2)[0]);
        if($pair !== ''){
            $pairs[] = $pair;
        }
    }

    return implode('; ', $pairs);
}

/**
 * Gets the page ID from the Facebook page.
 *
 * Two patterns are tried in order: the `page_id=` query parameter found on
 * the regular page, then the `"pageID"` JSON property found on the captcha
 * page. An empty capture is treated as "not found".
 *
 * @param string $page    URL of the page.
 * @param string $cookies Value for the Cookie request header (may be empty).
 * @return string|null The numeric page ID, or null when the page could not
 *                     be downloaded or contains no ID.
 */
private function getPageID($page, $cookies){
    $headerLines = array();
    if($cookies !== '' && $cookies !== null){
        $headerLines[] = 'Cookie: ' . $cookies;
    }

    $pageContent = file_get_contents($page, false, $this->createHttpContext($headerLines));
    if($pageContent === false){
        return null;
    }

    $patterns = array(
        "/page_id=([0-9]*)&/",        // page without a captcha
        "/\"pageID\":\"([0-9]*)\"/"   // page with a captcha
    );

    foreach($patterns as $regex){
        if(preg_match($regex, $pageContent, $matches) === 1 && $matches[1] !== ''){
            return $matches[1];
        }
    }

    return null;
}

/**
 * Returns the display name of the bridge, prefixed with $this->name when
 * that property is set.
 *
 * @return string
 */
public function getName(){
    return (isset($this->name) ? $this->name . ' - ' : '') . 'Facebook Bridge';
}

/**
 * Returns the base URI of the bridged site.
 *
 * @return string
 */
public function getURI(){
    return 'http://facebook.com';
}

/**
 * Returns the cache lifetime in seconds.
 *
 * @return int
 */
public function getCacheDuration(){
    return 60 * 60 * 3; // 3 hours
}
}