FB2Bridge.php 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. <?php
  2. class FB2Bridge extends BridgeAbstract {
  3. const MAINTAINER = 'teromene';
  4. const NAME = 'Facebook Alternate';
  5. const URI = 'https://www.facebook.com/';
  6. const CACHE_TIMEOUT = 1000;
  7. const DESCRIPTION = 'Input a page title or a profile log. For a profile log,
  8. please insert the parameter as follow : myExamplePage/132621766841117';
  9. const PARAMETERS = array( array(
  10. 'u' => array(
  11. 'name' => 'Username',
  12. 'required' => true
  13. )
  14. ));
  15. public function collectData(){
  16. function extractFromDelimiters($string, $start, $end){
  17. if(strpos($string, $start) !== false) {
  18. $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
  19. $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
  20. return $section_retrieved;
  21. }
  22. return false;
  23. }
  24. //Utility function for cleaning a Facebook link
  25. $unescape_fb_link = function($matches){
  26. if(is_array($matches) && count($matches) > 1) {
  27. $link = $matches[1];
  28. if(strpos($link, '/') === 0)
  29. $link = self::URI . $link . '"';
  30. if(strpos($link, 'facebook.com/l.php?u=') !== false)
  31. $link = urldecode(extractFromDelimiters($link, 'facebook.com/l.php?u=', '&'));
  32. return ' href="' . $link . '"';
  33. }
  34. };
  35. //Utility function for converting facebook emoticons
  36. $unescape_fb_emote = function($matches){
  37. static $facebook_emoticons = array(
  38. 'smile' => ':)',
  39. 'frown' => ':(',
  40. 'tongue' => ':P',
  41. 'grin' => ':D',
  42. 'gasp' => ':O',
  43. 'wink' => ';)',
  44. 'pacman' => ':<',
  45. 'grumpy' => '>_<',
  46. 'unsure' => ':/',
  47. 'cry' => ':\'(',
  48. 'kiki' => '^_^',
  49. 'glasses' => '8-)',
  50. 'sunglasses' => 'B-)',
  51. 'heart' => '<3',
  52. 'devil' => ']:D',
  53. 'angel' => '0:)',
  54. 'squint' => '-_-',
  55. 'confused' => 'o_O',
  56. 'upset' => 'xD',
  57. 'colonthree' => ':3',
  58. 'like' => '&#x1F44D;');
  59. $len = count($matches);
  60. if ($len > 1)
  61. for ($i = 1; $i < $len; $i++)
  62. foreach ($facebook_emoticons as $name => $emote)
  63. if ($matches[$i] === $name)
  64. return $emote;
  65. return $matches[0];
  66. };
  67. if($this->getInput('u') !== null) {
  68. $page = 'https://touch.facebook.com/' . $this->getInput('u');
  69. $cookies = $this->getCookies($page);
  70. $pageID = $this->getPageID($page, $cookies);
  71. if($pageID === null) {
  72. echo <<<EOD
  73. Unable to get the page id. You should consider getting the ID by hand, then importing it into FB2Bridge
  74. EOD;
  75. die();
  76. } elseif($pageID == -1) {
  77. echo <<<EOD
  78. This page is not accessible without being logged in.
  79. EOD;
  80. die();
  81. }
  82. }
  83. //Build the string for the first request
  84. $requestString = 'https://touch.facebook.com/pages_reaction_units/more/?page_id='
  85. . $pageID
  86. . '&cursor={"card_id"%3A"videos"%2C"has_next_page"%3Atrue}&surface=mobile_page_home&unit_count=8';
  87. $fileContent = getContents($requestString);
  88. $articleIndex = 0;
  89. $maxArticle = 3;
  90. $html = $this->buildContent($fileContent);
  91. $author = $this->getInput('u');
  92. foreach($html->find('article') as $content) {
  93. $item = array();
  94. $item['uri'] = 'http://touch.facebook.com'
  95. . $content->find("div[class='_52jc _5qc4 _24u0 _36xo']", 0)->find('a', 0)->getAttribute('href');
  96. if($content->find('header', 0) !== null) {
  97. $content->find('header', 0)->innertext = '';
  98. }
  99. if($content->find('footer', 0) !== null) {
  100. $content->find('footer', 0)->innertext = '';
  101. }
  102. //Remove html nodes, keep only img, links, basic formatting
  103. $content = strip_tags($content, '<a><img><i><u><br><p>');
  104. //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
  105. $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content);
  106. //Clean useless html tag properties and fix link closing tags
  107. foreach (array(
  108. 'onmouseover',
  109. 'onclick',
  110. 'target',
  111. 'ajaxify',
  112. 'tabindex',
  113. 'class',
  114. 'style',
  115. 'data-[^=]*',
  116. 'aria-[^=]*',
  117. 'role',
  118. 'rel',
  119. 'id') as $property_name)
  120. $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content);
  121. $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);
  122. //Convert textual representation of emoticons eg
  123. // "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
  124. $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', $unescape_fb_emote, $content);
  125. $item['content'] = $content;
  126. $title = $author;
  127. if (strlen($title) > 24)
  128. $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")) . '...';
  129. $title = $title . ' | ' . strip_tags($content);
  130. if (strlen($title) > 64)
  131. $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...';
  132. $item['title'] = $title;
  133. $item['author'] = $author;
  134. array_push($this->items, $item);
  135. }
  136. }
  137. // Currently not used. Is used to get more than only 3 elements, as they appear on another page.
  138. private function computeNextLink($string, $pageID){
  139. $regex = implode(
  140. '',
  141. array(
  142. '/timeline_unit',
  143. "\\\\\\\\u00253A1",
  144. "\\\\\\\\u00253A([0-9]*)",
  145. "\\\\\\\\u00253A([0-9]*)",
  146. "\\\\\\\\u00253A([0-9]*)",
  147. "\\\\\\\\u00253A([0-9]*)/"
  148. )
  149. );
  150. preg_match($regex, $string, $result);
  151. return implode(
  152. '',
  153. array(
  154. 'https://touch.facebook.com/pages_reaction_units/more/?page_id=',
  155. $pageID,
  156. '&cursor=%7B%22timeline_cursor%22%3A%22timeline_unit%3A1%3A',
  157. $result[1],
  158. '%3A',
  159. $result[2],
  160. '%3A',
  161. $result[3],
  162. '%3A',
  163. $result[4],
  164. '%22%2C%22timeline_section_cursor%22%3A%7B%7D%2C%22',
  165. 'has_next_page%22%3Atrue%7D&surface=mobile_page_home&unit_count=3'
  166. )
  167. );
  168. }
  169. //Builds the HTML from the encoded JS that Facebook provides.
  170. private function buildContent($pageContent){
  171. // The html ends with:
  172. // /div>","replaceifexists
  173. $regex = '/\\"html\\":(\".+\/div>"),"replace/';
  174. preg_match($regex, $pageContent, $result);
  175. return str_get_html(html_entity_decode(json_decode($result[1])));
  176. }
  177. //Builds the cookie from the page, as Facebook sometimes refuses to give
  178. //the page if no cookie is provided.
  179. private function getCookies($pageURL){
  180. $ctx = stream_context_create(array(
  181. 'http' => array(
  182. 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0',
  183. 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
  184. )
  185. )
  186. );
  187. $a = file_get_contents($pageURL, 0, $ctx);
  188. //First request to get the cookie
  189. $cookies = '';
  190. foreach($http_response_header as $hdr) {
  191. if(strpos($hdr, 'Set-Cookie') !== false) {
  192. $cLine = explode(':', $hdr)[1];
  193. $cLine = explode(';', $cLine)[0];
  194. $cookies .= ';' . $cLine;
  195. }
  196. }
  197. return substr($cookies, 1);
  198. }
  199. //Get the page ID from the Facebook page.
  200. private function getPageID($page, $cookies){
  201. $context = stream_context_create(array(
  202. 'http' => array(
  203. 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0',
  204. 'header' => 'Cookie: ' . $cookies
  205. )
  206. )
  207. );
  208. $pageContent = file_get_contents($page, 0, $context);
  209. if(strpos($pageContent, 'signup-button') != false) {
  210. return -1;
  211. }
  212. //Get the page ID if we don't have a captcha
  213. $regex = '/page_id=([0-9]*)&/';
  214. preg_match($regex, $pageContent, $matches);
  215. if(count($matches) > 0) {
  216. return $matches[1];
  217. }
  218. //Get the page ID if we do have a captcha
  219. $regex = '/"pageID":"([0-9]*)"/';
  220. preg_match($regex, $pageContent, $matches);
  221. return $matches[1];
  222. }
  223. public function getName(){
  224. return (isset($this->name) ? $this->name . ' - ' : '') . 'Facebook Bridge';
  225. }
  226. public function getURI(){
  227. return 'http://facebook.com';
  228. }
  229. }