1
0

FacebookBridge.php 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. <?php
  2. class FacebookBridge extends BridgeAbstract {
  3. const MAINTAINER = 'teromene';
  4. const NAME = 'Facebook';
  5. const URI = 'https://www.facebook.com/';
  6. const CACHE_TIMEOUT = 300; // 5min
  7. const DESCRIPTION = 'Input a page title or a profile log. For a profile log,
  8. please insert the parameter as follow : myExamplePage/132621766841117';
  9. const PARAMETERS = array( array(
  10. 'u' => array(
  11. 'name' => 'Username',
  12. 'required' => true
  13. )
  14. ));
  15. private $authorName = '';
  16. public function collectData(){
  17. //Extract a string using start and end delimiters
  18. function extractFromDelimiters($string, $start, $end){
  19. if(strpos($string, $start) !== false){
  20. $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
  21. $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
  22. return $section_retrieved;
  23. }
  24. return false;
  25. }
  26. //Utility function for cleaning a Facebook link
  27. $unescape_fb_link = function($matches){
  28. if(is_array($matches) && count($matches) > 1){
  29. $link = $matches[1];
  30. if(strpos($link, '/') === 0)
  31. $link = self::URI . $link . '"';
  32. if(strpos($link, 'facebook.com/l.php?u=') !== false)
  33. $link = urldecode(extractFromDelimiters($link, 'facebook.com/l.php?u=', '&'));
  34. return ' href="' . $link . '"';
  35. }
  36. };
  37. //Utility function for converting facebook emoticons
  38. $unescape_fb_emote = function($matches){
  39. static $facebook_emoticons = array(
  40. 'smile' => ':)',
  41. 'frown' => ':(',
  42. 'tongue' => ':P',
  43. 'grin' => ':D',
  44. 'gasp' => ':O',
  45. 'wink' => ';)',
  46. 'pacman' => ':<',
  47. 'grumpy' => '>_<',
  48. 'unsure' => ':/',
  49. 'cry' => ':\'(',
  50. 'kiki' => '^_^',
  51. 'glasses' => '8-)',
  52. 'sunglasses' => 'B-)',
  53. 'heart' => '<3',
  54. 'devil' => ']:D',
  55. 'angel' => '0:)',
  56. 'squint' => '-_-',
  57. 'confused' => 'o_O',
  58. 'upset' => 'xD',
  59. 'colonthree' => ':3',
  60. 'like' => '&#x1F44D;');
  61. $len = count($matches);
  62. if ($len > 1)
  63. for ($i = 1; $i < $len; $i++)
  64. foreach ($facebook_emoticons as $name => $emote)
  65. if ($matches[$i] === $name)
  66. return $emote;
  67. return $matches[0];
  68. };
  69. $html = null;
  70. //Handle captcha response sent by the viewer
  71. if (isset($_POST['captcha_response']))
  72. {
  73. if (session_status() == PHP_SESSION_NONE)
  74. session_start();
  75. if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action']))
  76. {
  77. $captcha_action = $_SESSION['captcha_action'];
  78. $captcha_fields = $_SESSION['captcha_fields'];
  79. $captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $_POST['captcha_response']);
  80. $http_options = array(
  81. 'http' => array(
  82. 'method' => 'POST',
  83. 'user_agent' => ini_get('user_agent'),
  84. 'header' => array("Content-type:
  85. application/x-www-form-urlencoded\r\nReferer: $captcha_action\r\nCookie: noscript=1\r\n"),
  86. 'content' => http_build_query($captcha_fields)
  87. ),
  88. );
  89. $context = stream_context_create($http_options);
  90. $html = getContents($captcha_action, false, $context);
  91. if($html === false){
  92. returnServerError('Failed to submit captcha response back to Facebook');
  93. }
  94. unset($_SESSION['captcha_fields']);
  95. $html = str_get_html($html);
  96. }
  97. unset($_SESSION['captcha_fields']);
  98. unset($_SESSION['captcha_action']);
  99. }
  100. //Retrieve page contents
  101. if(is_null($html)){
  102. if(!strpos($this->getInput('u'), "/")){
  103. $html = getSimpleHTMLDOM(self::URI . urlencode($this->getInput('u')) . '?_fb_noscript=1')
  104. or returnServerError('No results for this query.');
  105. } else {
  106. $html = getSimpleHTMLDOM(self::URI . 'pages/' . $this->getInput('u') . '?_fb_noscript=1')
  107. or returnServerError('No results for this query.');
  108. }
  109. }
  110. //Handle captcha form?
  111. $captcha = $html->find('div.captcha_interstitial', 0);
  112. if (!is_null($captcha))
  113. {
  114. //Save form for submitting after getting captcha response
  115. if (session_status() == PHP_SESSION_NONE)
  116. session_start();
  117. $captcha_fields = array();
  118. foreach ($captcha->find('input, button') as $input)
  119. $captcha_fields[$input->name] = $input->value;
  120. $_SESSION['captcha_fields'] = $captcha_fields;
  121. $_SESSION['captcha_action'] = $captcha->find('form', 0)->action;
  122. //Show captcha filling form to the viewer, proxying the captcha image
  123. $img = base64_encode(getContents($captcha->find('img', 0)->src));
  124. header('HTTP/1.1 500 ' . Http::getMessageForCode(500));
  125. header('Content-Type: text/html');
  126. $message = <<<EOD
  127. <form method="post" action="?{$_SERVER['QUERY_STRING']}">
  128. <h2>Facebook captcha challenge</h2>
  129. <p>Unfortunately, rss-bridge cannot fetch the requested page.<br />
  130. Facebook wants rss-bridge to resolve the following captcha:</p>
  131. <p><img src="data:image/png;base64,{$img}" /></p>
  132. <p><b>Response:</b> <input name="captcha_response" placeholder="please fill in" />
  133. <input type="submit" value="Submit!" /></p>
  134. </form>
  135. EOD;
  136. die($message);
  137. }
  138. //No captcha? We can carry on retrieving page contents :)
  139. $element = $html
  140. ->find('#pagelet_timeline_main_column')[0]
  141. ->children(0)
  142. ->children(0)
  143. ->children(0)
  144. ->next_sibling()
  145. ->children(0);
  146. if(isset($element)){
  147. $author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
  148. $profilePic = 'https://graph.facebook.com/'
  149. . $this->getInput('u')
  150. . '/picture?width=200&amp;height=200';
  151. $this->authorName = $author;
  152. foreach($element->children() as $post){
  153. // Ignore summary posts
  154. if(strpos($post->class, '_3xaf') !== false) continue;
  155. $item = array();
  156. if(count($post->find('abbr')) > 0){
  157. //Retrieve post contents
  158. $content = preg_replace(
  159. '/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i',
  160. '',
  161. $post);
  162. $content = preg_replace(
  163. '/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i',
  164. '',
  165. $content);
  166. $content = preg_replace(
  167. '/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i',
  168. '',
  169. $content);
  170. $content = preg_replace(
  171. '/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i',
  172. '',
  173. $content);
  174. //Remove html nodes, keep only img, links, basic formatting
  175. $content = strip_tags($content, '<a><img><i><u><br><p>');
  176. //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
  177. $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content);
  178. //Clean useless html tag properties and fix link closing tags
  179. foreach (array(
  180. 'onmouseover',
  181. 'onclick',
  182. 'target',
  183. 'ajaxify',
  184. 'tabindex',
  185. 'class',
  186. 'style',
  187. 'data-[^=]*',
  188. 'aria-[^=]*',
  189. 'role',
  190. 'rel',
  191. 'id') as $property_name)
  192. $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content);
  193. $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);
  194. //Convert textual representation of emoticons eg
  195. //"<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
  196. $content = preg_replace_callback(
  197. '/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i',
  198. $unescape_fb_emote,
  199. $content
  200. );
  201. //Retrieve date of the post
  202. $date = $post->find("abbr")[0];
  203. if(isset($date) && $date->hasAttribute('data-utime')){
  204. $date = $date->getAttribute('data-utime');
  205. } else {
  206. $date = 0;
  207. }
  208. //Build title from username and content
  209. $title = $author;
  210. if(strlen($title) > 24)
  211. $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")) . '...';
  212. $title = $title . ' | ' . strip_tags($content);
  213. if(strlen($title) > 64)
  214. $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...';
  215. //Build and add final item
  216. $item['uri'] = self::URI . $post->find('abbr')[0]->parent()->getAttribute('href');
  217. $item['content'] = $content;
  218. $item['title'] = $title;
  219. $item['author'] = $author;
  220. $item['timestamp'] = $date;
  221. $this->items[] = $item;
  222. }
  223. }
  224. }
  225. }
  226. public function getName(){
  227. return isset($this->extraInfos['name']) ? $this->extraInfos['name'] : $this->authorName . ' - Facebook Bridge';
  228. }
  229. }