<?php
// FacebookBridge.php
  2. class FacebookBridge extends BridgeAbstract {
  3. const MAINTAINER = 'teromene';
  4. const NAME = 'Facebook';
  5. const URI = 'https://www.facebook.com/';
  6. const CACHE_TIMEOUT = 300; // 5min
  7. const DESCRIPTION = 'Input a page title or a profile log. For a profile log,
  8. please insert the parameter as follow : myExamplePage/132621766841117';
  9. const PARAMETERS = array( array(
  10. 'u' => array(
  11. 'name' => 'Username',
  12. 'required' => true
  13. ),
  14. 'media_type' => array(
  15. 'name' => 'Media type',
  16. 'type' => 'list',
  17. 'required' => false,
  18. 'values' => array(
  19. 'All' => 'all',
  20. 'Video' => 'video',
  21. 'No Video' => 'novideo'
  22. ),
  23. 'defaultValue' => 'all'
  24. )
  25. ));
  26. private $authorName = '';
  27. public function collectData(){
  28. //Extract a string using start and end delimiters
  29. function extractFromDelimiters($string, $start, $end){
  30. if(strpos($string, $start) !== false) {
  31. $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
  32. $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
  33. return $section_retrieved;
  34. }
  35. return false;
  36. }
  37. //Utility function for cleaning a Facebook link
  38. $unescape_fb_link = function($matches){
  39. if(is_array($matches) && count($matches) > 1) {
  40. $link = $matches[1];
  41. if(strpos($link, '/') === 0)
  42. $link = self::URI . $link;
  43. if(strpos($link, 'facebook.com/l.php?u=') !== false)
  44. $link = urldecode(extractFromDelimiters($link, 'facebook.com/l.php?u=', '&'));
  45. return ' href="' . $link . '"';
  46. }
  47. };
  48. //Utility function for converting facebook emoticons
  49. $unescape_fb_emote = function($matches){
  50. static $facebook_emoticons = array(
  51. 'smile' => ':)',
  52. 'frown' => ':(',
  53. 'tongue' => ':P',
  54. 'grin' => ':D',
  55. 'gasp' => ':O',
  56. 'wink' => ';)',
  57. 'pacman' => ':<',
  58. 'grumpy' => '>_<',
  59. 'unsure' => ':/',
  60. 'cry' => ':\'(',
  61. 'kiki' => '^_^',
  62. 'glasses' => '8-)',
  63. 'sunglasses' => 'B-)',
  64. 'heart' => '<3',
  65. 'devil' => ']:D',
  66. 'angel' => '0:)',
  67. 'squint' => '-_-',
  68. 'confused' => 'o_O',
  69. 'upset' => 'xD',
  70. 'colonthree' => ':3',
  71. 'like' => '&#x1F44D;');
  72. $len = count($matches);
  73. if ($len > 1)
  74. for ($i = 1; $i < $len; $i++)
  75. foreach ($facebook_emoticons as $name => $emote)
  76. if ($matches[$i] === $name)
  77. return $emote;
  78. return $matches[0];
  79. };
  80. $html = null;
  81. //Handle captcha response sent by the viewer
  82. if (isset($_POST['captcha_response'])) {
  83. if (session_status() == PHP_SESSION_NONE)
  84. session_start();
  85. if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action'])) {
  86. $captcha_action = $_SESSION['captcha_action'];
  87. $captcha_fields = $_SESSION['captcha_fields'];
  88. $captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $_POST['captcha_response']);
  89. $header = array("Content-type:
  90. application/x-www-form-urlencoded\r\nReferer: $captcha_action\r\nCookie: noscript=1\r\n");
  91. $opts = array(
  92. CURLOPT_POST => 1,
  93. CURLOPT_POSTFIELDS => http_build_query($captcha_fields)
  94. );
  95. $html = getContents($captcha_action, $header, $opts);
  96. if($html === false) {
  97. returnServerError('Failed to submit captcha response back to Facebook');
  98. }
  99. unset($_SESSION['captcha_fields']);
  100. $html = str_get_html($html);
  101. }
  102. unset($_SESSION['captcha_fields']);
  103. unset($_SESSION['captcha_action']);
  104. }
  105. //Retrieve page contents
  106. if(is_null($html)) {
  107. $header = array('Accept-Language: ' . getEnv('HTTP_ACCEPT_LANGUAGE') . "\r\n");
  108. // First character cannot be a forward slash
  109. if(strpos($this->getInput('u'), "/") === 0) {
  110. returnClientError('Remove leading slash "/" from the username!');
  111. }
  112. if(!strpos($this->getInput('u'), "/")) {
  113. $html = getSimpleHTMLDOM(self::URI . urlencode($this->getInput('u')) . '?_fb_noscript=1', $header)
  114. or returnServerError('No results for this query.');
  115. } else {
  116. $html = getSimpleHTMLDOM(self::URI . 'pages/' . $this->getInput('u') . '?_fb_noscript=1', $header)
  117. or returnServerError('No results for this query.');
  118. }
  119. }
  120. //Handle captcha form?
  121. $captcha = $html->find('div.captcha_interstitial', 0);
  122. if (!is_null($captcha)) {
  123. //Save form for submitting after getting captcha response
  124. if (session_status() == PHP_SESSION_NONE)
  125. session_start();
  126. $captcha_fields = array();
  127. foreach ($captcha->find('input, button') as $input)
  128. $captcha_fields[$input->name] = $input->value;
  129. $_SESSION['captcha_fields'] = $captcha_fields;
  130. $_SESSION['captcha_action'] = $captcha->find('form', 0)->action;
  131. //Show captcha filling form to the viewer, proxying the captcha image
  132. $img = base64_encode(getContents($captcha->find('img', 0)->src));
  133. http_response_code(500);
  134. header('Content-Type: text/html');
  135. $message = <<<EOD
  136. <form method="post" action="?{$_SERVER['QUERY_STRING']}">
  137. <h2>Facebook captcha challenge</h2>
  138. <p>Unfortunately, rss-bridge cannot fetch the requested page.<br />
  139. Facebook wants rss-bridge to resolve the following captcha:</p>
  140. <p><img src="data:image/png;base64,{$img}" /></p>
  141. <p><b>Response:</b> <input name="captcha_response" placeholder="please fill in" />
  142. <input type="submit" value="Submit!" /></p>
  143. </form>
  144. EOD;
  145. die($message);
  146. }
  147. //No captcha? We can carry on retrieving page contents :)
  148. $element = $html
  149. ->find('#pagelet_timeline_main_column')[0]
  150. ->children(0)
  151. ->children(0)
  152. ->children(0)
  153. ->next_sibling()
  154. ->children(0);
  155. if(isset($element)) {
  156. $author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
  157. $profilePic = 'https://graph.facebook.com/'
  158. . $this->getInput('u')
  159. . '/picture?width=200&amp;height=200';
  160. $this->authorName = $author;
  161. foreach($element->children() as $cell) {
  162. // Manage summary posts
  163. if(strpos($cell->class, '_3xaf') !== false) {
  164. $posts = $cell->children();
  165. } else {
  166. $posts = array($cell);
  167. }
  168. foreach($posts as $post) {
  169. // Check media type
  170. switch($this->getInput('media_type')) {
  171. case 'all': break;
  172. case 'video':
  173. if(empty($post->find('[aria-label=Video]'))) continue 2;
  174. break;
  175. case 'novideo':
  176. if(!empty($post->find('[aria-label=Video]'))) continue 2;
  177. break;
  178. default: break;
  179. }
  180. $item = array();
  181. if(count($post->find('abbr')) > 0) {
  182. //Retrieve post contents
  183. $content = preg_replace(
  184. '/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i',
  185. '',
  186. $post);
  187. $content = preg_replace(
  188. '/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i',
  189. '',
  190. $content);
  191. $content = preg_replace(
  192. '/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i',
  193. '',
  194. $content);
  195. $content = preg_replace(
  196. '/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i',
  197. '',
  198. $content);
  199. //Remove html nodes, keep only img, links, basic formatting
  200. $content = strip_tags($content, '<a><img><i><u><br><p>');
  201. //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
  202. $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content);
  203. //Clean useless html tag properties and fix link closing tags
  204. foreach (array(
  205. 'onmouseover',
  206. 'onclick',
  207. 'target',
  208. 'ajaxify',
  209. 'tabindex',
  210. 'class',
  211. 'style',
  212. 'data-[^=]*',
  213. 'aria-[^=]*',
  214. 'role',
  215. 'rel',
  216. 'id') as $property_name)
  217. $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content);
  218. $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);
  219. //Convert textual representation of emoticons eg
  220. //"<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
  221. $content = preg_replace_callback(
  222. '/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i',
  223. $unescape_fb_emote,
  224. $content
  225. );
  226. //Retrieve date of the post
  227. $date = $post->find("abbr")[0];
  228. if(isset($date) && $date->hasAttribute('data-utime')) {
  229. $date = $date->getAttribute('data-utime');
  230. } else {
  231. $date = 0;
  232. }
  233. //Build title from username and content
  234. $title = $author;
  235. if(strlen($title) > 24)
  236. $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")) . '...';
  237. $title = $title . ' | ' . strip_tags($content);
  238. if(strlen($title) > 64)
  239. $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...';
  240. $uri = self::URI . $post->find('abbr')[0]->parent()->getAttribute('href');
  241. //Build and add final item
  242. $item['uri'] = htmlspecialchars_decode($uri);
  243. $item['content'] = htmlspecialchars_decode($content);
  244. $item['title'] = $title;
  245. $item['author'] = $author;
  246. $item['timestamp'] = $date;
  247. $this->items[] = $item;
  248. }
  249. }
  250. }
  251. }
  252. }
  253. public function getName(){
  254. if(!empty($this->authorName)) {
  255. return isset($this->extraInfos['name']) ? $this->extraInfos['name'] : $this->authorName
  256. . ' - Facebook Bridge';
  257. }
  258. return parent::getName();
  259. }
  260. }