FacebookBridge.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. <?php
  2. class FacebookBridge extends BridgeAbstract {
  3. const MAINTAINER = 'teromene';
  4. const NAME = 'Facebook';
  5. const URI = 'https://www.facebook.com/';
  6. const CACHE_TIMEOUT = 300; // 5min
  7. const DESCRIPTION = 'Input a page title or a profile log. For a profile log,
  8. please insert the parameter as follow : myExamplePage/132621766841117';
  9. const PARAMETERS = array( array(
  10. 'u' => array(
  11. 'name' => 'Username',
  12. 'required' => true
  13. ),
  14. 'media_type' => array(
  15. 'name' => 'Media type',
  16. 'type' => 'list',
  17. 'required' => false,
  18. 'values' => array(
  19. 'All' => 'all',
  20. 'Video' => 'video',
  21. 'No Video' => 'novideo'
  22. ),
  23. 'defaultValue' => 'all'
  24. ),
  25. 'skip_reviews' => array(
  26. 'name' => 'Skip reviews',
  27. 'type' => 'checkbox',
  28. 'required' => false,
  29. 'defaultValue' => false,
  30. 'title' => 'Feed includes reviews when checked'
  31. )
  32. ));
  33. private $authorName = '';
  34. public function collectData(){
  35. //Extract a string using start and end delimiters
  36. function extractFromDelimiters($string, $start, $end){
  37. if(strpos($string, $start) !== false) {
  38. $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
  39. $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
  40. return $section_retrieved;
  41. }
  42. return false;
  43. }
  44. //Utility function for cleaning a Facebook link
  45. $unescape_fb_link = function($matches){
  46. if(is_array($matches) && count($matches) > 1) {
  47. $link = $matches[1];
  48. if(strpos($link, '/') === 0)
  49. $link = self::URI . $link;
  50. if(strpos($link, 'facebook.com/l.php?u=') !== false)
  51. $link = urldecode(extractFromDelimiters($link, 'facebook.com/l.php?u=', '&'));
  52. return ' href="' . $link . '"';
  53. }
  54. };
  55. //Utility function for converting facebook emoticons
  56. $unescape_fb_emote = function($matches){
  57. static $facebook_emoticons = array(
  58. 'smile' => ':)',
  59. 'frown' => ':(',
  60. 'tongue' => ':P',
  61. 'grin' => ':D',
  62. 'gasp' => ':O',
  63. 'wink' => ';)',
  64. 'pacman' => ':<',
  65. 'grumpy' => '>_<',
  66. 'unsure' => ':/',
  67. 'cry' => ':\'(',
  68. 'kiki' => '^_^',
  69. 'glasses' => '8-)',
  70. 'sunglasses' => 'B-)',
  71. 'heart' => '<3',
  72. 'devil' => ']:D',
  73. 'angel' => '0:)',
  74. 'squint' => '-_-',
  75. 'confused' => 'o_O',
  76. 'upset' => 'xD',
  77. 'colonthree' => ':3',
  78. 'like' => '&#x1F44D;');
  79. $len = count($matches);
  80. if ($len > 1)
  81. for ($i = 1; $i < $len; $i++)
  82. foreach ($facebook_emoticons as $name => $emote)
  83. if ($matches[$i] === $name)
  84. return $emote;
  85. return $matches[0];
  86. };
  87. $html = null;
  88. //Handle captcha response sent by the viewer
  89. if (isset($_POST['captcha_response'])) {
  90. if (session_status() == PHP_SESSION_NONE)
  91. session_start();
  92. if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action'])) {
  93. $captcha_action = $_SESSION['captcha_action'];
  94. $captcha_fields = $_SESSION['captcha_fields'];
  95. $captcha_fields['captcha_response'] = preg_replace('/[^a-zA-Z0-9]+/', '', $_POST['captcha_response']);
  96. $header = array("Content-type:
  97. application/x-www-form-urlencoded\r\nReferer: $captcha_action\r\nCookie: noscript=1\r\n");
  98. $opts = array(
  99. CURLOPT_POST => 1,
  100. CURLOPT_POSTFIELDS => http_build_query($captcha_fields)
  101. );
  102. $html = getContents($captcha_action, $header, $opts);
  103. if($html === false) {
  104. returnServerError('Failed to submit captcha response back to Facebook');
  105. }
  106. unset($_SESSION['captcha_fields']);
  107. $html = str_get_html($html);
  108. }
  109. unset($_SESSION['captcha_fields']);
  110. unset($_SESSION['captcha_action']);
  111. }
  112. //Retrieve page contents
  113. if(is_null($html)) {
  114. $header = array('Accept-Language: ' . getEnv('HTTP_ACCEPT_LANGUAGE') . "\r\n");
  115. // Check if the user provided a fully qualified URL
  116. if (filter_var($this->getInput('u'), FILTER_VALIDATE_URL)) {
  117. $urlparts = parse_url($this->getInput('u'));
  118. if($urlparts['host'] !== parse_url(self::URI)['host']) {
  119. returnClientError('The host you provided is invalid! Received "'
  120. . $urlparts['host']
  121. . '", expected "'
  122. . parse_url(self::URI)['host']
  123. . '"!');
  124. }
  125. if(!array_key_exists('path', $urlparts)
  126. || $urlparts['path'] === '/') {
  127. returnClientError('The URL you provided doesn\'t contain the user name!');
  128. }
  129. $user = explode('/', $urlparts['path'])[1];
  130. $html = getSimpleHTMLDOM(self::URI . urlencode($user) . '?_fb_noscript=1', $header)
  131. or returnServerError('No results for this query.');
  132. } else {
  133. // First character cannot be a forward slash
  134. if(strpos($this->getInput('u'), '/') === 0) {
  135. returnClientError('Remove leading slash "/" from the username!');
  136. }
  137. if(!strpos($this->getInput('u'), '/')) {
  138. $html = getSimpleHTMLDOM(self::URI . urlencode($this->getInput('u')) . '?_fb_noscript=1', $header)
  139. or returnServerError('No results for this query.');
  140. } else {
  141. $html = getSimpleHTMLDOM(self::URI . 'pages/' . $this->getInput('u') . '?_fb_noscript=1', $header)
  142. or returnServerError('No results for this query.');
  143. }
  144. }
  145. }
  146. //Handle captcha form?
  147. $captcha = $html->find('div.captcha_interstitial', 0);
  148. if (!is_null($captcha)) {
  149. //Save form for submitting after getting captcha response
  150. if (session_status() == PHP_SESSION_NONE)
  151. session_start();
  152. $captcha_fields = array();
  153. foreach ($captcha->find('input, button') as $input)
  154. $captcha_fields[$input->name] = $input->value;
  155. $_SESSION['captcha_fields'] = $captcha_fields;
  156. $_SESSION['captcha_action'] = $captcha->find('form', 0)->action;
  157. //Show captcha filling form to the viewer, proxying the captcha image
  158. $img = base64_encode(getContents($captcha->find('img', 0)->src));
  159. http_response_code(500);
  160. header('Content-Type: text/html');
  161. $message = <<<EOD
  162. <form method="post" action="?{$_SERVER['QUERY_STRING']}">
  163. <h2>Facebook captcha challenge</h2>
  164. <p>Unfortunately, rss-bridge cannot fetch the requested page.<br />
  165. Facebook wants rss-bridge to resolve the following captcha:</p>
  166. <p><img src="data:image/png;base64,{$img}" /></p>
  167. <p><b>Response:</b> <input name="captcha_response" placeholder="please fill in" />
  168. <input type="submit" value="Submit!" /></p>
  169. </form>
  170. EOD;
  171. die($message);
  172. }
  173. //No captcha? We can carry on retrieving page contents :)
  174. //First, we check wether the page is public or not
  175. $loginForm = $html->find('._585r', 0);
  176. if($loginForm != null) {
  177. returnServerError('You must be logged in to view this page. This is not supported by RSS-Bridge.');
  178. }
  179. $element = $html
  180. ->find('#pagelet_timeline_main_column')[0]
  181. ->children(0)
  182. ->children(0)
  183. ->children(0)
  184. ->next_sibling()
  185. ->children(0);
  186. if(isset($element)) {
  187. $author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
  188. $profilePic = 'https://graph.facebook.com/'
  189. . $this->getInput('u')
  190. . '/picture?width=200&amp;height=200';
  191. $this->authorName = $author;
  192. foreach($element->children() as $cell) {
  193. // Manage summary posts
  194. if(strpos($cell->class, '_3xaf') !== false) {
  195. $posts = $cell->children();
  196. } else {
  197. $posts = array($cell);
  198. }
  199. // Optionally skip reviews
  200. if($this->getInput('skip_reviews')
  201. && !is_null($cell->find('#review_composer_container', 0))) {
  202. continue;
  203. }
  204. foreach($posts as $post) {
  205. // Check media type
  206. switch($this->getInput('media_type')) {
  207. case 'all': break;
  208. case 'video':
  209. if(empty($post->find('[aria-label=Video]'))) continue 2;
  210. break;
  211. case 'novideo':
  212. if(!empty($post->find('[aria-label=Video]'))) continue 2;
  213. break;
  214. default: break;
  215. }
  216. $item = array();
  217. if(count($post->find('abbr')) > 0) {
  218. //Retrieve post contents
  219. $content = preg_replace(
  220. '/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i',
  221. '',
  222. $post);
  223. $content = preg_replace(
  224. '/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i',
  225. '',
  226. $content);
  227. $content = preg_replace(
  228. '/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i',
  229. '',
  230. $content);
  231. $content = preg_replace(
  232. '/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i',
  233. '',
  234. $content);
  235. //Remove html nodes, keep only img, links, basic formatting
  236. $content = strip_tags($content, '<a><img><i><u><br><p>');
  237. //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
  238. $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content);
  239. //Clean useless html tag properties and fix link closing tags
  240. foreach (array(
  241. 'onmouseover',
  242. 'onclick',
  243. 'target',
  244. 'ajaxify',
  245. 'tabindex',
  246. 'class',
  247. 'style',
  248. 'data-[^=]*',
  249. 'aria-[^=]*',
  250. 'role',
  251. 'rel',
  252. 'id') as $property_name)
  253. $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content);
  254. $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);
  255. //Convert textual representation of emoticons eg
  256. //"<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
  257. $content = preg_replace_callback(
  258. '/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i',
  259. $unescape_fb_emote,
  260. $content
  261. );
  262. //Retrieve date of the post
  263. $date = $post->find('abbr')[0];
  264. if(isset($date) && $date->hasAttribute('data-utime')) {
  265. $date = $date->getAttribute('data-utime');
  266. } else {
  267. $date = 0;
  268. }
  269. //Build title from username and content
  270. $title = $author;
  271. if(strlen($title) > 24)
  272. $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")) . '...';
  273. $title = $title . ' | ' . strip_tags($content);
  274. if(strlen($title) > 64)
  275. $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...';
  276. $uri = self::URI . $post->find('abbr')[0]->parent()->getAttribute('href');
  277. //Build and add final item
  278. $item['uri'] = htmlspecialchars_decode($uri);
  279. $item['content'] = htmlspecialchars_decode($content);
  280. $item['title'] = $title;
  281. $item['author'] = $author;
  282. $item['timestamp'] = $date;
  283. $this->items[] = $item;
  284. }
  285. }
  286. }
  287. }
  288. }
  289. public function getName(){
  290. if(!empty($this->authorName)) {
  291. return isset($this->extraInfos['name']) ? $this->extraInfos['name'] : $this->authorName
  292. . ' - Facebook Bridge';
  293. }
  294. return parent::getName();
  295. }
  296. }