FacebookBridge.php 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. <?php
  /**
   * Bridge that scrapes the public (no-script) timeline of a Facebook page or
   * profile and turns every post into a feed item.
   *
   * When Facebook answers with a captcha interstitial, the challenge is proxied
   * to the viewer and the viewer's answer is posted back on the next request.
   */
  class FacebookBridge extends BridgeAbstract {

      const MAINTAINER = "teromene";
      const NAME = "Facebook";
      const URI = "https://www.facebook.com/";
      const CACHE_TIMEOUT = 300; // 5min
      const DESCRIPTION = "Input a page title or a profile log. For a profile log, please insert the parameter as follow : myExamplePage/132621766841117";
      const PARAMETERS = array( array(
          'u' => array(
              'name' => 'Username',
              'required' => true
          )
      ));

      // Page/profile display name; stays empty until collectData() has found a timeline
      private $authorName = '';

      // Facebook's textual emoticon names => replacement inserted into the item content
      private static $emoticons = array(
          'smile' => ':)',
          'frown' => ':(',
          'tongue' => ':P',
          'grin' => ':D',
          'gasp' => ':O',
          'wink' => ';)',
          'pacman' => ':<',
          'grumpy' => '>_<',
          'unsure' => ':/',
          'cry' => ':\'(',
          'kiki' => '^_^',
          'glasses' => '8-)',
          'sunglasses' => 'B-)',
          'heart' => '<3',
          'devil' => ']:D',
          'angel' => '0:)',
          'squint' => '-_-',
          'confused' => 'o_O',
          'upset' => 'xD',
          'colonthree' => ':3',
          'like' => '&#x1F44D;'
      );

      /**
       * Fetches the requested timeline and appends one item per post to $this->items.
       *
       * Posts without an <abbr> element are skipped. If the expected timeline
       * container is not present in the page, no items are added.
       */
      public function collectData() {
          // A captcha answer posted by the viewer takes precedence over a fresh fetch
          $html = $this->submitCaptchaResponse();
          if (is_null($html)) {
              $html = $this->fetchFacebookPage();
          }

          // Does not return when the page is a captcha challenge
          $this->haltOnCaptchaChallenge($html);

          //No captcha? We can carry on retrieving page contents :)
          $timeline = $this->locateTimeline($html);
          if (is_null($timeline)) {
              return;
          }

          $titleNode = $html->find('title#pageTitle', 0);
          $author = is_null($titleNode)
              ? ''
              : str_replace(' | Facebook', '', $titleNode->innertext);
          $this->authorName = $author;

          foreach ($timeline->children() as $post) {
              $abbr = $post->find('abbr', 0);
              if (is_null($abbr)) {
                  continue;
              }
              $this->items[] = $this->buildItemFromPost($post, $abbr, $author);
          }
      }

      /**
       * Returns the feed name, prefixed with the page name once it is known.
       *
       * @return string
       */
      public function getName() {
          // The property defaults to '', so it must be compared against '' rather than tested with isset()
          $prefix = ((string) $this->authorName !== '') ? $this->authorName . ' - ' : '';
          return $prefix . 'Facebook Bridge';
      }

      /**
       * Posts the viewer's captcha answer back to the form action stored in the session.
       *
       * @return mixed parsed response document, or null when no captcha answer
       *               was posted or no pending captcha form is stored in the session
       */
      private function submitCaptchaResponse() {
          if (!isset($_POST['captcha_response'])) {
              return null;
          }
          if (session_status() == PHP_SESSION_NONE) {
              session_start();
          }

          $html = null;
          if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action'])) {
              $captcha_action = $_SESSION['captcha_action'];
              $captcha_fields = $_SESSION['captcha_fields'];
              // Only alphanumeric characters of the viewer's answer are forwarded
              $captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $_POST['captcha_response']);
              $http_options = array(
                  'http' => array(
                      'method' => 'POST',
                      'user_agent' => ini_get('user_agent'),
                      'header' => array("Content-type: application/x-www-form-urlencoded\r\nReferer: $captcha_action\r\nCookie: noscript=1\r\n"),
                      'content' => http_build_query($captcha_fields),
                  ),
              );
              $context = stream_context_create($http_options);
              $response = getContents($captcha_action, false, $context);
              if ($response === false) {
                  returnServerError('Failed to submit captcha response back to Facebook');
              }
              // A response that cannot be parsed is reported as a failed submission too,
              // instead of letting a non-object reach the ->find() calls further on
              $html = str_get_html($response)
                  or returnServerError('Failed to submit captcha response back to Facebook');
          }

          // The stored form is single-use, whether or not it was submitted
          unset($_SESSION['captcha_fields']);
          unset($_SESSION['captcha_action']);

          return $html;
      }

      /**
       * Downloads the no-script version of the requested page.
       *
       * An input containing a slash is requested below "pages/" as given; any
       * other input is URL-encoded and requested directly below the site root.
       *
       * @return mixed parsed document as returned by getSimpleHTMLDOM()
       */
      private function fetchFacebookPage() {
          $user = $this->getInput('u');

          // strpos() yields 0 for a leading slash, so only a strict false means "no slash"
          if (strpos($user, '/') === false) {
              $url = self::URI . urlencode($user) . '?_fb_noscript=1';
          } else {
              $url = self::URI . 'pages/' . $user . '?_fb_noscript=1';
          }

          $html = getSimpleHTMLDOM($url)
              or returnServerError('No results for this query.');

          return $html;
      }

      /**
       * If the document is a captcha interstitial, stores its form in the session,
       * shows the challenge to the viewer and terminates the script.
       *
       * @param mixed $html parsed document to inspect
       */
      private function haltOnCaptchaChallenge($html) {
          $captcha = $html->find('div.captcha_interstitial', 0);
          if (is_null($captcha)) {
              return;
          }

          //Save form for submitting after getting captcha response
          if (session_status() == PHP_SESSION_NONE) {
              session_start();
          }
          $captcha_fields = array();
          foreach ($captcha->find('input, button') as $input) {
              $captcha_fields[$input->name] = $input->value;
          }
          $_SESSION['captcha_fields'] = $captcha_fields;
          $_SESSION['captcha_action'] = self::URI . $captcha->find('form', 0)->action;

          //Show captcha filling form to the viewer, proxying the captcha image
          $img = base64_encode(getContents($captcha->find('img', 0)->src));

          // The query string is viewer-controlled: escape it before echoing it into an attribute
          $query = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '';
          $action = htmlspecialchars('?' . $query, ENT_QUOTES, 'UTF-8');

          header('HTTP/1.1 500 ' . Http::getMessageForCode(500));
          header('Content-Type: text/html');
          die('<form method="post" action="' . $action . '">'
              . '<h2>Facebook captcha challenge</h2>'
              . '<p>Unfortunately, rss-bridge cannot fetch the requested page.<br />'
              . 'Facebook wants rss-bridge to resolve the following captcha:</p>'
              . '<p><img src="data:image/png;base64,' . $img . '" /></p>'
              . '<p><b>Response:</b> <input name="captcha_response" placeholder="please fill in" />'
              . '<input type="submit" value="Submit!" /></p>'
              . '</form>');
      }

      /**
       * Walks from #pagelet_timeline_main_column down to the node whose children are the posts.
       *
       * Every step is null-checked so that a page with a different structure
       * yields null instead of a fatal method call on a non-object.
       *
       * @param mixed $html parsed document
       * @return mixed the posts container, or null when any step of the path is missing
       */
      private function locateTimeline($html) {
          $node = $html->find('#pagelet_timeline_main_column', 0);

          // Path: first child x3, then next sibling, then first child
          $path = array('child', 'child', 'child', 'sibling', 'child');
          foreach ($path as $step) {
              if (is_null($node)) {
                  return null;
              }
              $node = ($step === 'sibling') ? $node->next_sibling() : $node->children(0);
          }

          return $node;
      }

      /**
       * Builds one feed item from a post node.
       *
       * @param mixed  $post   post node
       * @param mixed  $abbr   first <abbr> element of the post (carries the timestamp)
       * @param string $author page/profile display name
       * @return array item with the keys uri, content, title, author and timestamp
       */
      private function buildItemFromPost($post, $abbr, $author) {
          $content = $this->cleanPostContent($post);

          //Retrieve date of the post
          $date = $abbr->hasAttribute('data-utime') ? $abbr->getAttribute('data-utime') : 0;

          //Build title from username and content
          $title = self::shortenForTitle($author, 24);
          $title = self::shortenForTitle($title . ' | ' . strip_tags($content), 64);

          //Build final item
          $item = array();
          $item['uri'] = self::URI . $abbr->parent()->getAttribute('href');
          $item['content'] = $content;
          $item['title'] = $title;
          $item['author'] = $author;
          $item['timestamp'] = $date;

          return $item;
      }

      /**
       * Reduces the markup of a post to links, images and basic formatting.
       *
       * @param mixed $post post node; it is converted to its string form before filtering
       * @return string cleaned HTML fragment
       */
      private function cleanPostContent($post) {
          //Retrieve post contents
          $content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', '', (string) $post);
          $content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', '', $content);
          $content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', '', $content);
          $content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', '', $content);

          //Remove html nodes, keep only img, links, basic formatting
          $content = strip_tags($content, '<a><img><i><u>');

          //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
          $content = preg_replace_callback('/ href=\"([^"]+)\"/i', array($this, 'unescapeFacebookLink'), $content);

          //Clean useless html tag properties and fix link closing tags
          $properties = array('onmouseover', 'onclick', 'target', 'ajaxify', 'tabindex',
              'class', 'style', 'data-[^=]*', 'aria-[^=]*', 'role', 'rel', 'id');
          foreach ($properties as $property_name) {
              $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content);
          }
          $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);

          //Convert textual representation of emoticons eg "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
          $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', array($this, 'unescapeFacebookEmote'), $content);

          return $content;
      }

      /**
       * preg_replace_callback handler that rewrites one href attribute.
       *
       * Site-relative links are made absolute; links going through the
       * "l.php?u=" redirector are replaced by their decoded target.
       *
       * @param array $matches [0] whole attribute, [1] attribute value
       * @return string replacement attribute, including the leading space
       */
      private function unescapeFacebookLink($matches) {
          $link = $matches[1];

          if (strpos($link, '/') === 0) {
              // URI already ends with a slash, so the link's own leading slash is dropped
              $link = self::URI . substr($link, 1);
          }

          if (strpos($link, 'facebook.com/l.php?u=') !== false) {
              $target = urldecode(self::extractBetween($link, 'facebook.com/l.php?u=', '&'));
              // The decoded target is raw text again and may contain quotes or ampersands
              $link = htmlspecialchars($target, ENT_QUOTES, 'UTF-8');
          }

          return ' href="' . $link . '"';
      }

      /**
       * preg_replace_callback handler that maps a textual emoticon to its replacement.
       *
       * @param array $matches [0] whole match, [1..] captured words
       * @return string the replacement of the first captured word that names a
       *                known emoticon, or the unmodified match
       */
      private function unescapeFacebookEmote($matches) {
          $count = count($matches);
          for ($i = 1; $i < $count; $i++) {
              if (isset(self::$emoticons[$matches[$i]])) {
                  return self::$emoticons[$matches[$i]];
              }
          }
          return $matches[0];
      }

      /**
       * Extract a string using start and end delimiters.
       *
       * @param string $string haystack
       * @param string $start  delimiter preceding the wanted section
       * @param string $end    delimiter following the wanted section
       * @return string|false the section between the delimiters, everything after
       *                      $start when $end does not follow, or false when
       *                      $start is not found
       */
      private static function extractBetween($string, $start, $end) {
          $from = strpos($string, $start);
          if ($from === false) {
              return false;
          }

          $rest = (string) substr($string, $from + strlen($start));
          $to = strpos($rest, $end);

          // Without this check substr($rest, 0, false) would silently return ''
          return ($to === false) ? $rest : substr($rest, 0, $to);
      }

      /**
       * Shortens a text to roughly $limit bytes and appends "..." when it was cut.
       *
       * The cut is made at the first line break wordwrap() produces. If there is
       * no break within the limit, the text is cut at the limit itself.
       *
       * @param string $text  text to shorten
       * @param int    $limit maximum length in bytes before the ellipsis
       * @return string
       */
      private static function shortenForTitle($text, $limit) {
          if (strlen($text) <= $limit) {
              return $text;
          }

          $cut = strpos(wordwrap($text, $limit), "\n");
          if ($cut === false || $cut > $limit) {
              // No usable break point: hard cut, on a character boundary when mbstring is available
              $head = function_exists('mb_strcut')
                  ? mb_strcut($text, 0, $limit, 'UTF-8')
                  : substr($text, 0, $limit);
          } else {
              $head = substr($text, 0, $cut);
          }

          return $head . '...';
      }
  }