<?php
// FacebookBridge.php
  2. class FacebookBridge extends BridgeAbstract{
  3. const MAINTAINER = "teromene";
  4. const NAME = "Facebook";
  5. const URI = "https://www.facebook.com/";
  6. const DESCRIPTION = "Input a page title or a profile log. For a profile log, please insert the parameter as follow : myExamplePage/132621766841117";
  7. const PARAMETERS =array( array(
  8. 'u'=>array(
  9. 'name'=>'Username',
  10. 'required'=>true
  11. )
  12. ));
  13. private $authorName='';
  14. public function collectData(){
  15. //Extract a string using start and end delimiters
  16. function ExtractFromDelimiters($string, $start, $end) {
  17. if (strpos($string, $start) !== false) {
  18. $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
  19. $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
  20. return $section_retrieved;
  21. } return false;
  22. }
  23. //Utility function for cleaning a Facebook link
  24. $unescape_fb_link = function ($matches) {
  25. if (is_array($matches) && count($matches) > 1) {
  26. $link = $matches[1];
  27. if (strpos($link, '/') === 0)
  28. $link = self::URI.$link.'"';
  29. if (strpos($link, 'facebook.com/l.php?u=') !== false)
  30. $link = urldecode(ExtractFromDelimiters($link, 'facebook.com/l.php?u=', '&'));
  31. return ' href="'.$link.'"';
  32. }
  33. };
  34. //Utility function for converting facebook emoticons
  35. $unescape_fb_emote = function ($matches) {
  36. static $facebook_emoticons = array(
  37. 'smile' => ':)',
  38. 'frown' => ':(',
  39. 'tongue' => ':P',
  40. 'grin' => ':D',
  41. 'gasp' => ':O',
  42. 'wink' => ';)',
  43. 'pacman' => ':<',
  44. 'grumpy' => '>_<',
  45. 'unsure' => ':/',
  46. 'cry' => ':\'(',
  47. 'kiki' => '^_^',
  48. 'glasses' => '8-)',
  49. 'sunglasses' => 'B-)',
  50. 'heart' => '<3',
  51. 'devil' => ']:D',
  52. 'angel' => '0:)',
  53. 'squint' => '-_-',
  54. 'confused' => 'o_O',
  55. 'upset' => 'xD',
  56. 'colonthree' => ':3',
  57. 'like' => '&#x1F44D;');
  58. $len = count($matches);
  59. if ($len > 1)
  60. for ($i = 1; $i < $len; $i++)
  61. foreach ($facebook_emoticons as $name => $emote)
  62. if ($matches[$i] === $name)
  63. return $emote;
  64. return $matches[0];
  65. };
  66. $html = null;
  67. //Handle captcha response sent by the viewer
  68. if (isset($_POST['captcha_response']))
  69. {
  70. if (session_status() == PHP_SESSION_NONE)
  71. session_start();
  72. if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action']))
  73. {
  74. $captcha_action = $_SESSION['captcha_action'];
  75. $captcha_fields = $_SESSION['captcha_fields'];
  76. $captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $_POST['captcha_response']);
  77. $http_options = array(
  78. 'http' => array(
  79. 'method' => 'POST',
  80. 'user_agent'=> ini_get('user_agent'),
  81. 'header'=>array("Content-type: application/x-www-form-urlencoded\r\nReferer: $captcha_action\r\nCookie: noscript=1\r\n"),
  82. 'content' => http_build_query($captcha_fields),
  83. ),
  84. );
  85. $context = stream_context_create($http_options);
  86. $html = getContents($captcha_action, false, $context);
  87. if ($html === FALSE) { returnServerError('Failed to submit captcha response back to Facebook'); }
  88. unset($_SESSION['captcha_fields']);
  89. $html = str_get_html($html);
  90. }
  91. unset($_SESSION['captcha_fields']);
  92. unset($_SESSION['captcha_action']);
  93. }
  94. //Retrieve page contents
  95. if (is_null($html)) {
  96. if (!strpos($this->getInput('u'), "/")) {
  97. $html = getSimpleHTMLDOM(self::URI.urlencode($this->getInput('u')).'?_fb_noscript=1')
  98. or returnServerError('No results for this query.');
  99. } else {
  100. $html = getSimpleHTMLDOM(self::URI.'pages/'.$this->getInput('u').'?_fb_noscript=1')
  101. or returnServerError('No results for this query.');
  102. }
  103. }
  104. //Handle captcha form?
  105. $captcha = $html->find('div.captcha_interstitial', 0);
  106. if (!is_null($captcha))
  107. {
  108. //Save form for submitting after getting captcha response
  109. if (session_status() == PHP_SESSION_NONE)
  110. session_start();
  111. $captcha_fields = array();
  112. foreach ($captcha->find('input, button') as $input)
  113. $captcha_fields[$input->name] = $input->value;
  114. $_SESSION['captcha_fields'] = $captcha_fields;
  115. $_SESSION['captcha_action'] = self::URI.$captcha->find('form', 0)->action;
  116. //Show captcha filling form to the viewer, proxying the captcha image
  117. $img = base64_encode(getContents($captcha->find('img', 0)->src));
  118. header('HTTP/1.1 500 '.Http::getMessageForCode(500));
  119. header('Content-Type: text/html');
  120. die('<form method="post" action="?'.$_SERVER['QUERY_STRING'].'">'
  121. .'<h2>Facebook captcha challenge</h2>'
  122. .'<p>Unfortunately, rss-bridge cannot fetch the requested page.<br />'
  123. .'Facebook wants rss-bridge to resolve the following captcha:</p>'
  124. .'<p><img src="data:image/png;base64,'.$img.'" /></p>'
  125. .'<p><b>Response:</b> <input name="captcha_response" placeholder="please fill in" />'
  126. .'<input type="submit" value="Submit!" /></p>'
  127. .'</form>');
  128. }
  129. //No captcha? We can carry on retrieving page contents :)
  130. $element = $html->find('#pagelet_timeline_main_column')[0]->children(0)->children(0)->children(0)->next_sibling()->children(0);
  131. if(isset($element)) {
  132. $author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
  133. $profilePic = 'https://graph.facebook.com/'.$this->getInput('u').'/picture?width=200&amp;height=200';
  134. $this->authorName = $author;
  135. foreach($element->children() as $post) {
  136. $item = array();
  137. if (count($post->find('abbr')) > 0) {
  138. //Retrieve post contents
  139. $content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', '', $post);
  140. $content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', '', $content);
  141. $content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', '', $content);
  142. $content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', '', $content);
  143. //Remove html nodes, keep only img, links, basic formatting
  144. $content = strip_tags($content,'<a><img><i><u>');
  145. //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
  146. $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content);
  147. //Clean useless html tag properties and fix link closing tags
  148. foreach (array('onmouseover', 'onclick', 'target', 'ajaxify', 'tabindex',
  149. 'class', 'style', 'data-[^=]*', 'aria-[^=]*', 'role', 'rel', 'id') as $property_name)
  150. $content = preg_replace('/ '.$property_name.'=\"[^"]*\"/i', '', $content);
  151. $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);
  152. //Convert textual representation of emoticons eg "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
  153. $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', $unescape_fb_emote, $content);
  154. //Retrieve date of the post
  155. $date = $post->find("abbr")[0];
  156. if(isset($date) && $date->hasAttribute('data-utime')) {
  157. $date = $date->getAttribute('data-utime');
  158. } else {
  159. $date = 0;
  160. }
  161. //Build title from username and content
  162. $title = $author;
  163. if (strlen($title) > 24)
  164. $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")).'...';
  165. $title = $title.' | '.strip_tags($content);
  166. if (strlen($title) > 64)
  167. $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")).'...';
  168. //Build and add final item
  169. $item['uri'] = self::URI.$post->find('abbr')[0]->parent()->getAttribute('href');
  170. $item['content'] = $content;
  171. $item['title'] = $title;
  172. $item['author'] = $author;
  173. $item['timestamp'] = $date;
  174. $this->items[] = $item;
  175. }
  176. }
  177. }
  178. }
  179. public function getName() {
  180. return (isset($this->authorName) ? $this->authorName.' - ' : '').'Facebook Bridge';
  181. }
  182. public function getCacheDuration() {
  183. return 300; // 5 minutes
  184. }
  185. }