FacebookBridge.php 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. <?php
  /**
   * Bridge that scrapes the no-script version of a Facebook page or profile
   * timeline and converts each post into a feed item.
   *
   * Facebook may answer with a captcha interstitial instead of the timeline.
   * In that case the captcha form is stored in the PHP session, the captcha
   * image is shown to the viewer, and the viewer's answer (POSTed back as
   * "captcha_response") is relayed to Facebook on the next request.
   *
   * NOTE(review): getContents(), getSimpleHTMLDOM(), returnClientError() and
   * returnServerError() are not defined in this class; they are presumably
   * inherited from BridgeAbstract. The code below does not rely on the
   * return*Error() helpers halting execution.
   */
  class FacebookBridge extends BridgeAbstract
  {
      /**
       * Declares the bridge metadata and its single required parameter "u".
       */
      public function loadMetadatas()
      {
          $this->maintainer = "teromene";
          $this->name = "Facebook";
          $this->uri = "http://www.facebook.com/";
          $this->description = "Input a page title or a profile log. For a profile log, please insert the parameter as follow : myExamplePage/132621766841117";
          $this->parameters[] = array(
              'u' => array(
                  'name' => 'Username',
                  'required' => true
              )
          );
      }

      /**
       * Fetches the timeline and fills $this->items with one entry per post.
       *
       * @param array $param Request parameters; 'u' holds the page name, or
       *                     "name/id" for a profile log.
       */
      public function collectData(array $param)
      {
          $html = null;

          // Handle captcha response sent by the viewer
          if (isset($_POST['captcha_response'])) {
              $html = $this->submitCaptchaResponse($_POST['captcha_response']);
          }

          // Retrieve page contents
          if (is_null($html)) {
              if (isset($param['u'])) {
                  // strpos() returns 0 for a leading "/", so compare against false
                  // explicitly instead of relying on truthiness.
                  if (strpos($param['u'], "/") === false) {
                      $html = $this->getSimpleHTMLDOM('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnServerError('No results for this query.');
                  } else {
                      $html = $this->getSimpleHTMLDOM('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnServerError('No results for this query.');
                  }
              } else {
                  $this->returnClientError('You must specify a Facebook username.');
              }
          }

          // Nothing to parse (only reachable if the error helpers returned).
          if (!is_object($html)) {
              return;
          }

          // Handle captcha form?
          $captcha = $html->find('div.captcha_interstitial', 0);
          if (!is_null($captcha)) {
              $this->showCaptchaChallenge($captcha);
              return;
          }

          // No captcha? We can carry on retrieving page contents :)
          $element = $this->findTimelineContainer($html);
          if (is_null($element)) {
              return;
          }

          // Use the page title as feed name; keep the current name when the
          // title element is missing.
          $title_node = $html->find('title#pageTitle', 0);
          if (!is_null($title_node)) {
              $this->name = str_replace(' | Facebook', '', $title_node->innertext);
          }
          $author = $this->name;

          foreach ($element->children() as $post) {
              // Only nodes carrying an <abbr> (the post date) are treated as posts.
              $date_node = $post->find('abbr', 0);
              if (is_null($date_node)) {
                  continue;
              }

              $content = $this->cleanPostContent($post);

              // Retrieve date of the post
              $date = 0;
              if ($date_node->hasAttribute('data-utime')) {
                  $date = $date_node->getAttribute('data-utime');
              }

              // Build title from username and content
              $title = $this->truncateAtWord($author, 24);
              $title = $this->truncateAtWord($title.' | '.strip_tags($content), 64);

              // Build and add final item
              $item = array();
              $item['uri'] = 'https://facebook.com'.$date_node->parent()->getAttribute('href');
              $item['content'] = $content;
              $item['title'] = $title;
              $item['author'] = $author;
              $item['timestamp'] = $date;
              $this->items[] = $item;
          }
      }

      /**
       * Drops the captcha answer from the parameters before handing them to
       * the parent implementation.
       *
       * @param array $param Request parameters.
       */
      public function setDatas(array $param)
      {
          if (isset($param['captcha_response'])) {
              unset($param['captcha_response']);
          }
          parent::setDatas($param);
      }

      /**
       * @return string The bridge name, prefixed by $this->name when it is set.
       */
      public function getName()
      {
          return (isset($this->name) ? $this->name.' - ' : '').'Facebook Bridge';
      }

      /**
       * @return int Cache duration in seconds.
       */
      public function getCacheDuration()
      {
          return 300; // 5 minutes
      }

      /**
       * Extracts the text located between two delimiters.
       *
       * @param string $string Text to search.
       * @param string $start  Delimiter preceding the wanted section.
       * @param string $end    Delimiter following the wanted section.
       * @return string|false The section after $start up to (not including) the
       *                      first $end; the whole remainder when $end does not
       *                      occur; false when $start does not occur.
       */
      private function extractFromDelimiters($string, $start, $end)
      {
          $start_pos = strpos($string, $start);
          if ($start_pos === false) {
              return false;
          }
          // Cast because substr() can return false on older PHP versions.
          $section = (string) substr($string, $start_pos + strlen($start));
          $end_pos = strpos($section, $end);
          if ($end_pos === false) {
              // Without this, substr($section, 0, false) would yield an empty string.
              return $section;
          }
          return substr($section, 0, $end_pos);
      }

      /**
       * preg_replace_callback() handler that cleans a Facebook link: relative
       * links become absolute and "l.php?u=" redirects are replaced by their
       * target.
       *
       * @param array $matches Regex matches; index 1 is the href value.
       * @return string Replacement ' href="..."' attribute.
       */
      private function unescapeFacebookLink($matches)
      {
          $link = $matches[1];
          if (strpos($link, '/') === 0) {
              $link = 'https://www.facebook.com'.$link;
          }
          if (strpos($link, 'facebook.com/l.php?u=') !== false) {
              $target = urldecode($this->extractFromDelimiters($link, 'facebook.com/l.php?u=', '&'));
              // The decoded target is raw text going back into an attribute,
              // so quotes and ampersands must be escaped again.
              $link = htmlspecialchars($target, ENT_QUOTES, 'UTF-8');
          }
          return ' href="'.$link.'"';
      }

      /**
       * preg_replace_callback() handler that converts the textual form of a
       * Facebook emoticon back to its ASCII form.
       *
       * @param array $matches Regex matches; each captured group is compared
       *                       against the known emoticon names.
       * @return string The emoticon, or the untouched match when unknown.
       */
      private function unescapeFacebookEmote($matches)
      {
          static $facebook_emoticons = array(
              'smile' => ':)',
              'frown' => ':(',
              'tongue' => ':P',
              'grin' => ':D',
              'gasp' => ':O',
              'wink' => ';)',
              'pacman' => ':<',
              'grumpy' => '>_<',
              'unsure' => ':/',
              'cry' => ':\'(',
              'kiki' => '^_^',
              'glasses' => '8-)',
              'sunglasses' => 'B-)',
              'heart' => '<3',
              'devil' => ']:D',
              'angel' => '0:)',
              'squint' => '-_-',
              'confused' => 'o_O',
              'upset' => 'xD',
              'colonthree' => ':3',
              'like' => '&#x1F44D;');

          $len = count($matches);
          for ($i = 1; $i < $len; $i++) {
              if (isset($facebook_emoticons[$matches[$i]])) {
                  return $facebook_emoticons[$matches[$i]];
              }
          }
          return $matches[0];
      }

      /**
       * Shortens a text to roughly $limit bytes, preferring a word boundary,
       * and appends '...' when something was cut.
       *
       * @param string $text  Text to shorten.
       * @param int    $limit Maximum length in bytes before truncation.
       * @return string The original text when short enough, else the cut text
       *                followed by '...'.
       */
      private function truncateAtWord($text, $limit)
      {
          if (strlen($text) <= $limit) {
              return $text;
          }
          // wordwrap() swaps the breaking space for "\n", so the first newline
          // marks the last word boundary that fits.
          $cut = strpos(wordwrap($text, $limit), "\n");
          if ($cut === false || $cut > $limit) {
              // No usable boundary (first word longer than the limit): hard cut.
              $cut = $limit;
              // Assuming UTF-8 text, step back so a multi-byte character is not split.
              while ($cut > 0 && (ord($text[$cut]) & 0xC0) === 0x80) {
                  $cut--;
              }
          }
          return substr($text, 0, $cut).'...';
      }

      /**
       * Relays the viewer's captcha answer to Facebook using the form saved in
       * the session by showCaptchaChallenge().
       *
       * @param mixed $response Raw "captcha_response" POST value.
       * @return object|null Parsed DOM of Facebook's reply, or null when no
       *                     captcha form was saved or the reply was unparsable.
       */
      private function submitCaptchaResponse($response)
      {
          if (session_status() == PHP_SESSION_NONE) {
              session_start();
          }

          $html = null;
          if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action'])) {
              $captcha_action = $_SESSION['captcha_action'];
              $captcha_fields = $_SESSION['captcha_fields'];
              // Only alphanumeric characters are forwarded; non-string input is dropped.
              $captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", is_string($response) ? $response : '');
              $http_options = array(
                  'http' => array(
                      'method' => 'POST',
                      'user_agent' => ini_get('user_agent'),
                      'header' => array("Content-type: application/x-www-form-urlencoded\r\nReferer: $captcha_action\r\nCookie: noscript=1\r\n"),
                      'content' => http_build_query($captcha_fields),
                  ),
              );
              $context = stream_context_create($http_options);
              $reply = $this->getContents($captcha_action, false, $context);
              if ($reply === false) {
                  $this->returnServerError('Failed to submit captcha response back to Facebook');
              }
              $parsed = str_get_html($reply);
              // Fall back to a regular page fetch when the reply cannot be parsed.
              $html = is_object($parsed) ? $parsed : null;
          }

          // The saved form is single-use.
          unset($_SESSION['captcha_fields'], $_SESSION['captcha_action']);

          return $html;
      }

      /**
       * Saves the captcha form in the session, then outputs a page asking the
       * viewer to solve the captcha and terminates the script.
       *
       * @param object $captcha DOM node of the captcha interstitial.
       */
      private function showCaptchaChallenge($captcha)
      {
          $form = $captcha->find('form', 0);
          $image = $captcha->find('img', 0);
          if (is_null($form) || is_null($image)) {
              $this->returnServerError('Facebook returned a captcha page that could not be parsed.');
              return;
          }

          // Save form for submitting after getting captcha response
          if (session_status() == PHP_SESSION_NONE) {
              session_start();
          }
          $captcha_fields = array();
          foreach ($captcha->find('input, button') as $input) {
              $captcha_fields[$input->name] = $input->value;
          }
          $_SESSION['captcha_fields'] = $captcha_fields;
          $_SESSION['captcha_action'] = 'https://www.facebook.com'.$form->action;

          // Show captcha filling form to the viewer, proxying the captcha image
          $img = base64_encode($this->getContents($image->src));
          // The query string is attacker-controllable, so escape it before
          // placing it inside the form's action attribute.
          $query = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '';
          $query = htmlspecialchars($query, ENT_QUOTES, 'UTF-8');

          header('HTTP/1.1 500 '.Http::getMessageForCode(500));
          header('Content-Type: text/html');
          die('<form method="post" action="?'.$query.'">'
              .'<h2>Facebook captcha challenge</h2>'
              .'<p>Unfortunately, rss-bridge cannot fetch the requested page.<br />'
              .'Facebook wants rss-bridge to resolve the following captcha:</p>'
              .'<p><img src="data:image/png;base64,'.$img.'" /></p>'
              .'<p><b>Response:</b> <input name="captcha_response" placeholder="please fill in" />'
              .'<input type="submit" value="Submit!" /></p>'
              .'</form>');
      }

      /**
       * Walks from the timeline pagelet down to the node whose children are
       * inspected as posts.
       *
       * @param object $html Parsed page DOM.
       * @return object|null The container node, or null when any step of the
       *                     expected structure is missing.
       */
      private function findTimelineContainer($html)
      {
          $node = $html->find('#pagelet_timeline_main_column', 0);
          // Three levels down through first children...
          for ($depth = 0; $depth < 3 && !is_null($node); $depth++) {
              $node = $node->children(0);
          }
          // ...then the next sibling's first child.
          if (!is_null($node)) {
              $node = $node->next_sibling();
          }
          if (!is_null($node)) {
              $node = $node->children(0);
          }
          return $node;
      }

      /**
       * Reduces a post node to simple HTML: links, images and basic formatting.
       *
       * @param object $post DOM node of a single post (converted to its markup
       *                     string by the regex functions).
       * @return string Cleaned post markup.
       */
      private function cleanPostContent($post)
      {
          // Retrieve post contents
          $content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', '', $post);
          $content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', '', $content);
          $content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', '', $content);
          $content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', '', $content);

          // Remove html nodes, keep only img, links, basic formatting
          $content = strip_tags($content, '<a><img><i><u>');

          // Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
          $content = preg_replace_callback('/ href=\"([^"]+)\"/i', array($this, 'unescapeFacebookLink'), $content);

          // Clean useless html tag properties and fix link closing tags
          $properties = array('onmouseover', 'onclick', 'target', 'ajaxify', 'tabindex',
              'class', 'style', 'data-[^=]*', 'aria-[^=]*', 'role', 'rel', 'id');
          foreach ($properties as $property_name) {
              $content = preg_replace('/ '.$property_name.'=\"[^"]*\"/i', '', $content);
          }
          $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);

          // Convert textual representation of emoticons eg "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
          $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', array($this, 'unescapeFacebookEmote'), $content);

          return $content;
      }
  }