FacebookBridge.php 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. <?php
  2. class FacebookBridge extends BridgeAbstract{
  3. public function loadMetadatas() {
  4. $this->maintainer = "teromene";
  5. $this->name = "Facebook";
  6. $this->uri = "http://www.facebook.com/";
  7. $this->description = "Input a page title or a profile log. For a profile log, please insert the parameter as follow : myExamplePage/132621766841117";
  8. $this->update = "31/03/2016";
  9. $this->parameters[] =
  10. '[
  11. {
  12. "name" : "Username",
  13. "identifier" : "u",
  14. "required" : "required"
  15. }
  16. ]';
  17. }
  18. public function collectData(array $param) {
  19. //Extract a string using start and end delimiters
  20. function ExtractFromDelimiters($string, $start, $end) {
  21. if (strpos($string, $start) !== false) {
  22. $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
  23. $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
  24. return $section_retrieved;
  25. } return false;
  26. }
  27. //Utility function for cleaning a Facebook link
  28. $unescape_fb_link = function ($matches) {
  29. if (is_array($matches) && count($matches) > 1) {
  30. $link = $matches[1];
  31. if (strpos($link, '/') === 0)
  32. $link = 'https://www.facebook.com'.$link.'"';
  33. if (strpos($link, 'facebook.com/l.php?u=') !== false)
  34. $link = urldecode(ExtractFromDelimiters($link, 'facebook.com/l.php?u=', '&'));
  35. return ' href="'.$link.'"';
  36. }
  37. };
  38. //Utility function for converting facebook emoticons
  39. $unescape_fb_emote = function ($matches) {
  40. static $facebook_emoticons = array(
  41. 'smile' => ':)',
  42. 'frown' => ':(',
  43. 'tongue' => ':P',
  44. 'grin' => ':D',
  45. 'gasp' => ':O',
  46. 'wink' => ';)',
  47. 'pacman' => ':<',
  48. 'grumpy' => '>_<',
  49. 'unsure' => ':/',
  50. 'cry' => ':\'(',
  51. 'kiki' => '^_^',
  52. 'glasses' => '8-)',
  53. 'sunglasses' => 'B-)',
  54. 'heart' => '<3',
  55. 'devil' => ']:D',
  56. 'angel' => '0:)',
  57. 'squint' => '-_-',
  58. 'confused' => 'o_O',
  59. 'upset' => 'xD',
  60. 'colonthree' => ':3',
  61. 'like' => '&#x1F44D;');
  62. $len = count($matches);
  63. if ($len > 1)
  64. for ($i = 1; $i < $len; $i++)
  65. foreach ($facebook_emoticons as $name => $emote)
  66. if ($matches[$i] === $name)
  67. return $emote;
  68. return $matches[0];
  69. };
  70. $html = null;
  71. //Handle captcha response sent by the viewer
  72. if (isset($_POST['captcha_response']))
  73. {
  74. if (session_status() == PHP_SESSION_NONE)
  75. session_start();
  76. if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action']))
  77. {
  78. $captcha_action = $_SESSION['captcha_action'];
  79. $captcha_fields = $_SESSION['captcha_fields'];
  80. $captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $_POST['captcha_response']);
  81. $http_options = array(
  82. 'http' => array(
  83. 'method' => 'POST',
  84. 'user_agent'=> ini_get('user_agent'),
  85. 'header'=>array("Content-type: application/x-www-form-urlencoded\r\nReferer: $captcha_action\r\nCookie: noscript=1\r\n"),
  86. 'content' => http_build_query($captcha_fields),
  87. ),
  88. );
  89. $context = stream_context_create($http_options);
  90. $html = file_get_contents($captcha_action, false, $context);
  91. if ($html === FALSE) { $this->returnError('Failed to submit captcha response back to Facebook', 500); }
  92. unset($_SESSION['captcha_fields']);
  93. $html = str_get_html($html);
  94. }
  95. unset($_SESSION['captcha_fields']);
  96. unset($_SESSION['captcha_action']);
  97. }
  98. //Retrieve page contents
  99. if (is_null($html)) {
  100. if (isset($param['u'])) {
  101. if (!strpos($param['u'], "/")) {
  102. $html = $this->file_get_html('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
  103. } else {
  104. $html = $this->file_get_html('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
  105. }
  106. } else {
  107. $this->returnError('You must specify a Facebook username.', 400);
  108. }
  109. }
  110. //Handle captcha form?
  111. $captcha = $html->find('div.captcha_interstitial', 0);
  112. if (!is_null($captcha))
  113. {
  114. //Save form for submitting after getting captcha response
  115. if (session_status() == PHP_SESSION_NONE)
  116. session_start();
  117. $captcha_fields = array();
  118. foreach ($captcha->find('input, button') as $input)
  119. $captcha_fields[$input->name] = $input->value;
  120. $_SESSION['captcha_fields'] = $captcha_fields;
  121. $_SESSION['captcha_action'] = 'https://www.facebook.com'.$captcha->find('form', 0)->action;
  122. //Show captcha filling form to the viewer, proxying the captcha image
  123. $img = base64_encode(file_get_contents($captcha->find('img', 0)->src));
  124. header('HTTP/1.1 500 '.Http::getMessageForCode(500));
  125. header('Content-Type: text/html');
  126. die('<form method="post" action="?'.$_SERVER['QUERY_STRING'].'">'
  127. .'<h2>Facebook captcha challenge</h2>'
  128. .'<p>Unfortunately, rss-bridge cannot fetch the requested page.<br />'
  129. .'Facebook wants rss-bridge to resolve the following captcha:</p>'
  130. .'<p><img src="data:image/png;base64,'.$img.'" /></p>'
  131. .'<p><b>Response:</b> <input name="captcha_response" placeholder="please fill in" />'
  132. .'<input type="submit" value="Submit!" /></p>'
  133. .'</form>');
  134. }
  135. //No captcha? We can carry on retrieving page contents :)
  136. $element = $html->find('#pagelet_timeline_main_column')[0]->children(0)->children(0)->children(0)->next_sibling()->children(0);
  137. if(isset($element)) {
  138. $author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
  139. $profilePic = 'https://graph.facebook.com/'.$param['u'].'/picture?width=200&amp;height=200';
  140. $this->name = $author;
  141. foreach($element->children() as $post) {
  142. $item = new \Item();
  143. if (count($post->find('abbr')) > 0) {
  144. //Retrieve post contents
  145. $content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', '', $post);
  146. $content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', '', $content);
  147. $content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', '', $content);
  148. $content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', '', $content);
  149. //Remove html nodes, keep only img, links, basic formatting
  150. $content = strip_tags($content,'<a><img><i><u>');
  151. //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
  152. $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content);
  153. //Clean useless html tag properties and fix link closing tags
  154. foreach (array('onmouseover', 'onclick', 'target', 'ajaxify', 'tabindex',
  155. 'class', 'style', 'data-[^=]*', 'aria-[^=]*', 'role', 'rel', 'id') as $property_name)
  156. $content = preg_replace('/ '.$property_name.'=\"[^"]*\"/i', '', $content);
  157. $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);
  158. //Convert textual representation of emoticons eg "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
  159. $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', $unescape_fb_emote, $content);
  160. //Retrieve date of the post
  161. $date = $post->find("abbr")[0];
  162. if(isset($date) && $date->hasAttribute('data-utime')) {
  163. $date = $date->getAttribute('data-utime');
  164. } else {
  165. $date = 0;
  166. }
  167. //Build title from username and content
  168. $title = $author;
  169. if (strlen($title) > 24)
  170. $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")).'...';
  171. $title = $title.' | '.strip_tags($content);
  172. if (strlen($title) > 64)
  173. $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")).'...';
  174. //Use first image as thumbnail if available, or profile pic fallback
  175. $thumbnail = $post->find('img', 1);
  176. if (is_object($thumbnail))
  177. $thumbnail = $thumbnail->src;
  178. else $thumbnail = $profilePic;
  179. //Build and add final item
  180. $item->uri = 'https://facebook.com'.$post->find('abbr')[0]->parent()->getAttribute('href');
  181. $item->thumbnailUri = $thumbnail;
  182. $item->content = $content;
  183. $item->title = $title;
  184. $item->author = $author;
  185. $item->timestamp = $date;
  186. $this->items[] = $item;
  187. }
  188. }
  189. }
  190. }
  191. public function setDatas(array $param){
  192. if (isset($param['captcha_response']))
  193. unset($param['captcha_response']);
  194. parent::setDatas($param);
  195. }
  196. public function getName() {
  197. return (isset($this->name) ? $this->name.' - ' : '').'Facebook Bridge';
  198. }
  199. public function getURI() {
  200. return 'http://facebook.com';
  201. }
  202. public function getCacheDuration() {
  203. return 300; // 5 minutes
  204. }
  205. }