rss-bridge-lib.php 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. <?php
  2. /* rss-bridge library.
  3. Foundation functions for rss-bridge project.
  4. See https://github.com/sebsauvage/rss-bridge
  5. Licence: Public domain.
  6. */
  // User-Agent string PHP sends on the HTTP requests it makes itself.
  ini_set('user_agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:20.0) Gecko/20100101 Firefox/20.0');

  // All dates are computed and formatted in UTC.
  date_default_timezone_set('UTC');

  // Silence every PHP diagnostic; swap in the line below while debugging.
  error_reporting(0);
  //ini_set('display_errors','1'); error_reporting(E_ALL); // For debugging only.

  // Directory holding the cache files. The trailing slash is required because
  // file names are appended to this constant directly.
  define('CACHEDIR','cache/');

  // Buffer all output from here on, so the generated page can be captured
  // and written to the cache before it is sent.
  ob_start();

  // Make sure the cache directory exists.
  if (!is_dir(CACHEDIR)) {
      mkdir(CACHEDIR, 0705);
      // mkdir() applies the umask to its mode, so set the permissions again explicitly.
      chmod(CACHEDIR, 0705);
  }

  // The DOM parser is a hard dependency: answer with a plain-text HTTP 500
  // instead of a PHP fatal error when it has not been installed.
  if (file_exists('simple_html_dom.php') === false) {
      header('HTTP/1.1 500 Internal Server Error');
      header('Content-Type: text/plain');
      exit('"PHP Simple HTML DOM Parser" is missing. Get it from http://simplehtmldom.sourceforge.net/ and place the script "simple_html_dom.php" in the same folder to allow me to work.');
  }
  require_once 'simple_html_dom.php';
  23. /**
  24. * Abstract RSSBridge class on which all bridges are built upon.
  25. * It provides utility methods (cache, ATOM feed building...)
  26. */
  abstract class RssBridgeAbstractClass
  {
      /**
       * $items is an array of dictionaries. Each subclass must fill this array when collectData() is called.
       * eg. $items = Array( Array('uri'=>'http://foo.bar', 'title'=>'My beautiful foobar', 'content'=>'Hello, <b>world !</b>', 'timestamp'=>'1375864834'),
       *                     Array('uri'=>'http://toto.com', 'title'=>'Welcome to toto', 'content'=>'What is this website about ?', 'timestamp'=>'1375868313')
       *                   )
       * Keys in dictionaries:
       *   uri       (string;mandatory) = The URI the item points to.
       *   title     (string;mandatory) = Title of item
       *   content   (string;optional)  = item content (usually HTML code)
       *   timestamp (string;optional)  = item date. Must be in EPOCH format.
       * Other keys are ignored by the ATOM and HTML outputs (the JSON and plaintext
       * outputs dump $items as-is).
       *
       * Note: this class also reads $this->bridgeName, $this->bridgeURI and
       * $this->cacheDuration (in minutes) without declaring them; concrete bridges
       * are expected to define these properties.
       */
      public $items = array();

      /** MIME type returned to browser; also stored alongside the cached response. */
      private $contentType;

      /**
       * Sets the content-type returned to browser and remembers it for the cache.
       * Example: $this->setContentType('text/html; charset=UTF-8')
       */
      private function setContentType($value)
      {
          $this->contentType = $value;
          header('Content-Type: '.$value);
      }

      /**
       * collectData() will be called to ask the bridge to go collect data on the net.
       * All derived classes must implement this method.
       * This method must fill $this->items with collected items.
       * Input: $request : The incoming request (=$_GET). This can be used or ignored by the bridge.
       */
      abstract protected function collectData($request);

      /**
       * Returns a HTTP error to user, with a message, and stops the script.
       * Example: $this->returnError('404 Not Found', 'ERROR: no results.');
       */
      protected function returnError($code, $message)
      {
          header("HTTP/1.1 $code");
          header('Content-Type: text/plain;charset=UTF-8');
          die($message);
      }

      /**
       * Escapes a value for use as XML/HTML text or inside a quoted attribute.
       */
      private function escape($text)
      {
          $flags = ENT_QUOTES;
          // ENT_SUBSTITUTE only exists on newer PHP versions; without it, invalid
          // UTF-8 input makes htmlspecialchars() return an empty string.
          if (defined('ENT_SUBSTITUTE')) { $flags |= ENT_SUBSTITUTE; }
          return htmlspecialchars((string)$text, $flags, 'UTF-8');
      }

      /**
       * Wraps a value in a CDATA section. An embedded "]]>" would terminate the
       * section early, so it is split across two adjacent sections.
       */
      private function cdata($text)
      {
          return '<![CDATA['.str_replace(']]>', ']]]]><![CDATA[>', (string)$text).']]>';
      }

      /**
       * Returns scheme and host of the current request, e.g. "https://example.org".
       */
      private function requestOrigin()
      {
          $secure = isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on';
          $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
          return 'http'.($secure ? 's' : '').'://'.$host;
      }

      /**
       * Returns the cache path and filename for the current request URI.
       */
      private function cacheFilePath()
      {
          $requestUri = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
          return CACHEDIR.hash('sha1', $requestUri).'.cache';
      }

      /**
       * Builds an ATOM feed from $this->items and return it to browser.
       */
      private function returnATOM()
      {
          $this->setContentType('application/atom+xml; charset=UTF-8');

          $name = $this->escape($this->bridgeName);
          $bridgeUri = $this->escape($this->bridgeURI);
          $origin = $this->requestOrigin();
          $pathInfo = isset($_SERVER['PATH_INFO']) ? $_SERVER['PATH_INFO'] : '';
          $requestUri = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';

          // ATOM requires a valid date in every <updated> element. The feed uses the
          // most recent item timestamp, or the current time when no item carries one.
          $latest = null;
          foreach ($this->items as $item) {
              if (isset($item['timestamp'])) {
                  $stamp = (int)$item['timestamp'];
                  if ($latest === null || $stamp > $latest) { $latest = $stamp; }
              }
          }
          if ($latest === null) { $latest = time(); }

          echo '<?xml version="1.0" encoding="UTF-8"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:thr="http://purl.org/syndication/thread/1.0" xml:lang="en-US">'."\n";
          echo '<title type="text">'.$name.'</title>'."\n";
          echo '<id>'.$this->escape($origin.$pathInfo).'/</id>'."\n";
          echo '<updated>'.date(DATE_ATOM, $latest).'</updated>'."\n";
          echo '<link rel="alternate" type="text/html" href="'.$bridgeUri.'" />'."\n";
          // Only the five XML entities may be used here: htmlentities() would emit
          // HTML-only named entities that are undefined in an XML document.
          echo '<link rel="self" href="'.$this->escape($origin.$requestUri).'" />'."\n"."\n";

          foreach ($this->items as $item) {
              $uri = $this->escape(isset($item['uri']) ? $item['uri'] : '');
              $title = isset($item['title']) ? $item['title'] : '';
              // Items without a timestamp fall back to the feed-level date.
              $when = isset($item['timestamp']) ? (int)$item['timestamp'] : $latest;

              echo '<entry><author><name>'.$name.'</name><uri>'.$bridgeUri.'</uri></author>'."\n";
              echo '<title type="html">'.$this->cdata($title).'</title>'."\n";
              echo '<link rel="alternate" type="text/html" href="'.$uri.'" />'."\n";
              echo '<id>'.$uri.'</id>'."\n";
              echo '<updated>'.date(DATE_ATOM, $when).'</updated>'."\n";
              if (isset($item['content'])) {
                  echo '<content type="html">'.$this->cdata($item['content']).'</content>'."\n";
              } else {
                  echo '<content type="html"></content>'."\n";
              }
              // FIXME: Security: Disable Javascript ? (item content is emitted as-is)
              echo '</entry>'."\n\n";
          }
          echo '</feed>';
      }

      /**
       * Builds a simple HTML page from $this->items and return it to browser.
       */
      private function returnHTML()
      {
          $this->setContentType('text/html; charset=UTF-8');
          $name = $this->escape($this->bridgeName);

          echo '<html><head><title>'.$name.'</title>';
          echo '<style>body{font-family:"Trebuchet MS",Verdana,Arial,Helvetica,sans-serif;font-size:10pt;background-color:#aaa;}div.rssitem{border:1px solid black;padding:5px;margin:10px;background-color:#fff;}</style></head><body>';
          echo '<h1>'.$name.'</h1>';
          foreach ($this->items as $item) {
              $uri = $this->escape(isset($item['uri']) ? $item['uri'] : '');
              $title = $this->escape(strip_tags(isset($item['title']) ? $item['title'] : ''));

              echo '<div class="rssitem"><h2><a href="'.$uri.'">'.$title.'</a></h2>';
              if (isset($item['timestamp'])) { echo '<small>'.date(DATE_ATOM, (int)$item['timestamp']).'</small>'; }
              // Content is HTML by contract and is deliberately output unescaped.
              if (isset($item['content'])) { echo '<p>'.$item['content'].'</p>'; }
              echo "</div>\n\n";
          }
          echo '</body></html>';
      }

      /**
       * Builds a JSON string from $this->items and return it to browser.
       */
      private function returnJSON()
      {
          $this->setContentType('application/json');
          echo json_encode($this->items);
      }

      /**
       * Returns $this->items as raw php data.
       */
      private function returnPlaintext()
      {
          $this->setContentType('text/plain;charset=UTF-8');
          print_r($this->items);
      }

      /**
       * Start processing request and return response to browser.
       * The output format is selected with $_GET['format'] (plaintext, json, html);
       * anything else produces an ATOM feed.
       */
      public function process()
      {
          // Ends the script here when a fresh cached copy can be served.
          $this->serveCachedVersion();

          // Cache file does not exist or has expired: We re-fetch the results and cache them.
          $this->collectData($_GET);
          if (empty($this->items)) { $this->returnError('404 Not Found', 'ERROR: no results.'); }

          $format = empty($_GET['format']) ? 'atom' : $_GET['format'];
          switch ($format) {
              case 'plaintext':
                  $this->returnPlaintext();
                  break;
              case 'json':
                  $this->returnJSON();
                  break;
              case 'html':
                  $this->returnHTML();
                  break;
              default:
                  $this->returnATOM();
          }
          $this->storeReponseInCache();
      }

      /**
       * Returns the cached version of current request URI directly to the browser
       * if it exists and if cache has not expired, then stops the script.
       * Continues execution if no usable cached version is available.
       */
      private function serveCachedVersion()
      {
          $cachefile = $this->cacheFilePath();
          if (!file_exists($cachefile)) { return; }

          // Cache file has expired ($this->cacheDuration is expressed in minutes).
          $modified = filemtime($cachefile);
          if (time() - ($this->cacheDuration*60) >= $modified) { return; }

          // A truncated or undecodable cache file is treated as a cache miss
          // instead of being served as an empty response.
          $data = json_decode(file_get_contents($cachefile), true);
          if (!is_array($data) || !isset($data['data'], $data['Content-Type']) || !is_string($data['data'])) { return; }

          header('Content-Type: '.$data['Content-Type']); // Send proper MIME Type
          header('X-Cached-Version: '.date(DATE_ATOM, $modified));
          echo $data['data'];
          exit();
      }

      /**
       * Stores currently generated page in cache, then sends it to the browser.
       */
      private function storeReponseInCache()
      {
          $body = ob_get_contents();
          if ($body !== false) {
              $payload = json_encode(Array('data'=>$body, 'Content-Type'=>$this->contentType));
              // json_encode() fails on e.g. invalid UTF-8; writing that result
              // would poison the cache with an empty page until it expires.
              if ($payload !== false) {
                  file_put_contents($this->cacheFilePath(), $payload, LOCK_EX);
              }
          }
          ob_end_flush();
      }
  }
  193. ?>