Bridge.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. <?php
/**
 * All bridge logic.
 * Note: adapters are stored elsewhere.
 */
  6. interface BridgeInterface{
  7. public function collectData(array $param);
  8. public function getCacheDuration();
  9. public function loadMetadatas();
  10. public function getName();
  11. public function getURI();
  12. }
  13. abstract class BridgeAbstract implements BridgeInterface{
  14. protected $cache;
  15. protected $items = array();
  16. public $name = "Unnamed bridge";
  17. public $uri = "";
  18. public $description = 'No description provided';
  19. public $maintainer = 'No maintainer';
  20. public $parameters = array();
  21. /**
  22. * Loads the Bridge Metadatas
  23. */
  24. public function loadMetadatas() {
  25. }
  26. /**
  27. * Launch probative exception
  28. */
  29. protected function returnError($message, $code){
  30. throw new \HttpException($message, $code);
  31. }
  32. /**
  33. * Return datas stored in the bridge
  34. * @return mixed
  35. */
  36. public function getDatas(){
  37. return $this->items;
  38. }
  39. /**
  40. * Defined datas with parameters depending choose bridge
  41. * Note : you can define a cache before with "setCache"
  42. * @param array $param $_REQUEST, $_GET, $_POST, or array with bridge expected paramters
  43. */
  44. public function setDatas(array $param){
  45. if( !is_null($this->cache) ){
  46. $this->cache->prepare($param);
  47. $time = $this->cache->getTime();
  48. }
  49. else{
  50. $time = false; // No cache ? No time !
  51. }
  52. if( $time !== false && ( time() - $this->getCacheDuration() < $time ) ){ // Cache file has not expired. Serve it.
  53. $this->items = $this->cache->loadData();
  54. }
  55. else{
  56. $this->collectData($param);
  57. if( !is_null($this->cache) ){ // Cache defined ? We go to refresh is memory :D
  58. $this->cache->saveData($this->getDatas());
  59. }
  60. }
  61. }
  62. /**
  63. * Define default duraction for cache
  64. */
  65. public function getCacheDuration(){
  66. return 3600;
  67. }
  68. /**
  69. * Defined cache object to use
  70. */
  71. public function setCache(\CacheAbstract $cache){
  72. $this->cache = $cache;
  73. return $this;
  74. }
  75. protected function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT){
  76. $contextOptions = array(
  77. 'http' => array(
  78. 'user_agent'=>ini_get('user_agent')
  79. ),
  80. );
  81. if(defined('PROXY_URL')) {
  82. $contextOptions['http']['proxy'] = PROXY_URL;
  83. $contextOptions['http']['request_fulluri'] = true;
  84. if(is_null($context)){
  85. $context = stream_context_create($contextOptions);
  86. } else {
  87. $prevContext=$context;
  88. if(!stream_context_set_option($context,$contextOptions)){
  89. $context=$prevContext;
  90. };
  91. }
  92. }
  93. return file_get_html($url,$use_include_path,$context,$offset,$maxLen,
  94. $lowercase,$forceTagsClosed,$target_charset,$stripRN,$defaultBRtext,
  95. $defaultSpanText);
  96. }
  97. }
  98. /**
  99. * Extension of BridgeAbstract allowing caching of files downloaded over http files.
  100. * This is specially useful for sites from Gawker or Liberation networks, which allow pages excerpts top be viewed together on index, while full pages have to be downloaded
  101. * separately.
  102. * This class mainly provides a get_cached method which will will download the file from its remote location.
  103. * TODO allow file cache invalidation by touching files on access, and removing files/directories which have not been touched since ... a long time
  104. * After all, rss-bridge is not respaw, isn't it ?
  105. */
  106. abstract class HttpCachingBridgeAbstract extends BridgeAbstract {
  107. /**
  108. * Maintain locally cached versions of pages to download to avoid multiple doiwnloads.
  109. * A file name is generated by replacing all "/" by "_", and the file is saved below this bridge cache
  110. * @param url url to cache
  111. * @return content of file as string
  112. */
  113. public function get_cached($url) {
  114. $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url);
  115. // TODO build this from the variable given to Cache
  116. $pageCacheDir = __DIR__ . '/../cache/'."pages/";
  117. $filename = $pageCacheDir.$simplified_url;
  118. if (substr($filename, -1) == '/') {
  119. $filename = $filename."index.html";
  120. }
  121. if(file_exists($filename)) {
  122. // $this->message("loading cached file from ".$filename." for page at url ".$url);
  123. // TODO touch file and its parent, and try to do neighbour deletion
  124. $this->refresh_in_cache($pageCacheDir, $filename);
  125. } else {
  126. // $this->message("we have no local copy of ".$url." Downloading to ".$filename);
  127. $dir = substr($filename, 0, strrpos($filename, '/'));
  128. if(!is_dir($dir)) {
  129. // $this->message("creating directories for ".$dir);
  130. mkdir($dir, 0777, true);
  131. }
  132. $this->download_remote($url, $filename);
  133. }
  134. return file_get_contents($filename);
  135. }
  136. public function get_cached_time($url) {
  137. $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url);
  138. // TODO build this from the variable given to Cache
  139. $pageCacheDir = __DIR__ . '/../cache/'."pages/";
  140. $filename = $pageCacheDir.$simplified_url;
  141. if (substr($filename, -1) == '/') {
  142. $filename = $filename."index.html";
  143. }
  144. if(!file_exists($filename)) {
  145. $this->get_cached($url);
  146. }
  147. return filectime($filename);
  148. }
  149. private function refresh_in_cache($pageCacheDir, $filename) {
  150. $currentPath = $filename;
  151. while(!$pageCacheDir==$currentPath) {
  152. touch($currentPath);
  153. $currentPath = dirname($currentPath);
  154. }
  155. }
  156. public function download_remote($url , $save_path) {
  157. $f = fopen( $save_path , 'w+');
  158. if($f) {
  159. $handle = fopen($url , "rb");
  160. if($handle) {
  161. while (!feof($handle)) {
  162. $contents = fread($handle, 8192);
  163. if($contents) {
  164. fwrite($f , $contents);
  165. }
  166. }
  167. fclose($handle);
  168. }
  169. fclose($f);
  170. }
  171. }
  172. public function remove_from_cache($url) {
  173. $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url);
  174. // TODO build this from the variable given to Cache
  175. $pageCacheDir = __DIR__ . '/../cache/'."pages/";
  176. $filename = realpath($pageCacheDir.$simplified_url);
  177. $this->message("removing from cache \"".$filename."\" WELL, NOT REALLY");
  178. // filename is NO GOOD
  179. // unlink($filename);
  180. }
  181. public function message($text) {
  182. $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3);
  183. $calling = $backtrace[2];
  184. $message = $calling["file"].":".$calling["line"]
  185. ." class ".get_class($this)."->".$calling["function"]
  186. ." - ".$text;
  187. error_log($message);
  188. }
  189. }
  190. class Bridge{
  191. static protected $dirBridge;
  192. public function __construct(){
  193. throw new \LogicException('Please use ' . __CLASS__ . '::create for new object.');
  194. }
  195. /**
  196. * Checks if a bridge is an instantiable bridge.
  197. * @param string $nameBridge name of the bridge that you want to use
  198. * @return true if it is an instantiable bridge, false otherwise.
  199. */
  200. static public function isInstantiable($nameBridge) {
  201. $re = new ReflectionClass($nameBridge);
  202. return $re->IsInstantiable();
  203. }
  204. /**
  205. * Create a new bridge object
  206. * @param string $nameBridge Defined bridge name you want use
  207. * @return Bridge object dedicated
  208. */
  209. static public function create($nameBridge){
  210. if( !static::isValidNameBridge($nameBridge) ){
  211. throw new \InvalidArgumentException('Name bridge must be at least one uppercase follow or not by alphanumeric or dash characters.');
  212. }
  213. $pathBridge = self::getDir() . $nameBridge . '.php';
  214. if( !file_exists($pathBridge) ){
  215. throw new \Exception('The bridge you looking for does not exist. It should be at path '.$pathBridge);
  216. }
  217. require_once $pathBridge;
  218. if(Bridge::isInstantiable($nameBridge)) {
  219. return new $nameBridge();
  220. } else {
  221. return FALSE;
  222. }
  223. }
  224. static public function setDir($dirBridge){
  225. if( !is_string($dirBridge) ){
  226. throw new \InvalidArgumentException('Dir bridge must be a string.');
  227. }
  228. if( !file_exists($dirBridge) ){
  229. throw new \Exception('Dir bridge does not exist.');
  230. }
  231. self::$dirBridge = $dirBridge;
  232. }
  233. static public function getDir(){
  234. $dirBridge = self::$dirBridge;
  235. if( is_null($dirBridge) ){
  236. throw new \LogicException(__CLASS__ . ' class need to know bridge path !');
  237. }
  238. return $dirBridge;
  239. }
  240. static public function isValidNameBridge($nameBridge){
  241. return preg_match('@^[A-Z][a-zA-Z0-9-]*$@', $nameBridge);
  242. }
  243. /**
  244. * Lists the available bridges.
  245. * @return array List of the bridges
  246. */
  247. static public function listBridges() {
  248. $pathDirBridge = self::getDir();
  249. $listBridge = array();
  250. $dirFiles = scandir($pathDirBridge);
  251. if( $dirFiles !== false ){
  252. foreach( $dirFiles as $fileName ) {
  253. if( preg_match('@([^.]+)\.php$@U', $fileName, $out) ){
  254. $listBridge[] = $out[1];
  255. }
  256. }
  257. }
  258. return $listBridge;
  259. }
  260. static function isWhitelisted( $whitelist, $name ) {
  261. if(in_array("$name", $whitelist) or in_array("$name.php", $whitelist))
  262. return TRUE;
  263. else
  264. return FALSE;
  265. }
  266. }
  267. abstract class RssExpander extends HttpCachingBridgeAbstract{
  268. public $name;
  269. public $uri;
  270. public $description;
  271. public function collectExpandableDatas(array $param, $name){
  272. if (empty($name)) {
  273. $this->returnError('There is no $name for this RSS expander', 404);
  274. }
  275. // $this->message("Loading from ".$param['url']);
  276. // Notice WE DO NOT use cache here on purpose : we want a fresh view of the RSS stream each time
  277. $rssContent = simplexml_load_file($name) or $this->returnError('Could not request '.$name, 404);
  278. // $this->message("loaded RSS from ".$param['url']);
  279. // TODO insert RSS format detection
  280. // we suppose for now, we have some RSS 2.0
  281. $this->collect_RSS_2_0_data($rssContent);
  282. }
  283. protected function collect_RSS_2_0_data($rssContent) {
  284. $rssContent = $rssContent->channel[0];
  285. // $this->message("RSS content is ===========\n".var_export($rssContent, true)."===========");
  286. $this->load_RSS_2_0_feed_data($rssContent);
  287. foreach($rssContent->item as $item) {
  288. // $this->message("parsing item ".var_export($item, true));
  289. $this->items[] = $this->parseRSSItem($item);
  290. }
  291. }
  292. protected function RSS_2_0_time_to_timestamp($item) {
  293. return DateTime::createFromFormat('D, d M Y H:i:s e', $item->pubDate)->getTimestamp();
  294. }
  295. // TODO set title, link, description, language, and so on
  296. protected function load_RSS_2_0_feed_data($rssContent) {
  297. $this->name = trim($rssContent->title);
  298. $this->uri = trim($rssContent->link);
  299. $this->description = trim($rssContent->description);
  300. }
  301. /**
  302. * Method should return, from a source RSS item given by lastRSS, one of our Items objects
  303. * @param $item the input rss item
  304. * @return a RSS-Bridge Item, with (hopefully) the whole content)
  305. */
  306. abstract protected function parseRSSItem($item);
  307. public function getName(){
  308. return $this->name;
  309. }
  310. public function getURI(){
  311. return $this->uri;
  312. }
  313. public function getDescription() {
  314. return $this->description;
  315. }
  316. }