1
0

Bridge.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. <?php
  2. /**
  3. * All bridge logic
  4. * Note: adapters are stored elsewhere.
  5. */
  /**
   * Contract that every bridge has to fulfil.
   */
  interface BridgeInterface
  {
      /**
       * Collect the bridge's data for the given parameters.
       *
       * @param array $param parameters the bridge works with
       */
      public function collectData(array $param);

      /**
       * @return mixed how long collected data may be cached
       */
      public function getCacheDuration();

      /**
       * Load the bridge metadata.
       * NOTE(review): what counts as "metadata" is not visible in this file - confirm against implementers.
       */
      public function loadMetadatas();

      /**
       * @return mixed the bridge name
       */
      public function getName();

      /**
       * @return mixed the bridge URI
       */
      public function getURI();
  }
  /**
   * Common base of all bridges: holds the collected items, the descriptive
   * properties and the optional cache used by setDatas().
   */
  abstract class BridgeAbstract implements BridgeInterface
  {
      /** Cache object installed through setCache(); null until then. */
      protected $cache;

      /** Items gathered by collectData() or restored from the cache. */
      protected $items = array();

      public $name = "Unnamed bridge";
      public $uri = "";
      public $description = 'No description provided';
      public $maintainer = 'No maintainer';
      public $parameters = array();

      /**
       * Abort the current operation by throwing an HttpException.
       *
       * @param mixed $message exception message
       * @param mixed $code exception code
       * @throws \HttpException always
       */
      protected function returnError($message, $code)
      {
          throw new \HttpException($message, $code);
      }

      /**
       * @return mixed the items currently held by the bridge
       */
      public function getDatas()
      {
          return $this->items;
      }

      /**
       * Fill the bridge with data, from the cache when it is still fresh and
       * through collectData() otherwise.
       * Note: a cache has to be installed beforehand with setCache() to be used.
       *
       * @param array $param $_REQUEST, $_GET, $_POST, or an array with the parameters the bridge expects
       */
      public function setDatas(array $param)
      {
          // Without a cache there is no cache time either.
          $cachedAt = false;
          if ($this->cache !== null) {
              $this->cache->prepare($param);
              $cachedAt = $this->cache->getTime();
          }

          // The cached copy is younger than the cache duration: serve it as is.
          if ($cachedAt !== false && (time() - $this->getCacheDuration() < $cachedAt)) {
              $this->items = $this->cache->loadData();
              return;
          }

          $this->collectData($param);

          // A cache is defined: store the freshly collected data in it.
          if ($this->cache !== null) {
              $this->cache->saveData($this->getDatas());
          }
      }

      /**
       * @return mixed the bridge name (the $name property)
       */
      public function getName()
      {
          return $this->name;
      }

      /**
       * @return mixed the bridge URI (the $uri property)
       */
      public function getURI()
      {
          return $this->uri;
      }

      /**
       * @return int default cache duration, compared against time() in setDatas(), i.e. in seconds
       */
      public function getCacheDuration()
      {
          return 3600;
      }

      /**
       * Install the cache object to use.
       *
       * @param \CacheAbstract $cache
       * @return $this to allow call chaining
       */
      public function setCache(\CacheAbstract $cache)
      {
          $this->cache = $cache;

          return $this;
      }

      /**
       * Wrapper around the global file_get_html() which routes the request
       * through PROXY_URL when that constant is defined. All arguments are
       * forwarded unchanged, apart from $context which receives the proxy options.
       */
      protected function file_get_html($url, $use_include_path = false, $context = null, $offset = -1, $maxLen = -1, $lowercase = true, $forceTagsClosed = true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT)
      {
          if (defined('PROXY_URL')) {
              $proxyOptions = array(
                  'http' => array(
                      'user_agent' => ini_get('user_agent'),
                      'proxy' => PROXY_URL,
                      'request_fulluri' => true,
                  ),
              );

              if ($context === null) {
                  $context = stream_context_create($proxyOptions);
              } else {
                  // The given context is updated in place; a failure leaves it as it was.
                  stream_context_set_option($context, $proxyOptions);
              }
          }

          return file_get_html(
              $url,
              $use_include_path,
              $context,
              $offset,
              $maxLen,
              $lowercase,
              $forceTagsClosed,
              $target_charset,
              $stripRN,
              $defaultBRText,
              $defaultSpanText
          );
      }
  }
  /**
   * Extension of BridgeAbstract that keeps local copies of pages downloaded over HTTP.
   *
   * This is especially useful for sites (e.g. from the Gawker or Liberation networks)
   * whose index only shows excerpts, so that each full page has to be downloaded
   * separately. The main entry point is get_cached(), which downloads a page once
   * and serves the local copy afterwards.
   *
   * TODO allow file cache invalidation by touching files on access, and removing
   * files/directories which have not been touched for a long time.
   */
  abstract class HttpCachingBridgeAbstract extends BridgeAbstract
  {
      /**
       * Return the content of a page, downloading it only when no local copy exists.
       *
       * The local file name is derived from the URL: the scheme is dropped and
       * "?", "&" and "=" are turned into "/", so the file ends up in a directory
       * tree below the page cache directory.
       *
       * @param string $url url to cache
       * @return string|false content of the cached file; '' when the page could not be downloaded
       */
      public function get_cached($url)
      {
          $pageCacheDir = $this->page_cache_dir();
          $filename = $this->cache_filename($url);

          if (file_exists($filename)) {
              // TODO touch file and its parent, and try to do neighbour deletion
              $this->refresh_in_cache($pageCacheDir, $filename);

              return file_get_contents($filename);
          }

          $dir = dirname($filename);
          if (!is_dir($dir)) {
              mkdir($dir, 0777, true);
          }

          // A failed download must not leave anything behind that a later call
          // would mistake for a valid cached page.
          if (!$this->download_remote($url, $filename)) {
              return '';
          }

          return file_get_contents($filename);
      }

      /**
       * Return the inode change time of the cached copy of a page, downloading
       * the page first when it is not cached yet.
       *
       * @param string $url url of the page
       * @return int|false result of filectime(), or false when the page could not be downloaded
       */
      public function get_cached_time($url)
      {
          $filename = $this->cache_filename($url);

          if (!file_exists($filename)) {
              $this->get_cached($url);
          }
          if (!file_exists($filename)) {
              return false;
          }

          return filectime($filename);
      }

      /**
       * Meant to touch the cached file and its parent directories up to the cache root.
       *
       * NOTE(review): this loop has never executed. "!" binds tighter than "==",
       * so the condition compares (!$pageCacheDir), i.e. false, with a non-empty
       * path and is always false. It is deliberately left dormant here:
       *  - $pageCacheDir ends with "/" while dirname() never returns a trailing
       *    slash, so a plain "!=" comparison would never terminate;
       *  - touch() also changes the ctime that get_cached_time() hands to callers.
       * Enable it together with the cache eviction described in the class TODO.
       */
      private function refresh_in_cache($pageCacheDir, $filename)
      {
          $currentPath = $filename;
          while ((!$pageCacheDir) == $currentPath) {
              touch($currentPath);
              $currentPath = dirname($currentPath);
          }
      }

      /**
       * Download a remote resource into a local file.
       *
       * The remote side is opened first, so that an unreachable URL does not
       * create an empty local file; a partially written file is removed.
       *
       * @param string $url location to read from
       * @param string $save_path file to write to
       * @return bool true when the whole resource has been written
       */
      public function download_remote($url, $save_path)
      {
          $source = fopen($url, "rb");
          if (!$source) {
              return false;
          }

          $target = fopen($save_path, 'w+');
          if (!$target) {
              fclose($source);

              return false;
          }

          $copied = stream_copy_to_stream($source, $target);
          fclose($source);
          fclose($target);

          if ($copied === false) {
              unlink($save_path);

              return false;
          }

          return true;
      }

      /**
       * Placeholder for cache removal: only logs the file that would be removed.
       *
       * @param string $url url of the page
       */
      public function remove_from_cache($url)
      {
          $filename = realpath($this->page_cache_dir() . $this->simplify_url($url));
          $this->message("removing from cache \"".$filename."\" WELL, NOT REALLY");
          // filename is NO GOOD
          // unlink($filename);
      }

      /**
       * Write a message to the error log, prefixed with the calling location.
       *
       * @param string $text message to log
       */
      public function message($text)
      {
          $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3);
          // Frame 2 is only present when the stack is deep enough; fall back to
          // the outermost frame available instead of reading an undefined offset.
          $calling = isset($backtrace[2]) ? $backtrace[2] : end($backtrace);
          $file = isset($calling["file"]) ? $calling["file"] : 'unknown';
          $line = isset($calling["line"]) ? $calling["line"] : 0;
          $function = isset($calling["function"]) ? $calling["function"] : 'unknown';

          $message = $file.":".$line
              ." class ".get_class($this)."->".$function
              ." - ".$text;
          error_log($message);
      }

      /**
       * Directory below which downloaded pages are stored (with trailing slash).
       */
      private function page_cache_dir()
      {
          // TODO build this from the variable given to Cache
          return __DIR__ . '/../cache/'."pages/";
      }

      /**
       * Turn a URL into a path relative to the page cache directory.
       */
      private function simplify_url($url)
      {
          $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url);

          // URLs come from remote content: make sure "." and ".." segments
          // cannot point outside of the cache directory.
          $segments = explode('/', $simplified_url);
          foreach ($segments as $index => $segment) {
              if ($segment === '.' || $segment === '..') {
                  $segments[$index] = '_';
              }
          }

          return implode('/', $segments);
      }

      /**
       * Full path of the local file used to cache the given URL.
       */
      private function cache_filename($url)
      {
          $filename = $this->page_cache_dir() . $this->simplify_url($url);
          if (substr($filename, -1) == '/') {
              $filename = $filename."index.html";
          }

          return $filename;
      }
  }
  /**
   * Static factory and registry helper for bridges. Never instantiated itself.
   */
  class Bridge
  {
      /** Directory holding the bridge files, as given to setDir(). */
      static protected $dirBridge;

      /**
       * @throws \LogicException always; use Bridge::create() instead
       */
      public function __construct()
      {
          throw new \LogicException('Please use ' . __CLASS__ . '::create for new object.');
      }

      /**
       * Checks if a bridge is an instantiable bridge.
       *
       * @param string $nameBridge name of the bridge that you want to use
       * @return bool true if it is an instantiable class, false otherwise
       *              (including when no class of that name exists)
       */
      static public function isInstantiable($nameBridge)
      {
          // ReflectionClass throws on unknown classes, e.g. when the bridge file
          // does not declare a class named after the file.
          if (!is_string($nameBridge) || !class_exists($nameBridge)) {
              return false;
          }

          $re = new ReflectionClass($nameBridge);

          return $re->isInstantiable();
      }

      /**
       * Create a new bridge object.
       *
       * @param string $nameBridge name of the bridge you want to use
       * @return object|false the bridge instance, or FALSE when the class cannot be instantiated
       * @throws \InvalidArgumentException when the name is not a valid bridge name
       * @throws \Exception when no file exists for that bridge
       * @throws \LogicException when no bridge directory has been set
       */
      static public function create($nameBridge)
      {
          if (!static::isValidNameBridge($nameBridge)) {
              throw new \InvalidArgumentException('Name bridge must be at least one uppercase follow or not by alphanumeric or dash characters.');
          }

          // Works whether or not the configured directory ends with a separator.
          $pathBridge = rtrim(self::getDir(), '/\\') . '/' . $nameBridge . '.php';
          if (!file_exists($pathBridge)) {
              throw new \Exception('The bridge you looking for does not exist. It should be at path '.$pathBridge);
          }

          require_once $pathBridge;

          if (self::isInstantiable($nameBridge)) {
              return new $nameBridge();
          }

          return FALSE;
      }

      /**
       * Define the directory in which bridge files are looked up.
       *
       * @param string $dirBridge existing directory
       * @throws \InvalidArgumentException when $dirBridge is not a string
       * @throws \Exception when $dirBridge is not an existing directory
       */
      static public function setDir($dirBridge)
      {
          if (!is_string($dirBridge)) {
              throw new \InvalidArgumentException('Dir bridge must be a string.');
          }

          // A plain file would pass file_exists() but can never hold bridges.
          if (!is_dir($dirBridge)) {
              throw new \Exception('Dir bridge does not exist.');
          }

          self::$dirBridge = $dirBridge;
      }

      /**
       * @return string the directory given to setDir()
       * @throws \LogicException when setDir() has not been called yet
       */
      static public function getDir()
      {
          $dirBridge = self::$dirBridge;

          if (is_null($dirBridge)) {
              throw new \LogicException(__CLASS__ . ' class need to know bridge path !');
          }

          return $dirBridge;
      }

      /**
       * Tell whether a value is an acceptable bridge name: an uppercase letter
       * followed by any number of letters, digits or dashes.
       *
       * @param mixed $nameBridge value to check
       * @return bool
       */
      static public function isValidNameBridge($nameBridge)
      {
          // "D" keeps "$" from matching before a trailing newline.
          return is_string($nameBridge)
              && preg_match('@^[A-Z][a-zA-Z0-9-]*$@D', $nameBridge) === 1;
      }

      /**
       * Lists the available bridges.
       *
       * @return array names of the ".php" files found in the bridge directory, without extension
       * @throws \LogicException when no bridge directory has been set
       */
      static public function listBridges()
      {
          $pathDirBridge = self::getDir();
          $listBridge = array();
          $dirFiles = scandir($pathDirBridge);

          if ($dirFiles !== false) {
              foreach ($dirFiles as $fileName) {
                  if (preg_match('@([^.]+)\.php$@U', $fileName, $out)) {
                      $listBridge[] = $out[1];
                  }
              }
          }

          return $listBridge;
      }

      /**
       * Tell whether a bridge is part of a whitelist, either under its bare
       * name or under its file name ("<name>.php").
       *
       * @param array $whitelist whitelisted entries
       * @param string $name bridge name
       * @return bool
       */
      static public function isWhitelisted($whitelist, $name)
      {
          return in_array("$name", $whitelist, true)
              || in_array("$name.php", $whitelist, true);
      }
  }
  /**
   * Base class for bridges that read an existing RSS 2.0 feed and turn each of
   * its items into a bridge item through parseRSSItem().
   */
  abstract class RssExpander extends HttpCachingBridgeAbstract
  {
      // Filled from the feed itself by load_RSS_2_0_feed_data().
      public $name;
      public $uri;
      public $description;

      /**
       * Load the feed and collect its items.
       *
       * @param array $param bridge parameters (not used here)
       * @param string $name location of the RSS feed, as accepted by simplexml_load_file()
       */
      public function collectExpandableDatas(array $param, $name)
      {
          if (empty($name)) {
              $this->returnError('There is no $name for this RSS expander', 404);
          }

          // Notice WE DO NOT use cache here on purpose: we want a fresh view of the RSS stream each time
          $rssContent = simplexml_load_file($name);
          if (!$rssContent) {
              $this->returnError('Could not request '.$name, 404);
          }

          // TODO insert RSS format detection
          // we suppose for now, we have some RSS 2.0
          $this->collect_RSS_2_0_data($rssContent);
      }

      /**
       * Read the feed level data and convert every item of the first channel.
       *
       * @param SimpleXMLElement $rssContent root element of the feed
       */
      protected function collect_RSS_2_0_data($rssContent)
      {
          // Without a <channel> element there is nothing to read properties from.
          if (!isset($rssContent->channel[0])) {
              $this->returnError('The feed has no <channel> element, it does not look like RSS 2.0', 500);
          }

          $channel = $rssContent->channel[0];
          $this->load_RSS_2_0_feed_data($channel);

          foreach ($channel->item as $item) {
              $this->items[] = $this->parseRSSItem($item);
          }
      }

      /**
       * Convert the pubDate of an RSS item into a Unix timestamp.
       *
       * @param SimpleXMLElement $item the RSS item
       * @return int|false the timestamp, or false when the item has no usable pubDate
       */
      protected function RSS_2_0_time_to_timestamp($item)
      {
          $pubDate = trim((string) $item->pubDate);
          if ($pubDate === '') {
              return false;
          }

          // createFromFormat() returns false on mismatch; calling a method on
          // that result would be a fatal error.
          $date = DateTime::createFromFormat('D, d M Y H:i:s e', $pubDate);
          if ($date !== false) {
              return $date->getTimestamp();
          }

          // Last resort for dates which do not follow the expected layout.
          return strtotime($pubDate);
      }

      /**
       * Copy title, link and description of the channel into the bridge.
       * TODO set language, and so on
       *
       * @param SimpleXMLElement $rssContent the channel element
       */
      protected function load_RSS_2_0_feed_data($rssContent)
      {
          $this->name = trim((string) $rssContent->title);
          $this->uri = trim((string) $rssContent->link);
          $this->description = trim((string) $rssContent->description);
      }

      /**
       * Build one of our items from a source RSS item.
       *
       * @param SimpleXMLElement $item the input RSS item, as found in the channel
       * @return mixed an RSS-Bridge item, with (hopefully) the whole content
       */
      abstract protected function parseRSSItem($item);

      /**
       * @return mixed the feed description
       */
      public function getDescription()
      {
          return $this->description;
      }
  }