Bridge.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. <?php
/**
 * All bridge logic.
 * Note: adapters are stored elsewhere.
 */
  /**
   * Contract implemented by every bridge.
   */
  interface BridgeInterface
  {
      /**
       * @return mixed Name of the bridge (presumably a display string; confirm against implementations).
       */
      public function getName();

      /**
       * @return mixed URI associated with the bridge (presumably the source site; confirm against implementations).
       */
      public function getURI();

      /**
       * @return mixed Cache lifetime; BridgeAbstract compares it against time(), i.e. it is treated as seconds.
       */
      public function getCacheDuration();

      /**
       * Gather the bridge's data for the given parameters.
       *
       * @param array $param Parameters expected by the bridge (for instance $_REQUEST, $_GET or $_POST)
       */
      public function collectData(array $param);
  }
  /**
   * Base implementation shared by bridges: keeps the collected items and
   * optionally loads them from / saves them to a cache object.
   */
  abstract class BridgeAbstract implements BridgeInterface
  {
      /** Cache object set through setCache(); null means "no cache". */
      protected $cache;

      /** Items returned by getDatas(): loaded from the cache, or expected to be filled by collectData(). */
      protected $items = array();

      /**
       * Abort by throwing an \HttpException built from the given message and code.
       *
       * @param mixed $message Passed as first constructor argument of \HttpException
       * @param mixed $code    Passed as second constructor argument of \HttpException
       * @throws \HttpException always
       */
      protected function returnError($message, $code)
      {
          throw new \HttpException($message, $code);
      }

      /**
       * Return the data stored in the bridge.
       *
       * @return mixed
       */
      public function getDatas()
      {
          return $this->items;
      }

      /**
       * Fill the bridge with data for the given parameters.
       * When a cache was set with setCache() and its timestamp is younger than
       * getCacheDuration(), the items are loaded from it; otherwise collectData()
       * runs and, if a cache is set, the result is saved to it.
       *
       * @param array $param $_REQUEST, $_GET, $_POST, or an array with the parameters the bridge expects
       */
      public function setDatas(array $param)
      {
          if (!is_null($this->cache)) {
              $this->cache->prepare($param);
              $time = $this->cache->getTime();
          } else {
              $time = false; // No cache, hence no cache timestamp.
          }

          if ($time !== false && (time() - $this->getCacheDuration() < $time)) {
              // Cached copy has not expired yet: serve it.
              $this->items = $this->cache->loadData();
          } else {
              $this->collectData($param);

              if (!is_null($this->cache)) {
                  // A cache is defined: refresh what it stores.
                  $this->cache->saveData($this->getDatas());
              }
          }
      }

      /**
       * Default cache duration, compared against time() in setDatas().
       *
       * @return int 3600
       */
      public function getCacheDuration()
      {
          return 3600;
      }

      /**
       * Define the cache object to use.
       *
       * @param \CacheAbstract $cache
       * @return $this
       */
      public function setCache(\CacheAbstract $cache)
      {
          $this->cache = $cache;

          return $this;
      }

      /**
       * Prefix the src of every <img> whose src starts with '/' with the given server.
       * Images whose src does not start with '/' (absolute URLs, paths without a
       * leading slash, missing src) are left untouched.
       *
       * @param mixed $content Object exposing find('img'), returning elements with a writable "src" property
       * @param string $server Prefix to prepend, e.g. "http://example.com"
       */
      public function defaultImageSrcTo($content, $server)
      {
          foreach ($content->find('img') as $image) {
              $src = $image->src;
              // strpos() returns false when '/' is absent; a loose "== 0" would treat
              // that as "starts with '/'", so compare strictly.
              if (is_string($src) && strpos($src, '/') === 0) {
                  $image->src = $server . $src;
              }
          }
      }
  }
  /**
   * Extension of BridgeAbstract allowing caching of files downloaded over HTTP.
   * This is especially useful for sites from the Gawker or Liberation networks, which allow page
   * excerpts to be viewed together on the index, while full pages have to be downloaded separately.
   * This class mainly provides a get_cached method which downloads the file from its remote
   * location when no local copy exists yet.
   * TODO allow file cache invalidation by touching files on access, and removing files/directories
   * which have not been touched for a long time.
   * After all, rss-bridge is not respawn, is it?
   */
  abstract class HttpCachingBridgeAbstract extends BridgeAbstract
  {
      /**
       * Return the content of $url, keeping a local copy to avoid multiple downloads.
       * The local file name is derived from the URL: the scheme is removed and every
       * "?", "&" and "=" becomes a "/", below the "pages" cache directory. A path ending
       * with "/" gets "index.html" appended.
       * NOTE(review): ".." segments in $url are not neutralised, so the computed path can
       * leave the cache directory - confirm that only trusted URLs reach this method.
       *
       * @param string $url url to cache
       * @return string|false content of the file; '' when the download failed
       */
      public function get_cached($url)
      {
          $pageCacheDir = $this->page_cache_dir();
          $filename = $this->cache_path_for($url);

          if (file_exists($filename)) {
              // Local copy available: mark it (and its parent directories) as recently used.
              $this->refresh_in_cache($pageCacheDir, $filename);
          } else {
              $dir = substr($filename, 0, strrpos($filename, '/'));
              if (!is_dir($dir)) {
                  mkdir($dir, 0777, true);
              }
              // Strict comparison so that an overriding download_remote() which returns
              // nothing keeps working as before.
              if ($this->download_remote($url, $filename) === false) {
                  return '';
              }
          }

          return file_get_contents($filename);
      }

      /**
       * Touch $filename and each parent directory located below $pageCacheDir.
       *
       * @param string $pageCacheDir root of the page cache
       * @param string $filename cached file located below $pageCacheDir
       */
      private function refresh_in_cache($pageCacheDir, $filename)
      {
          // dirname() never returns a trailing slash, so compare against the trimmed root.
          $root = rtrim($pageCacheDir, '/');
          $currentPath = $filename;

          while (strpos($currentPath, $root . '/') === 0) {
              touch($currentPath);
              $parent = dirname($currentPath);
              if ($parent === $currentPath) {
                  break; // dirname() made no progress; avoid looping forever.
              }
              $currentPath = $parent;
          }
      }

      /**
       * Copy the content found at $url into the local file $save_path.
       * When the remote resource cannot be opened the (empty) local file is removed, so
       * that a failed download is not served from the cache later on.
       *
       * @param string $url remote location
       * @param string $save_path local destination
       * @return bool false when either side could not be opened, true otherwise
       */
      public function download_remote($url, $save_path)
      {
          $f = fopen($save_path, 'w+');
          if (!$f) {
              return false;
          }

          $handle = fopen($url, "rb");
          if (!$handle) {
              fclose($f);
              unlink($save_path);
              return false;
          }

          while (!feof($handle)) {
              $contents = fread($handle, 8192);
              if ($contents === false) {
                  break;
              }
              // Compare against '' rather than relying on truthiness: the chunk "0" is falsy.
              if ($contents !== '') {
                  fwrite($f, $contents);
              }
          }

          fclose($handle);
          fclose($f);

          return true;
      }

      /**
       * Log which cached file corresponds to $url. Nothing is deleted yet.
       *
       * @param string $url url whose cached copy should be removed
       */
      public function remove_from_cache($url)
      {
          // realpath() yields false when no cached copy exists.
          $filename = realpath($this->cache_path_for($url));
          $this->message("removing from cache \"".$filename."\" WELL, NOT REALLY");
          // filename is NO GOOD
          // unlink($filename);
      }

      /**
       * Write $text to the error log, prefixed with information about the calling code.
       *
       * @param string $text
       */
      public function message($text)
      {
          $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3);
          // Frame 2 is used when the stack is deep enough; otherwise use the deepest frame available.
          $calling = isset($backtrace[2]) ? $backtrace[2] : end($backtrace);
          $file = isset($calling["file"]) ? $calling["file"] : '(unknown file)';
          $line = isset($calling["line"]) ? $calling["line"] : '?';

          $message = $file.":".$line
              ." class ".get_class($this)."->".$calling["function"]
              ." - ".$text;
          error_log($message);
      }

      /**
       * Directory below which cached pages are stored.
       * TODO build this from the variable given to Cache
       *
       * @return string path ending with "/"
       */
      private function page_cache_dir()
      {
          return __DIR__ . '/../cache/'."pages/";
      }

      /**
       * Compute the local cache file used for $url (shared by get_cached and remove_from_cache).
       *
       * @param string $url
       * @return string
       */
      private function cache_path_for($url)
      {
          $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url);
          $filename = $this->page_cache_dir().$simplified_url;
          if (substr($filename, -1) == '/') {
              $filename = $filename."index.html";
          }

          return $filename;
      }
  }
  /**
   * Static factory and registry helper for bridges stored in a directory.
   * The directory is configured with setDir(); its value is concatenated directly
   * with file names, so it has to end with a directory separator.
   */
  class Bridge
  {
      /** Directory containing the bridge files, as given to setDir(). */
      static protected $dirBridge;

      /**
       * Direct instantiation is not allowed.
       *
       * @throws \LogicException always
       */
      public function __construct()
      {
          throw new \LogicException('Please use ' . __CLASS__ . '::create for new object.');
      }

      /**
       * Create a new bridge object
       *
       * @param string $nameBridge Name of the bridge to use; also the class name and the file name (without ".php")
       * @return object instance of the class named $nameBridge
       * @throws \InvalidArgumentException when the name is not valid
       * @throws \Exception when no file exists for this bridge
       */
      static public function create($nameBridge)
      {
          if (!static::isValidNameBridge($nameBridge)) {
              throw new \InvalidArgumentException('Name bridge must be at least one uppercase follow or not by alphanumeric or dash characters.');
          }

          $pathBridge = self::getDir() . $nameBridge . '.php';
          if (!file_exists($pathBridge)) {
              throw new \Exception('The bridge you looking for does not exist. It should be at path '.$pathBridge);
          }

          require_once $pathBridge;

          return new $nameBridge();
      }

      /**
       * Define the directory containing the bridges.
       *
       * @param string $dirBridge Existing directory, ending with a directory separator
       * @throws \InvalidArgumentException when $dirBridge is not a string
       * @throws \Exception when $dirBridge is not an existing directory
       */
      static public function setDir($dirBridge)
      {
          if (!is_string($dirBridge)) {
              throw new \InvalidArgumentException('Dir bridge must be a string.');
          }

          // is_dir() rather than file_exists(): a regular file is not a usable bridge directory.
          if (!is_dir($dirBridge)) {
              throw new \Exception('Dir bridge does not exist.');
          }

          self::$dirBridge = $dirBridge;
      }

      /**
       * Return the directory defined with setDir().
       *
       * @return string
       * @throws \LogicException when setDir() has not been called yet
       */
      static public function getDir()
      {
          $dirBridge = self::$dirBridge;

          if (is_null($dirBridge)) {
              throw new \LogicException(__CLASS__ . ' class need to know bridge path !');
          }

          return $dirBridge;
      }

      /**
       * Tell whether $nameBridge is an acceptable bridge name: an uppercase letter
       * followed by any number of alphanumeric or dash characters.
       *
       * @param mixed $nameBridge
       * @return int|false result of preg_match(); 0 for non-string input
       */
      static public function isValidNameBridge($nameBridge)
      {
          if (!is_string($nameBridge)) {
              return 0; // preg_match() does not accept arrays/objects as subject.
          }

          return preg_match('@^[A-Z][a-zA-Z0-9-]*$@', $nameBridge);
      }

      /**
       * Read the bridge directory and collect information about each bridge from the
       * annotations found in the doc comments of its file.
       * Only files named "<something>.php" (no other dot in the name) are parsed, and
       * only bridges declaring at least a "@name" are returned.
       *
       * @return array Information about each bridge, indexed by file name without ".php"
       */
      static public function searchInformation()
      {
          $pathDirBridge = self::getDir();
          $listBridge = array();

          $dirFiles = scandir($pathDirBridge);
          if ($dirFiles === false) {
              return $listBridge;
          }

          foreach ($dirFiles as $fileName) {
              // Anchored on both sides so that e.g. "Foo.php.bak" is not treated as a bridge.
              if (!preg_match('@^([^.]+)\.php$@', $fileName, $out)) {
                  continue;
              }

              $source = file_get_contents($pathDirBridge . $fileName);
              if ($source === false) {
                  continue; // Unreadable entry: nothing to parse.
              }

              $infos = self::extractInformation($source);
              if (isset($infos['name'])) { // Keep only bridges declaring at least a name
                  $listBridge[$out[1]] = $infos;
              }
          }

          return $listBridge;
      }

      /**
       * Extract the annotations (@maintainer, @description, @homepage, @name, @useN(...))
       * from every doc comment of the given PHP source.
       *
       * @param string $source PHP source code
       * @return array Collected information; the "use" key is present as soon as one doc comment exists
       */
      static private function extractInformation($source)
      {
          $searchCommonPattern = array('maintainer', 'description', 'homepage', 'name');
          $infos = array();

          foreach (token_get_all($source) as $token) {
              if (!is_array($token) || $token[0] != T_DOC_COMMENT) {
                  continue;
              }
              $commentary = $token[1];

              foreach ($searchCommonPattern as $name) {
                  if (preg_match('#@' . preg_quote($name, '#') . '\s+(.+)#', $commentary, $outComment)) {
                      // "." also matches "\r": drop it (and trailing blanks) for CRLF files.
                      $infos[$name] = rtrim($outComment[1]);
                  }
              }

              // Initialise once only: a later doc comment must not erase "use"
              // entries collected from an earlier one.
              if (!isset($infos['use'])) {
                  $infos['use'] = array();
              }

              // Catch specific information about "use".
              preg_match_all('#@use(?<num>[1-9][0-9]*)\s?\((?<args>.+)\)(?:\r|\n)#', $commentary, $outComment);
              foreach ($outComment['args'] as $num => $args) {
                  // Catch arguments for the current use
                  preg_match_all('#(?<name>[a-z]+)="(?<value>.*)"(?:,|$)#U', $args, $outArg);

                  $usePos = $outComment['num'][$num]; // Current use number
                  if (!isset($infos['use'][$usePos])) {
                      $infos['use'][$usePos] = array();
                  }

                  foreach ($outArg['name'] as $numArg => $argName) {
                      $infos['use'][$usePos][$argName] = $outArg['value'][$numArg];
                  }
              }
          }

          return $infos;
      }
  }