|
@@ -73,6 +73,60 @@ abstract class BridgeAbstract implements BridgeInterface{
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+/**
+ * Extension of BridgeAbstract allowing caching of files downloaded over HTTP.
+ * This is especially useful for sites from the Gawker or Liberation networks, which show page excerpts
+ * together on an index, while full pages have to be downloaded separately.
+ * This class mainly provides a get_cached method, which downloads the file from its remote location
+ * when no local copy exists and returns the local copy otherwise.
+ * TODO allow file cache invalidation by removing files/directories which have not been touched since ... a long time
+ * (get_cached touches the cached file and its parent directories on each access).
+ * After all, rss-bridge is not respaw, is it ?
+ */
+abstract class HttpCachingBridgeAbstract extends BridgeAbstract {
+
+    /**
+     * Maintain locally cached versions of downloaded pages to avoid multiple downloads.
+     * The local file name is derived from the url by dropping the "http://" / "https://" scheme and
+     * replacing "?" and "&" by "/"; a name ending with "/" gets "index.html" appended. The file is saved
+     * below the "pages" directory of the cache.
+     * NOTE(review): the url is not checked for ".." segments, so a crafted url could map outside of the
+     * page cache directory - confirm callers only pass trusted urls.
+     * @param string $url url to cache
+     * @return string|false content of file as string (false when the local copy cannot be read)
+     * @throws \Exception when the cache directory cannot be created or the download fails
+     */
+    public function get_cached($url) {
+        $simplified_url = str_replace(["http://", "https://", "?", "&"], ["", "", "/", "/"], $url);
+        // TODO build this from the variable given to Cache
+        $pageCacheDir = __DIR__ . '/../cache/'."pages/";
+        $filename = $pageCacheDir.$simplified_url;
+        if (substr($filename, -1) == '/') {
+            $filename = $filename."index.html";
+        }
+        if(file_exists($filename)) {
+            // Mark the file and each of its parent directories (up to, but excluding, the page cache
+            // directory) as recently used.
+            // dirname() never returns a trailing slash, so compare against the root without it; the
+            // length check also guarantees termination, since dirname() only ever shortens the path.
+            $cacheRoot = rtrim($pageCacheDir, '/');
+            $currentPath = $filename;
+            while(strlen($currentPath) > strlen($cacheRoot)) {
+                touch($currentPath);
+                $currentPath = dirname($currentPath);
+            }
+        } else {
+            error_log("we have no local copy of ".$url." Downloading !");
+            $dir = dirname($filename);
+            // The second is_dir() covers a concurrent request creating the directory in between.
+            if(!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
+                throw new \Exception("Unable to create cache directory ".$dir);
+            }
+            $this->download_remote($url, $filename);
+        }
+        return file_get_contents($filename);
+    }
+
+    /**
+     * Download a remote file and store it at the given local path.
+     * The content is first written to "<save_path>.part" and only renamed to its final name once the
+     * whole copy succeeded, so a failed or interrupted download never leaves an empty or truncated
+     * file that get_cached would later serve as a valid cached copy.
+     * @param string $url url (or any path fopen can read) to download
+     * @param string $save_path local path where the content is stored
+     * @throws \Exception when the source cannot be opened or the content cannot be stored
+     */
+    public function download_remote($url , $save_path) {
+        // Open the source first: nothing is created locally when it is unreachable.
+        $handle = fopen($url , "rb");
+        if($handle === false) {
+            throw new \Exception("Unable to open ".$url." for download");
+        }
+        $partial_path = $save_path.".part";
+        $f = fopen($partial_path , 'wb');
+        if($f === false) {
+            fclose($handle);
+            throw new \Exception("Unable to open ".$partial_path." for writing");
+        }
+        $copied = stream_copy_to_stream($handle, $f);
+        fclose($handle);
+        fclose($f);
+        if($copied === false || !rename($partial_path, $save_path)) {
+            if(file_exists($partial_path)) {
+                unlink($partial_path);
+            }
+            throw new \Exception("Unable to store ".$url." in ".$save_path);
+        }
+    }
+}
|
|
|
+
|
|
|
class Bridge{
|
|
|
|
|
|
static protected $dirBridge;
|