소스 검색

Prepared Gawker bridge by extracting file cache from initial Liberation bridge

Riduidel 10 년 전
부모
커밋
f2f82c7d03
2개의 변경된 파일, 55개의 추가 및 36개의 삭제
  1. 1 36
      bridges/Les400Culs.php
  2. 54 0
      lib/Bridge.php

+ 1 - 36
bridges/Les400Culs.php

@@ -6,7 +6,7 @@
 * @update 20/02/2014
 */
 define("SEXE", "http://sexes.blogs.liberation.fr");
-class Les400Culs extends BridgeAbstract{
+class Les400Culs extends HttpCachingBridgeAbstract{
 
     public function collectData(array $param){
         $html = file_get_html($this->getURI()) or $this->returnError('Could not request '.$this->getURI(), 404);
@@ -60,39 +60,4 @@ class Les400Culs extends BridgeAbstract{
     public function getDescription(){
         return "La planète sexe, vue et racontée par Agnès Giard. Et par rss-bridge";
     }
-    
-    /**
-     * Maintain locally cached versions of pages to download to avoid multiple doiwnloads.
-     * A file name is generated by replacing all "/" by "_", and the file is saved below this bridge cache
-     * @param url url to cache
-     * @return content of file as string
-     */
-    public function get_cached($url) {
-        $simplified_url = str_replace(["http://", "https://", "?", "&"], ["", "", "/", "/"], $url);
-        $filename =  __DIR__ . '/../cache/'."pages/".$simplified_url;
-        if (substr($filename, -1) == '/') {
-            $filename = $filename."index.html";
-        }
-        if(!file_exists($filename)) {
-            error_log("we have no local copy of ".$url." Downloading !");
-            $dir = substr($filename, 0, strrpos($filename, '/'));
-            if(!is_dir($dir)) {
-                mkdir($dir, 0777, true);
-            }
-            $this->download_remote($url, $filename);
-        }
-        return file_get_contents($filename);
-    }
-
-    public function download_remote($url , $save_path) {
-        $f = fopen( $save_path , 'w+');
-        $handle = fopen($url , "rb");
-        while (!feof($handle)) {
-            $contents = fread($handle, 8192);
-            fwrite($f , $contents);
-        }
-        fclose($handle);
-        fclose($f);
-    }
-
 }

+ 54 - 0
lib/Bridge.php

@@ -73,6 +73,60 @@ abstract class BridgeAbstract implements BridgeInterface{
     }
 }
 
+/**
+ * Extension of BridgeAbstract keeping an on-disk cache of files downloaded over HTTP.
+ * Useful for sites (Gawker or Liberation networks) whose index only shows page excerpts, so that each full page has to be downloaded separately.
+ * TODO invalidate the cache by removing files/directories which have not been touched for a long time
+ */
+abstract class HttpCachingBridgeAbstract extends BridgeAbstract {
+
+    /**
+     * Return the content of $url, downloading it only when no local copy exists yet.
+     * The cache path mirrors the URL: the scheme is dropped and every "?" or "&" becomes "/".
+     * @param string $url url to cache
+     * @return string|false content of the cached file, or false when no copy could be obtained
+     */
+    public function get_cached($url) {
+        $simplified_url = str_replace(["http://", "https://", "?", "&"], ["", "", "/", "/"], $url);
+        // TODO build this from the variable given to Cache
+        $pageCacheDir = __DIR__ . '/../cache/'."pages/";
+        $filename = $pageCacheDir.$simplified_url;
+        if (substr($filename, -1) == '/') {
+            $filename = $filename."index.html";
+        }
+        if(file_exists($filename)) {
+            // Refresh the mtime of the file and of every parent directory below the cache root. dirname() drops
+            // the trailing "/", hence the trimmed root; comparing lengths guarantees the walk always terminates.
+            $cacheRoot = rtrim($pageCacheDir, '/');
+            for($currentPath = $filename; strlen($currentPath) > strlen($cacheRoot); $currentPath = dirname($currentPath)) {
+                touch($currentPath);
+            }
+        } else {
+            error_log("we have no local copy of ".$url." Downloading !");
+            $dir = dirname($filename);
+            if(!is_dir($dir)) {
+                mkdir($dir, 0777, true);
+            }
+            $this->download_remote($url, $filename);
+        }
+        return is_file($filename) ? file_get_contents($filename) : false;
+    }
+
+    /**
+     * Copy the resource found at $url into the local file $save_path.
+     * @param string $url location to read from
+     * @param string $save_path local file to write
+     * @return bool true when the whole resource was copied, false otherwise
+     */
+    public function download_remote($url , $save_path) {
+        $copied = copy($url, $save_path);
+        if(!$copied && is_file($save_path)) {
+            unlink($save_path); // a partial file would otherwise be served from the cache forever
+        }
+        return $copied;
+    }
+}
+
 class Bridge{
 
     static protected $dirBridge;