瀏覽代碼

Ajout de la date de publication en utilisant la date de dernière modification du cache

Kevin Lagaisse 9 年之前
父節點
當前提交
de16ba234e
共有 2 個文件被更改,包括 39 次插入和 6 次删除
  1. 24 5
      bridges/CpasbienBridge.php
  2. 15 1
      lib/Bridge.php

+ 24 - 5
bridges/CpasbienBridge.php

@@ -10,7 +10,23 @@
 * @maintainer lagaisse
 * @use1(q="keywords like this")
 */
-class CpasbienBridge extends BridgeAbstract{
+
+/**
+ * Build a simple_html_dom document from an HTML string already held in
+ * memory, instead of reading it from a file or URL.
+ *
+ * @param string $contents        HTML markup to parse.
+ * @param int    $maxLen          Accepted but never read by this function.
+ * @param bool   $lowercase       Forwarded to the simple_html_dom constructor and to load().
+ * @param bool   $forceTagsClosed Forwarded to the simple_html_dom constructor.
+ * @param string $target_charset  Forwarded to the simple_html_dom constructor.
+ * @param bool   $stripRN         Forwarded to the simple_html_dom constructor and to load().
+ * @param string $defaultBRText   Forwarded to the simple_html_dom constructor.
+ * @param string $defaultSpanText Forwarded to the simple_html_dom constructor.
+ *
+ * @return simple_html_dom|false The loaded DOM, or false when $contents is
+ *                               empty() or longer than MAX_FILE_SIZE bytes.
+ */
+function content_get_html($contents, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
+{
+    // The parser is created up front, before the input is validated.
+    $parser = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
+
+    // Only non-empty input that fits within MAX_FILE_SIZE is parsed.
+    // Note that empty() also rejects the literal string "0".
+    if (!empty($contents) && strlen($contents) <= MAX_FILE_SIZE)
+    {
+        $parser->load($contents, $lowercase, $stripRN);
+        return $parser;
+    }
+
+    return false;
+}
+
+class CpasbienBridge extends HttpCachingBridgeAbstract{
     
     private $request;
 
@@ -27,14 +43,16 @@ class CpasbienBridge extends BridgeAbstract{
         foreach ($html->find('#gauche',0)->find('div') as $episode) {
             if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1')
             {
-                $htmlepisode=file_get_html($episode->find('a', 0)->getAttribute('href'));
+                
+                $htmlepisode=content_get_html($this->get_cached($episode->find('a', 0)->getAttribute('href')));
 
                 $item = new \Item();
                 $item->name = $episode->find('a', 0)->text();
                 $item->title = $episode->find('a', 0)->text();
-                $element=$htmlepisode->find('#textefiche', 0)->find('p',1);
-                if (isset($element)) {
-                    $item->content = $element->text();
+                $item->timestamp = $this->get_cached_time($episode->find('a', 0)->getAttribute('href'));
+                $textefiche=$htmlepisode->find('#textefiche', 0)->find('p',1);
+                if (isset($textefiche)) {
+                    $item->content = $textefiche->text();
                 }
                 else {
                     $item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text();    
@@ -42,6 +60,7 @@ class CpasbienBridge extends BridgeAbstract{
 
                 $item->id = $episode->find('a', 0)->getAttribute('href');
                 $item->uri = $this->getURI() . $htmlepisode->find('#telecharger',0)->getAttribute('href');
+                $item->thumbnailUri = $htmlepisode->find('#bigcover', 0)->find('img',0)->getAttribute('src');
                 $this->items[] = $item;
             }
         }

+ 15 - 1
lib/Bridge.php

@@ -123,7 +123,21 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract {
         }
         return file_get_contents($filename);
     }
-    
+  
+    /**
+     * Return the filectime() of the cached copy of a page.
+     *
+     * The URL is mapped onto a path under cache/pages/ by dropping the
+     * scheme and turning "?", "&" and "=" into directory separators; a
+     * path ending in "/" is completed with "index.html". When no file
+     * exists at that path, get_cached() is called first (presumably to
+     * download and store the page -- its body is not fully visible here).
+     *
+     * NOTE(review): per the PHP manual, filectime() is the inode change
+     * time on most Unix filesystems, not the content modification time;
+     * confirm this is the timestamp intended as publication date.
+     *
+     * @param string $url Address of the page.
+     *
+     * @return int|false Whatever filectime() returns for the cache file.
+     */
+    public function get_cached_time($url) {
+        $stripped = ["http://", "https://", "?", "&", "="];
+        $substitutes = ["", "", "/", "/", "/"];
+        // TODO build this from the variable given to Cache
+        $cacheFile = __DIR__ . '/../cache/' . "pages/" . str_replace($stripped, $substitutes, $url);
+        if ('/' == substr($cacheFile, -1)) {
+            // Directory-like URLs are stored as an index.html inside that directory.
+            $cacheFile .= "index.html";
+        }
+        if (file_exists($cacheFile) === false) {
+            // Not in the cache yet: ask get_cached() for the page before stat-ing it.
+            $this->get_cached($url);
+        }
+        return filectime($cacheFile);
+    }
+
     private function refresh_in_cache($pageCacheDir, $filename) {
 		$currentPath = $filename;
 		while(!$pageCacheDir==$currentPath) {