Browse Source

universal image caching and automatic cache directories maintenance

Andrew Dolgov 12 years ago
parent
commit
3c696512fc
7 changed files with 110 additions and 44 deletions
  1. 0 0
      cache/images/.empty
  2. 4 3
      classes/feeds.php
  3. 10 19
      classes/pref_feeds.php
  4. 0 6
      config.php-dist
  5. 9 3
      image.php
  6. 71 10
      include/rssfuncs.php
  7. 16 3
      mobile/image.php

+ 0 - 0
cache/images/.empty


+ 4 - 3
classes/feeds.php

@@ -184,9 +184,10 @@ class Feeds extends Protected_Handler {
 			catchupArticlesById($this->link, $ids, $cmode);
 		} */
 
-		//if ($method == "ForceUpdate" && $feed && is_numeric($feed) > 0) {
-		//	update_rss_feed($this->link, $feed, true);
-		//}
+		if ($method == "ForceUpdate" && $feed && is_numeric($feed) > 0) {
+			include "rssfuncs.php";
+			update_rss_feed($this->link, $feed, true);
+		}
 
 		if ($method == "MarkAllRead")  {
 			catchup_feed($this->link, $feed, $cat_view);

+ 10 - 19
classes/pref_feeds.php

@@ -448,12 +448,10 @@ class Pref_Feeds extends Protected_Handler {
 			$checked = "";
 		}
 
-		if (SIMPLEPIE_CACHE_IMAGES) {
-			print "<hr/><input dojoType=\"dijit.form.CheckBox\" type=\"checkbox\" id=\"cache_images\"
-			name=\"cache_images\"
+		print "<hr/><input dojoType=\"dijit.form.CheckBox\" type=\"checkbox\" id=\"cache_images\"
+		name=\"cache_images\"
 			$checked>&nbsp;<label for=\"cache_images\">".
-			__('Cache images locally (SimplePie only)')."</label>";
-		}
+		__('Cache images locally')."</label>";
 
 		$mark_unread_on_update = sql_bool_to_bool(db_fetch_result($result, 0, "mark_unread_on_update"));
 
@@ -655,16 +653,13 @@ class Pref_Feeds extends Protected_Handler {
 
 		print "&nbsp;"; $this->batch_edit_cbox("always_display_enclosures", "always_display_enclosures_l");
 
-		if (SIMPLEPIE_CACHE_IMAGES) {
-			print "<br/><input disabled=\"1\" type=\"checkbox\" id=\"cache_images\"
-				name=\"cache_images\"
-				dojoType=\"dijit.form.CheckBox\">&nbsp;<label class='insensitive' id=\"cache_images_l\"
-				for=\"cache_images\">".
-			__('Cache images locally')."</label>";
+		print "<br/><input disabled=\"1\" type=\"checkbox\" id=\"cache_images\"
+			name=\"cache_images\"
+			dojoType=\"dijit.form.CheckBox\">&nbsp;<label class='insensitive' id=\"cache_images_l\"
+			for=\"cache_images\">".
+		__('Cache images locally')."</label>";
 
-
-			print "&nbsp;"; $this->batch_edit_cbox("cache_images", "cache_images_l");
-		}
+		print "&nbsp;"; $this->batch_edit_cbox("cache_images", "cache_images_l");
 
 		print "<br/><input disabled=\"1\" type=\"checkbox\" id=\"mark_unread_on_update\"
 			name=\"mark_unread_on_update\"
@@ -741,11 +736,7 @@ class Pref_Feeds extends Protected_Handler {
 			$category_qpart_nocomma = "";
 		}
 
-		if (SIMPLEPIE_CACHE_IMAGES) {
-			$cache_images_qpart = "cache_images = $cache_images,";
-		} else {
-			$cache_images_qpart = "";
-		}
+		$cache_images_qpart = "cache_images = $cache_images,";
 
 		if (!$batch) {
 

+ 0 - 6
config.php-dist

@@ -55,12 +55,6 @@
 	// *** Feed settings ***
 	// *********************
 
-	define('SIMPLEPIE_CACHE_IMAGES', false);
-	// Allow caching feed images when using SimplePie, to bypass hotlink
-	// prevention and such at expense of local disk space and bandwidth.
-	// Note that you (or your users) also have to enable image caching 
-	// in feed editor.
-
 	define('DEFAULT_UPDATE_METHOD', 0);
 	// Which feed parsing library to use as default:
 	// 0 - Magpie

+ 9 - 3
image.php

@@ -1,9 +1,15 @@
 <?php
-	set_include_path(get_include_path() . PATH_SEPARATOR . 
+	set_include_path(get_include_path() . PATH_SEPARATOR .
 		dirname(__FILE__) . "/include");
 
 	require_once "config.php";
-	require_once "lib/simplepie/simplepie.inc";
 
-	SimplePie_Misc::display_cached_file($_GET['i'], SIMPLEPIE_CACHE_DIR, 'spi');
+	// Cached images are stored in CACHE_DIR/images under the SHA-1 of their
+	// original URL, which is passed in the "url" query parameter.
+	$url = isset($_GET['url']) ? $_GET['url'] : '';
+
+	// Reject a missing or malformed (e.g. url[]=...) parameter instead of
+	// hashing garbage and emitting an empty Location header.
+	if (!is_string($url) || $url === '') {
+		header("HTTP/1.0 400 Bad Request");
+		exit;
+	}
+
+	$filename = CACHE_DIR . '/images/' . sha1($url) . '.png';
+
+	if (file_exists($filename)) {
+		header("Content-type: image/png");
+		// Stream the file to the client without loading it all into memory.
+		readfile($filename);
+	} else {
+		// Not in the cache: send the client to the original image.
+		header("Location: " . $url);
+	}
 ?>

+ 71 - 10
include/rssfuncs.php

@@ -136,6 +136,8 @@
 				WHERE id IN (%s)", implode(',', $feed_ids)));
 		}
 
+		expire_cached_files($debug);
+
 		// For each feed, we call the feed update function.
 		while ($line = array_pop($feeds_to_update)) {
 
@@ -364,15 +366,6 @@
 				$rss->set_output_encoding('UTF-8');
 				$rss->force_feed(true);
 
-				if (SIMPLEPIE_CACHE_IMAGES && $cache_images) {
-
-					if ($debug_enabled) {
-						_debug("enabling image cache");
-					}
-
-					$rss->set_image_handler("image.php", 'i');
-				}
-
 				if ($debug_enabled) {
 					_debug("feed update interval (sec): " .
 						get_feed_update_interval($link, $feed)*60);
@@ -685,9 +678,13 @@
 					}
 				}
 
+				if ($cache_images)
+					$entry_content = cache_images($entry_content, $site_url, $debug_enabled);
+
 				if ($_REQUEST["xdebug"] == 2) {
 					print "update_rss_feed: content: ";
 					print_r(htmlspecialchars($entry_content));
+					print "\n";
 				}
 
 				$entry_content_unescaped = $entry_content;
@@ -735,7 +732,7 @@
 
 				$entry_content = db_escape_string($entry_content, false);
 
-				$content_hash = "SHA1:" . sha1(strip_tags($entry_content));
+				$content_hash = "SHA1:x" . sha1(strip_tags($entry_content));
 
 				$entry_title = db_escape_string($entry_title);
 				$entry_link = db_escape_string($entry_link);
@@ -1311,7 +1308,71 @@
 
 	}
 
+	/**
+	 * Downloads the images referenced by an article into CACHE_DIR/images and
+	 * rewrites each successfully cached <img> so that its src points at image.php.
+	 *
+	 * @param string $html     article HTML to process
+	 * @param string $site_url passed to rewrite_relative_url() to resolve relative image paths
+	 * @param bool   $debug    when true, progress is reported through _debug()
+	 * @return string the serialized <body> element of the parsed document, or
+	 *                $html unchanged when the parser produced no <body>
+	 */
+	function cache_images($html, $site_url, $debug) {
+		$cache_dir = CACHE_DIR . "/images";
+
+		libxml_use_internal_errors(true);
+
+		// Prepended so that libxml treats the fragment as UTF-8.
+		$charset_hack = '<head>
+			<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+		</head>';
+
+		$doc = new DOMDocument();
+		$doc->loadHTML($charset_hack . $html);
+
+		// Parse errors are collected internally; drop them so the buffer does
+		// not keep growing while many articles are processed in one run.
+		libxml_clear_errors();
+
+		$xpath = new DOMXPath($doc);
+
+		// The query only matches <img> elements that carry a src attribute.
+		$entries = $xpath->query('(//img[@src])');
+
+		foreach ($entries as $entry) {
+			$src = rewrite_relative_url($site_url, $entry->getAttribute('src'));
+
+			$local_filename = $cache_dir . "/" . sha1($src) . ".png";
+
+			if (!file_exists($local_filename)) {
+				if ($debug) _debug("cache_images: downloading: $src to $local_filename");
+
+				$file_content = fetch_file_contents($src);
+
+				if ($file_content) {
+					file_put_contents($local_filename, $file_content);
+				}
+			}
+
+			// Only rewrite when the image really is in the cache, so a failed
+			// download leaves the original src in place.
+			if (file_exists($local_filename)) {
+				// urlencode() keeps '&', '#', '+' and '%' of the original URL
+				// intact inside the query string; image.php hashes the decoded
+				// $_GET['url'], which must be identical to $src. The DOM
+				// serializer takes care of HTML-escaping the attribute value.
+				$entry->setAttribute('src', SELF_URL_PATH . '/image.php?url=' .
+					urlencode($src));
+			}
+		}
+
+		$node = $doc->getElementsByTagName('body')->item(0);
+
+		// Without a <body> saveXML(null) would dump the whole document,
+		// including the charset hack; hand back the input instead.
+		if (!$node) return $html;
+
+		return $doc->saveXML($node);
+	}
+
+	/**
+	 * Deletes files older than seven days from the magpie, simplepie and images
+	 * sub-directories of CACHE_DIR.
+	 *
+	 * Directories that are not writable are left alone.
+	 *
+	 * @param bool $debug when true, progress is reported through _debug()
+	 */
+	function expire_cached_files($debug) {
+		// Anything last modified before this timestamp is considered expired.
+		$cutoff = time() - 86400*7;
+
+		foreach (array("magpie", "simplepie", "images") as $dir) {
+			$cache_dir = CACHE_DIR . "/$dir";
+
+			if ($debug) _debug("Expiring $cache_dir");
+
+			$num_deleted = 0;
+
+			if (is_writable($cache_dir)) {
+				$files = glob("$cache_dir/*");
+
+				// glob() returns false on error; treat that like "no files".
+				if ($files) {
+					foreach ($files as $file) {
+						// Skip sub-directories and count only files that were
+						// actually removed.
+						if (is_file($file) && filemtime($file) < $cutoff && unlink($file)) {
+							++$num_deleted;
+						}
+					}
+				}
+			}
+
+			if ($debug) _debug("Removed $num_deleted files.");
+		}
+	}
 
 ?>

+ 16 - 3
mobile/image.php

@@ -1,6 +1,19 @@
 <?php
-	require_once "../config.php";
-	require_once "../lib/simplepie/simplepie.inc";
+	set_include_path(get_include_path() . PATH_SEPARATOR .
+		dirname(__FILE__) . PATH_SEPARATOR .
+		dirname(dirname(__FILE__)) . PATH_SEPARATOR .
+		dirname(dirname(__FILE__)) . "/include" );
 
-	SimplePie_Misc::display_cached_file($_GET['i'], '../'.SIMPLEPIE_CACHE_DIR, 'spi');
+	require_once "config.php";
+
+	chdir('..');
+
+	// Cached images are stored in CACHE_DIR/images under the SHA-1 of their
+	// original URL, which is passed in the "url" query parameter.
+	$url = isset($_GET['url']) ? $_GET['url'] : '';
+
+	// Reject a missing or malformed (e.g. url[]=...) parameter instead of
+	// hashing garbage and emitting an empty Location header.
+	if (!is_string($url) || $url === '') {
+		header("HTTP/1.0 400 Bad Request");
+		exit;
+	}
+
+	$filename = CACHE_DIR . '/images/' . sha1($url) . '.png';
+
+	if (file_exists($filename)) {
+		header("Content-type: image/png");
+		// Stream the file to the client without loading it all into memory.
+		readfile($filename);
+	} else {
+		// Not in the cache: send the client to the original image.
+		header("Location: " . $url);
+	}
 ?>