implement ttrss_feeds.cache_content
This commit is contained in:
parent
7b28a98697
commit
87764a50cf
10 changed files with 142 additions and 39 deletions
|
@ -280,7 +280,7 @@ class API extends Handler {
|
|||
|
||||
$article_id = join(",", array_filter(explode(",", db_escape_string($_REQUEST["article_id"])), is_numeric));
|
||||
|
||||
$query = "SELECT id,title,link,content,feed_id,comments,int_id,
|
||||
$query = "SELECT id,title,link,content,cached_content,feed_id,comments,int_id,
|
||||
marked,unread,published,
|
||||
".SUBSTRING_FOR_DATE."(updated,1,16) as updated,
|
||||
author
|
||||
|
@ -309,7 +309,7 @@ class API extends Handler {
|
|||
"comments" => $line["comments"],
|
||||
"author" => $line["author"],
|
||||
"updated" => strtotime($line["updated"]),
|
||||
"content" => $line["content"],
|
||||
"content" => $line["cached_content"] != "" ? $line["cached_content"] : $line["content"],
|
||||
"feed_id" => $line["feed_id"],
|
||||
"attachments" => $attachments
|
||||
);
|
||||
|
|
|
@ -158,16 +158,20 @@ class Feeds extends Handler_Protected {
|
|||
// Update the feed if required with some basic flood control
|
||||
|
||||
$result = db_query($this->link,
|
||||
"SELECT cache_images,".SUBSTRING_FOR_DATE."(last_updated,1,19) AS last_updated
|
||||
"SELECT cache_images,cache_content,".SUBSTRING_FOR_DATE."(last_updated,1,19) AS last_updated
|
||||
FROM ttrss_feeds WHERE id = '$feed'");
|
||||
|
||||
if (db_num_rows($result) != 0) {
|
||||
$last_updated = strtotime(db_fetch_result($result, 0, "last_updated"));
|
||||
$cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
|
||||
$cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content"));
|
||||
|
||||
if (!$cache_images && time() - $last_updated > 120 || isset($_REQUEST['DevForceUpdate'])) {
|
||||
if (!$cache_images && !$cache_content && time() - $last_updated > 120 || isset($_REQUEST['DevForceUpdate'])) {
|
||||
include "rssfuncs.php";
|
||||
update_rss_feed($this->link, $feed, true, true);
|
||||
} else {
|
||||
db_query($this->link, "UPDATE ttrss_feeds SET last_updated = '1970-01-01', last_update_started = '1970-01-01'
|
||||
WHERE id = '$feed'");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -234,6 +238,7 @@ class Feeds extends Handler_Protected {
|
|||
$feed_title = $qfh_ret[1];
|
||||
$feed_site_url = $qfh_ret[2];
|
||||
$last_error = $qfh_ret[3];
|
||||
$cache_content = true;
|
||||
|
||||
$vgroup_last_feed = $vgr_last_feed;
|
||||
|
||||
|
@ -627,6 +632,10 @@ class Feeds extends Handler_Protected {
|
|||
|
||||
$feed_site_url = $line["site_url"];
|
||||
|
||||
if ($cache_content && $line["cached_content"] != "") {
|
||||
$line["content_preview"] =& $line["cached_content"];
|
||||
}
|
||||
|
||||
$article_content = sanitize($this->link, $line["content_preview"],
|
||||
false, false, $feed_site_url);
|
||||
|
||||
|
|
|
@ -653,6 +653,19 @@ class Pref_Feeds extends Handler_Protected {
|
|||
$checked> <label for=\"cache_images\">".
|
||||
__('Cache images locally')."</label>";
|
||||
|
||||
$cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content"));
|
||||
|
||||
if ($cache_content) {
|
||||
$checked = "checked=\"1\"";
|
||||
} else {
|
||||
$checked = "";
|
||||
}
|
||||
|
||||
print "<hr/><input dojoType=\"dijit.form.CheckBox\" type=\"checkbox\" id=\"cache_content\"
|
||||
name=\"cache_content\"
|
||||
$checked> <label for=\"cache_content\">".
|
||||
__('Cache content locally')."</label>";
|
||||
|
||||
$mark_unread_on_update = sql_bool_to_bool(db_fetch_result($result, 0, "mark_unread_on_update"));
|
||||
|
||||
if ($mark_unread_on_update) {
|
||||
|
@ -914,6 +927,8 @@ class Pref_Feeds extends Handler_Protected {
|
|||
db_escape_string($_POST["include_in_digest"]));
|
||||
$cache_images = checkbox_to_sql_bool(
|
||||
db_escape_string($_POST["cache_images"]));
|
||||
$cache_content = checkbox_to_sql_bool(
|
||||
db_escape_string($_POST["cache_content"]));
|
||||
$update_method = (int) db_escape_string($_POST["update_method"]);
|
||||
|
||||
$always_display_enclosures = checkbox_to_sql_bool(
|
||||
|
@ -938,8 +953,6 @@ class Pref_Feeds extends Handler_Protected {
|
|||
$category_qpart_nocomma = "";
|
||||
}
|
||||
|
||||
$cache_images_qpart = "cache_images = $cache_images,";
|
||||
|
||||
if (!$batch) {
|
||||
|
||||
$result = db_query($this->link, "UPDATE ttrss_feeds SET
|
||||
|
@ -951,7 +964,8 @@ class Pref_Feeds extends Handler_Protected {
|
|||
auth_pass = '$auth_pass',
|
||||
private = $private,
|
||||
rtl_content = $rtl_content,
|
||||
$cache_images_qpart
|
||||
cache_images = $cache_images,
|
||||
cache_content = $cache_content,
|
||||
include_in_digest = $include_in_digest,
|
||||
always_display_enclosures = $always_display_enclosures,
|
||||
mark_unread_on_update = $mark_unread_on_update,
|
||||
|
@ -1023,6 +1037,10 @@ class Pref_Feeds extends Handler_Protected {
|
|||
$qpart = "cache_images = $cache_images";
|
||||
break;
|
||||
|
||||
case "cache_content":
|
||||
$qpart = "cache_content = $cache_content";
|
||||
break;
|
||||
|
||||
case "rtl_content":
|
||||
$qpart = "rtl_content = $rtl_content";
|
||||
break;
|
||||
|
|
|
@ -125,6 +125,7 @@ function confirmOP() {
|
|||
foreach (array_keys($update_versions) as $v) {
|
||||
if ($v == $version + 1) {
|
||||
print "<p>".T_sprintf("Updating to version %d...", $v)."</p>";
|
||||
db_query($link, "BEGIN");
|
||||
$fp = fopen($update_versions[$v], "r");
|
||||
if ($fp) {
|
||||
while (!feof($fp)) {
|
||||
|
@ -136,6 +137,7 @@ function confirmOP() {
|
|||
}
|
||||
}
|
||||
fclose($fp);
|
||||
db_query($link, "COMMIT");
|
||||
|
||||
print "<p>".__("Checking version... ");
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
<?php
|
||||
define('EXPECTED_CONFIG_VERSION', 26);
|
||||
define('SCHEMA_VERSION', 98);
|
||||
define('SCHEMA_VERSION', 99);
|
||||
|
||||
$fetch_last_error = false;
|
||||
|
||||
|
@ -2367,7 +2367,7 @@
|
|||
}
|
||||
}
|
||||
|
||||
$content_query_part = "content as content_preview,";
|
||||
$content_query_part = "content as content_preview, cached_content, ";
|
||||
|
||||
if (is_numeric($feed)) {
|
||||
|
||||
|
@ -3111,15 +3111,17 @@
|
|||
|
||||
//if (!$zoom_mode) { print "<article id='$id'><![CDATA["; };
|
||||
|
||||
$result = db_query($link, "SELECT rtl_content, always_display_enclosures FROM ttrss_feeds
|
||||
$result = db_query($link, "SELECT rtl_content, always_display_enclosures, cache_content FROM ttrss_feeds
|
||||
WHERE id = '$feed_id' AND owner_uid = $owner_uid");
|
||||
|
||||
if (db_num_rows($result) == 1) {
|
||||
$rtl_content = sql_bool_to_bool(db_fetch_result($result, 0, "rtl_content"));
|
||||
$always_display_enclosures = sql_bool_to_bool(db_fetch_result($result, 0, "always_display_enclosures"));
|
||||
$cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content"));
|
||||
} else {
|
||||
$rtl_content = false;
|
||||
$always_display_enclosures = false;
|
||||
$cache_content = false;
|
||||
}
|
||||
|
||||
if ($rtl_content) {
|
||||
|
@ -3146,7 +3148,8 @@
|
|||
tag_cache,
|
||||
author,
|
||||
orig_feed_id,
|
||||
note
|
||||
note,
|
||||
cached_content
|
||||
FROM ttrss_entries,ttrss_user_entries
|
||||
WHERE id = '$id' AND ref_id = id AND owner_uid = $owner_uid");
|
||||
|
||||
|
@ -3344,6 +3347,10 @@
|
|||
}
|
||||
}
|
||||
|
||||
if ($cache_content && $line["cached_content"] != "") {
|
||||
$line["content"] =& $line["cached_content"];
|
||||
}
|
||||
|
||||
$article_content = sanitize($link, $line["content"], false, $owner_uid,
|
||||
$feed_site_url);
|
||||
|
||||
|
@ -4460,6 +4467,11 @@
|
|||
}
|
||||
|
||||
if ($show_content) {
|
||||
|
||||
if ($line["cached_content"] != "") {
|
||||
$line["content_preview"] =& $line["cached_content"];
|
||||
}
|
||||
|
||||
if ($sanitize_content) {
|
||||
$headline_row["content"] = sanitize($link,
|
||||
$line["content_preview"], false, false, $line["site_url"]);
|
||||
|
|
|
@ -157,6 +157,7 @@
|
|||
|
||||
} // function update_daemon_common
|
||||
|
||||
// ignore_daemon is not used
|
||||
function update_rss_feed($link, $feed, $ignore_daemon = false, $no_cache = false,
|
||||
$override_url = false) {
|
||||
|
||||
|
@ -166,36 +167,16 @@
|
|||
|
||||
$debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug'];
|
||||
|
||||
if (!$_REQUEST["daemon"] && !$ignore_daemon) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ($debug_enabled) {
|
||||
_debug("update_rss_feed: start");
|
||||
}
|
||||
|
||||
if (!$ignore_daemon) {
|
||||
|
||||
if (DB_TYPE == "pgsql") {
|
||||
$updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '120 seconds')";
|
||||
} else {
|
||||
$updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 120 SECOND))";
|
||||
}
|
||||
|
||||
$result = db_query($link, "SELECT id,update_interval,auth_login,
|
||||
auth_pass,cache_images,update_method,last_updated
|
||||
FROM ttrss_feeds WHERE id = '$feed' AND $updstart_thresh_qpart");
|
||||
|
||||
} else {
|
||||
|
||||
$result = db_query($link, "SELECT id,update_interval,auth_login,
|
||||
feed_url,auth_pass,cache_images,update_method,last_updated,
|
||||
feed_url,auth_pass,cache_images,update_method,last_updated,cache_content,
|
||||
mark_unread_on_update, owner_uid, update_on_checksum_change,
|
||||
pubsub_state
|
||||
FROM ttrss_feeds WHERE id = '$feed'");
|
||||
|
||||
}
|
||||
|
||||
if (db_num_rows($result) == 0) {
|
||||
if ($debug_enabled) {
|
||||
_debug("update_rss_feed: feed $feed NOT FOUND/SKIPPED");
|
||||
|
@ -240,6 +221,7 @@
|
|||
}
|
||||
|
||||
$cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
|
||||
$cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content"));
|
||||
$fetch_url = db_fetch_result($result, 0, "feed_url");
|
||||
|
||||
$feed = db_escape_string($feed);
|
||||
|
@ -624,6 +606,7 @@
|
|||
}
|
||||
|
||||
$entry_content_unescaped = $entry_content;
|
||||
$entry_cached_content = "";
|
||||
|
||||
if ($use_simplepie) {
|
||||
$entry_comments = strip_tags($item->data["comments"]);
|
||||
|
@ -782,6 +765,20 @@
|
|||
_debug("update_rss_feed: base guid not found");
|
||||
}
|
||||
|
||||
if ($cache_content) {
|
||||
if ($debug_enabled) {
|
||||
_debug("update_rss_feed: caching content...");
|
||||
}
|
||||
|
||||
$entry_cached_content = cache_content($link, $entry_link, $auth_login, $auth_pass);
|
||||
|
||||
if ($cache_images && is_writable(CACHE_DIR . '/images'))
|
||||
$entry_cached_content = cache_images($entry_cached_content, $site_url, $debug_enabled);
|
||||
|
||||
$entry_cached_content = db_escape_string($entry_cached_content, false);
|
||||
|
||||
}
|
||||
|
||||
// base post entry does not exist, create it
|
||||
|
||||
$result = db_query($link,
|
||||
|
@ -792,6 +789,7 @@
|
|||
updated,
|
||||
content,
|
||||
content_hash,
|
||||
cached_content,
|
||||
no_orig_date,
|
||||
date_updated,
|
||||
date_entered,
|
||||
|
@ -804,6 +802,7 @@
|
|||
'$entry_link',
|
||||
'$entry_timestamp_fmt',
|
||||
'$entry_content',
|
||||
'$entry_cached_content',
|
||||
'$content_hash',
|
||||
$no_orig_date,
|
||||
NOW(),
|
||||
|
@ -996,6 +995,19 @@
|
|||
if ($content_hash != $orig_content_hash) {
|
||||
$post_needs_update = true;
|
||||
$update_insignificant = false;
|
||||
|
||||
if ($cache_content) {
|
||||
if ($debug_enabled) {
|
||||
_debug("update_rss_feed: caching content because original checksum changed...");
|
||||
}
|
||||
|
||||
$entry_cached_content = cache_content($link, $entry_link, $auth_login, $auth_pass);
|
||||
|
||||
if ($cache_images && is_writable(CACHE_DIR . '/images'))
|
||||
$entry_cached_content = cache_images($entry_cached_content, $site_url, $debug_enabled);
|
||||
|
||||
$entry_cached_content = db_escape_string($entry_cached_content, false);
|
||||
}
|
||||
}
|
||||
|
||||
if (db_escape_string($orig_title) != $entry_title) {
|
||||
|
@ -1016,6 +1028,7 @@
|
|||
db_query($link, "UPDATE ttrss_entries
|
||||
SET title = '$entry_title', content = '$entry_content',
|
||||
content_hash = '$content_hash',
|
||||
cached_content = '$entry_cached_content',
|
||||
updated = '$entry_timestamp_fmt',
|
||||
num_comments = '$num_comments'
|
||||
WHERE id = '$ref_id'");
|
||||
|
@ -1484,4 +1497,25 @@
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Download the page at $url and return the markup of its <body> element.
 *
 * The page is retrieved with fetch_file_contents() using the supplied
 * credentials, parsed as HTML, and the first <body> node is serialized
 * back to markup (LIBXML_NOEMPTYTAG keeps empty elements as explicit
 * open/close tag pairs).
 *
 * @param mixed  $link  Not used by this function; kept so that existing
 *                      callers do not have to change.
 * @param string $url   Address of the page to download.
 * @param string $login Login handed through to fetch_file_contents().
 * @param string $pass  Password handed through to fetch_file_contents().
 *
 * @return string Serialized <body> element, or "" when the download
 *                yields nothing or the document has no <body>.
 */
function cache_content($link, $url, $login, $pass) {

	$content = fetch_file_contents($url, $login, $pass);

	if (!$content) {
		return "";
	}

	// Fetched pages are often not well-formed. Let libxml buffer its parse
	// errors instead of muting the call with "@", then drop the buffered
	// errors and restore whatever error mode the caller had selected.
	$previous_mode = libxml_use_internal_errors(true);

	$doc = new DOMDocument();
	$doc->loadHTML($content);

	libxml_clear_errors();
	libxml_use_internal_errors($previous_mode);

	$node = $doc->getElementsByTagName('body')->item(0);

	if (!$node) {
		return "";
	}

	return $doc->saveXML($node, LIBXML_NOEMPTYTAG);
}
|
||||
?>
|
||||
|
|
|
@ -116,6 +116,7 @@ create table ttrss_feeds (id integer not null auto_increment primary key,
|
|||
hidden bool not null default false,
|
||||
include_in_digest boolean not null default true,
|
||||
cache_images boolean not null default false,
|
||||
cache_content boolean not null default false,
|
||||
auth_pass_encrypted boolean not null default false,
|
||||
last_viewed datetime default null,
|
||||
last_update_started datetime default null,
|
||||
|
@ -150,6 +151,7 @@ create table ttrss_entries (id integer not null primary key auto_increment,
|
|||
updated datetime not null,
|
||||
content longtext not null,
|
||||
content_hash varchar(250) not null,
|
||||
cached_content longtext,
|
||||
no_orig_date bool not null default 0,
|
||||
date_entered datetime not null,
|
||||
date_updated datetime not null,
|
||||
|
@ -306,7 +308,7 @@ create table ttrss_tags (id integer primary key auto_increment,
|
|||
|
||||
create table ttrss_version (schema_version int not null) ENGINE=InnoDB DEFAULT CHARSET=UTF8;
|
||||
|
||||
insert into ttrss_version values (98);
|
||||
insert into ttrss_version values (99);
|
||||
|
||||
create table ttrss_enclosures (id integer primary key auto_increment,
|
||||
content_url text not null,
|
||||
|
|
|
@ -78,6 +78,7 @@ create table ttrss_feeds (id serial not null primary key,
|
|||
include_in_digest boolean not null default true,
|
||||
rtl_content boolean not null default false,
|
||||
cache_images boolean not null default false,
|
||||
cache_content boolean not null default false,
|
||||
last_viewed timestamp default null,
|
||||
last_update_started timestamp default null,
|
||||
update_method integer not null default 0,
|
||||
|
@ -130,6 +131,7 @@ create table ttrss_entries (id serial not null primary key,
|
|||
updated timestamp not null,
|
||||
content text not null,
|
||||
content_hash varchar(250) not null,
|
||||
cached_content text,
|
||||
no_orig_date boolean not null default false,
|
||||
date_entered timestamp not null,
|
||||
date_updated timestamp not null,
|
||||
|
@ -254,7 +256,7 @@ create index ttrss_tags_post_int_id_idx on ttrss_tags(post_int_id);
|
|||
|
||||
create table ttrss_version (schema_version int not null);
|
||||
|
||||
insert into ttrss_version values (98);
|
||||
insert into ttrss_version values (99);
|
||||
|
||||
create table ttrss_enclosures (id serial not null primary key,
|
||||
content_url text not null,
|
||||
|
|
12
schema/versions/mysql/99.sql
Normal file
12
schema/versions/mysql/99.sql
Normal file
|
@ -0,0 +1,12 @@
|
|||
-- Schema migration to version 99 (MySQL): adds the ttrss_feeds.cache_content
-- flag and the ttrss_entries.cached_content column.
-- NOTE(review): MySQL documents ALTER TABLE as causing an implicit commit, so
-- this begin/commit pair presumably does not make the migration atomic - confirm.
begin;
|
||||
|
||||
-- The flag is added as a nullable column first, existing rows are set to
-- false, and only then is it tightened to NOT NULL with a default of false.
alter table ttrss_feeds add column cache_content bool;
|
||||
update ttrss_feeds set cache_content = false;
|
||||
alter table ttrss_feeds change cache_content cache_content bool not null;
|
||||
alter table ttrss_feeds alter column cache_content set default false;
|
||||
|
||||
-- Nullable column on entries, no default is declared for it.
alter table ttrss_entries add column cached_content longtext;
|
||||
|
||||
-- Record the new schema version.
update ttrss_version set schema_version = 99;
|
||||
|
||||
commit;
|
12
schema/versions/pgsql/99.sql
Normal file
12
schema/versions/pgsql/99.sql
Normal file
|
@ -0,0 +1,12 @@
|
|||
-- Schema migration to version 99 (PostgreSQL): adds the
-- ttrss_feeds.cache_content flag and the ttrss_entries.cached_content column.
begin;
|
||||
|
||||
-- The flag is added as a nullable column first, existing rows are set to
-- false, and only then is it tightened to NOT NULL with a default of false.
alter table ttrss_feeds add column cache_content boolean;
|
||||
update ttrss_feeds set cache_content = false;
|
||||
alter table ttrss_feeds alter column cache_content set not null;
|
||||
alter table ttrss_feeds alter column cache_content set default false;
|
||||
|
||||
-- Nullable column on entries, no default is declared for it.
alter table ttrss_entries add column cached_content text;
|
||||
|
||||
-- Record the new schema version.
update ttrss_version set schema_version = 99;
|
||||
|
||||
commit;
|
Loading…
Reference in a new issue