add support for http 304 not modified (no timestamp calculation bullshit like last time)

This commit is contained in:
Andrew Dolgov 2017-08-17 14:40:21 +03:00
parent edefcec19d
commit 153cb6d305
6 changed files with 96 additions and 22 deletions

View file

@ -292,7 +292,8 @@ class RSSUtils {
$result = db_query("SELECT id,update_interval,auth_login, $result = db_query("SELECT id,update_interval,auth_login,
feed_url,auth_pass,cache_images, feed_url,auth_pass,cache_images,
mark_unread_on_update, owner_uid, mark_unread_on_update, owner_uid,
auth_pass_encrypted, feed_language auth_pass_encrypted, feed_language,
last_modified
FROM ttrss_feeds WHERE id = '$feed'"); FROM ttrss_feeds WHERE id = '$feed'");
$owner_uid = db_fetch_result($result, 0, "owner_uid"); $owner_uid = db_fetch_result($result, 0, "owner_uid");
@ -312,6 +313,7 @@ class RSSUtils {
$auth_pass = decrypt_string($auth_pass); $auth_pass = decrypt_string($auth_pass);
} }
$stored_last_modified = db_fetch_result($result, 0, "last_modified");
$cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images")); $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
$fetch_url = db_fetch_result($result, 0, "feed_url"); $fetch_url = db_fetch_result($result, 0, "feed_url");
$feed_language = db_escape_string(mb_strtolower(db_fetch_result($result, 0, "feed_language"))); $feed_language = db_escape_string(mb_strtolower(db_fetch_result($result, 0, "feed_language")));
@ -359,18 +361,33 @@ class RSSUtils {
_debug("local cache will not be used for this feed", $debug_enabled); _debug("local cache will not be used for this feed", $debug_enabled);
} }
global $fetch_last_modified;
// fetch feed from source // fetch feed from source
if (!$feed_data) { if (!$feed_data) {
_debug("stored last modified: $stored_last_modified", $debug_enabled);
_debug("fetching [$fetch_url]...", $debug_enabled); _debug("fetching [$fetch_url]...", $debug_enabled);
if (ini_get("open_basedir") && function_exists("curl_init")) { if (ini_get("open_basedir") && function_exists("curl_init")) {
_debug("not using CURL due to open_basedir restrictions"); _debug("not using CURL due to open_basedir restrictions");
} }
$feed_data = fetch_file_contents($fetch_url, false, /*$feed_data = fetch_file_contents($fetch_url, false,
$auth_login, $auth_pass, false, $auth_login, $auth_pass, false,
$no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT, $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
0); 0);*/
// TODO: last_modified should be limited, if the feed has not been updated for a while
// we probably should force one update without the header
// unfortunately last_updated gets bumped on http 304 so that daemon would work properly
$feed_data = fetch_file_contents([
"url" => $fetch_url,
"login" => $auth_login,
"pass" => $auth_pass,
"timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
"last_modified" => $force_refetch ? "" : $stored_last_modified
]);
global $fetch_curl_used; global $fetch_curl_used;
@ -383,6 +400,14 @@ class RSSUtils {
$feed_data = trim($feed_data); $feed_data = trim($feed_data);
_debug("fetch done.", $debug_enabled); _debug("fetch done.", $debug_enabled);
_debug("source last modified: " . $fetch_last_modified);
if ($feed_data && $fetch_last_modified != $stored_last_modified) {
$last_modified_escaped = db_escape_string(substr($fetch_last_modified, 0, 245));
db_query("UPDATE ttrss_feeds SET last_modified = '$last_modified_escaped' WHERE id = '$feed'");
}
// cache vanilla feed data for re-use // cache vanilla feed data for re-use
if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/simplepie")) { if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/simplepie")) {

View file

@ -1,6 +1,6 @@
<?php <?php
define('EXPECTED_CONFIG_VERSION', 26); define('EXPECTED_CONFIG_VERSION', 26);
define('SCHEMA_VERSION', 131); define('SCHEMA_VERSION', 132);
define('LABEL_BASE_INDEX', -1024); define('LABEL_BASE_INDEX', -1024);
define('PLUGIN_FEED_BASE_INDEX', -128); define('PLUGIN_FEED_BASE_INDEX', -128);
@ -335,6 +335,7 @@
global $fetch_last_error_code; global $fetch_last_error_code;
global $fetch_last_error_content; global $fetch_last_error_content;
global $fetch_last_content_type; global $fetch_last_content_type;
global $fetch_last_modified;
global $fetch_curl_used; global $fetch_curl_used;
$fetch_last_error = false; $fetch_last_error = false;
@ -342,11 +343,12 @@
$fetch_last_error_content = ""; $fetch_last_error_content = "";
$fetch_last_content_type = ""; $fetch_last_content_type = "";
$fetch_curl_used = false; $fetch_curl_used = false;
$fetch_last_modified = "";
if (!is_array($options)) { if (!is_array($options)) {
// falling back on compatibility shim // falling back on compatibility shim
$option_names = [ "url", "type", "login", "pass", "post_query", "timeout", "timestamp", "useragent" ]; $option_names = [ "url", "type", "login", "pass", "post_query", "timeout", "last_modified", "useragent" ];
$tmp = []; $tmp = [];
for ($i = 0; $i < func_num_args(); $i++) { for ($i = 0; $i < func_num_args(); $i++) {
@ -373,7 +375,7 @@
$pass = isset($options["pass"]) ? $options["pass"] : false; $pass = isset($options["pass"]) ? $options["pass"] : false;
$post_query = isset($options["post_query"]) ? $options["post_query"] : false; $post_query = isset($options["post_query"]) ? $options["post_query"] : false;
$timeout = isset($options["timeout"]) ? $options["timeout"] : false; $timeout = isset($options["timeout"]) ? $options["timeout"] : false;
$timestamp = isset($options["timestamp"]) ? $options["timestamp"] : 0; $last_modified = isset($options["last_modified"]) ? $options["last_modified"] : "";
$useragent = isset($options["useragent"]) ? $options["useragent"] : false; $useragent = isset($options["useragent"]) ? $options["useragent"] : false;
$followlocation = isset($options["followlocation"]) ? $options["followlocation"] : true; $followlocation = isset($options["followlocation"]) ? $options["followlocation"] : true;
@ -389,9 +391,9 @@
$ch = curl_init($url); $ch = curl_init($url);
if ($timestamp && !$post_query) { if ($last_modified && !$post_query) {
curl_setopt($ch, CURLOPT_HTTPHEADER, curl_setopt($ch, CURLOPT_HTTPHEADER,
array("If-Modified-Since: ".gmdate('D, d M Y H:i:s \G\M\T', $timestamp))); array("If-Modified-Since: $last_modified"));
} }
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout ? $timeout : FILE_FETCH_CONNECT_TIMEOUT); curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout ? $timeout : FILE_FETCH_CONNECT_TIMEOUT);
@ -400,6 +402,7 @@
curl_setopt($ch, CURLOPT_MAXREDIRS, 20); curl_setopt($ch, CURLOPT_MAXREDIRS, 20);
curl_setopt($ch, CURLOPT_BINARYTRANSFER, true); curl_setopt($ch, CURLOPT_BINARYTRANSFER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_ANY); curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
curl_setopt($ch, CURLOPT_USERAGENT, $useragent ? $useragent : curl_setopt($ch, CURLOPT_USERAGENT, $useragent ? $useragent :
SELF_USER_AGENT); SELF_USER_AGENT);
@ -422,19 +425,25 @@
if ($login && $pass) if ($login && $pass)
curl_setopt($ch, CURLOPT_USERPWD, "$login:$pass"); curl_setopt($ch, CURLOPT_USERPWD, "$login:$pass");
$contents = @curl_exec($ch); $ret = @curl_exec($ch);
$headers_length = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
$headers = explode("\r\n", substr($ret, 0, $headers_length));
$contents = substr($ret, $headers_length);
foreach ($headers as $header) {
list ($key, $value) = explode(": ", $header);
if (strtolower($key) == "last-modified") {
$fetch_last_modified = $value;
}
}
if (curl_errno($ch) === 23 || curl_errno($ch) === 61) { if (curl_errno($ch) === 23 || curl_errno($ch) === 61) {
curl_setopt($ch, CURLOPT_ENCODING, 'none'); curl_setopt($ch, CURLOPT_ENCODING, 'none');
$contents = @curl_exec($ch); $contents = @curl_exec($ch);
} }
if ($contents === false) {
$fetch_last_error = curl_errno($ch) . " " . curl_error($ch);
curl_close($ch);
return false;
}
$http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE); $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
$fetch_last_content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE); $fetch_last_content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
@ -451,6 +460,18 @@
return false; return false;
} }
if (!$contents) {
$fetch_last_error = curl_errno($ch) . " " . curl_error($ch);
curl_close($ch);
return false;
}
/*$fetch_last_modified = curl_getinfo($ch, CURLINFO_FILETIME);
if ($fetch_last_modified != -1) {
echo date("Y-m-d H:i:s", $fetch_last_modified); die;
}*/
curl_close($ch); curl_close($ch);
return $contents; return $contents;
@ -472,15 +493,15 @@
// TODO: should this support POST requests or not? idk // TODO: should this support POST requests or not? idk
if (!$post_query && $timestamp) { if (!$post_query && $last_modified) {
$context = stream_context_create(array( $context = stream_context_create(array(
'http' => array( 'http' => array(
'method' => 'GET', 'method' => 'GET',
'ignore_errors' => true, 'ignore_errors' => true,
'timeout' => $timeout ? $timeout : FILE_FETCH_TIMEOUT, 'timeout' => $timeout ? $timeout : FILE_FETCH_TIMEOUT,
'protocol_version'=> 1.1, 'protocol_version'=> 1.1,
'header' => "If-Modified-Since: ".gmdate("D, d M Y H:i:s \\G\\M\\T\r\n", $timestamp) 'header' => "If-Modified-Since: $last_modified\r\n")
))); ));
} else { } else {
$context = stream_context_create(array( $context = stream_context_create(array(
'http' => array( 'http' => array(
@ -497,10 +518,16 @@
if (isset($http_response_header) && is_array($http_response_header)) { if (isset($http_response_header) && is_array($http_response_header)) {
foreach ($http_response_header as $h) { foreach ($http_response_header as $h) {
if (substr(strtolower($h), 0, 13) == 'content-type:') { list ($key, $value) = explode(": ", $h);
$fetch_last_content_type = substr($h, 14);
$key = strtolower($key);
if ($key == 'content-type') {
$fetch_last_content_type = $value;
// don't abort here b/c there might be more than one // don't abort here b/c there might be more than one
// e.g. if we were being redirected -- last one is the right one // e.g. if we were being redirected -- last one is the right one
} else if ($key == 'last-modified') {
$fetch_last_modified = $value;
} }
if (substr(strtolower($h), 0, 7) == 'http/1.') { if (substr(strtolower($h), 0, 7) == 'http/1.') {

View file

@ -103,6 +103,7 @@ create table ttrss_feeds (id integer not null auto_increment primary key,
purge_interval integer not null default 0, purge_interval integer not null default 0,
last_updated datetime default null, last_updated datetime default null,
last_error varchar(250) not null default '', last_error varchar(250) not null default '',
last_modified varchar(250) not null default '',
favicon_avg_color varchar(11) default null, favicon_avg_color varchar(11) default null,
site_url varchar(250) not null default '', site_url varchar(250) not null default '',
auth_login varchar(250) not null default '', auth_login varchar(250) not null default '',
@ -282,7 +283,7 @@ create table ttrss_tags (id integer primary key auto_increment,
create table ttrss_version (schema_version int not null) ENGINE=InnoDB DEFAULT CHARSET=UTF8; create table ttrss_version (schema_version int not null) ENGINE=InnoDB DEFAULT CHARSET=UTF8;
insert into ttrss_version values (131); insert into ttrss_version values (132);
create table ttrss_enclosures (id integer primary key auto_increment, create table ttrss_enclosures (id integer primary key auto_increment,
content_url text not null, content_url text not null,

View file

@ -73,6 +73,7 @@ create table ttrss_feeds (id serial not null primary key,
purge_interval integer not null default 0, purge_interval integer not null default 0,
last_updated timestamp default null, last_updated timestamp default null,
last_error text not null default '', last_error text not null default '',
last_modified text not null default '',
favicon_avg_color varchar(11) default null, favicon_avg_color varchar(11) default null,
site_url varchar(250) not null default '', site_url varchar(250) not null default '',
auth_login varchar(250) not null default '', auth_login varchar(250) not null default '',
@ -264,7 +265,7 @@ create index ttrss_tags_post_int_id_idx on ttrss_tags(post_int_id);
create table ttrss_version (schema_version int not null); create table ttrss_version (schema_version int not null);
insert into ttrss_version values (131); insert into ttrss_version values (132);
create table ttrss_enclosures (id serial not null primary key, create table ttrss_enclosures (id serial not null primary key,
content_url text not null, content_url text not null,

View file

@ -0,0 +1,10 @@
/* Schema migration to version 132 (MySQL)
 * Adds the last_modified column to ttrss_feeds as a non-null
 * varchar(250) whose default is the empty string */
BEGIN;

/* Step 1 - create the column as nullable so existing rows are accepted */
ALTER TABLE ttrss_feeds ADD COLUMN last_modified VARCHAR(250);

/* Step 2 - backfill every existing feed row with an empty string */
UPDATE ttrss_feeds SET last_modified = '';

/* Step 3 - no NULLs remain, so the column can be tightened to NOT NULL */
ALTER TABLE ttrss_feeds MODIFY COLUMN last_modified VARCHAR(250) NOT NULL;

/* Step 4 - rows inserted from now on default to the empty string */
ALTER TABLE ttrss_feeds ALTER COLUMN last_modified SET DEFAULT '';

/* Record the new schema version */
UPDATE ttrss_version SET schema_version = 132;

COMMIT;

View file

@ -0,0 +1,10 @@
/* Schema migration to version 132 (PostgreSQL)
 * Adds the last_modified column to ttrss_feeds as a non-null
 * text column whose default is the empty string */
BEGIN;

/* Step 1 - create the column as nullable so existing rows are accepted */
ALTER TABLE ttrss_feeds ADD COLUMN last_modified TEXT;

/* Step 2 - backfill every existing feed row with an empty string */
UPDATE ttrss_feeds SET last_modified = '';

/* Step 3 - no NULLs remain, so the NOT NULL constraint can be applied */
ALTER TABLE ttrss_feeds ALTER COLUMN last_modified SET NOT NULL;

/* Step 4 - rows inserted from now on default to the empty string */
ALTER TABLE ttrss_feeds ALTER COLUMN last_modified SET DEFAULT '';

/* Record the new schema version */
UPDATE ttrss_version SET schema_version = 132;

COMMIT;