use transactions in feed update process, better handle feeds without entry dates (schema updated)
This commit is contained in:
parent
9d9c9dd645
commit
b82af8c382
3 changed files with 55 additions and 23 deletions
|
@ -17,6 +17,14 @@
|
|||
|
||||
$subop = $_GET["subop"];
|
||||
|
||||
if ($subop == "forceUpdateAll") {
|
||||
update_all_feeds($link, true);
|
||||
}
|
||||
|
||||
if ($subop == "updateAll") {
|
||||
update_all_feeds($link, false);
|
||||
}
|
||||
|
||||
if ($subop == "catchupPage") {
|
||||
|
||||
$ids = split(",", $_GET["ids"]);
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
function update_all_feeds($link, $fetch) {
|
||||
|
||||
pg_query("BEGIN");
|
||||
|
||||
if (!$fetch) {
|
||||
|
||||
$result = pg_query($link, "SELECT feed_url,id FROM ttrss_feeds WHERE
|
||||
|
@ -21,6 +23,8 @@
|
|||
$num_unread += update_rss_feed($link, $line["feed_url"], $line["id"]);
|
||||
}
|
||||
|
||||
pg_query("COMMIT");
|
||||
|
||||
}
|
||||
|
||||
function update_rss_feed($link, $feed_url, $feed) {
|
||||
|
@ -31,12 +35,13 @@
|
|||
|
||||
if ($rss) {
|
||||
|
||||
pg_query("BEGIN");
|
||||
|
||||
$result = pg_query("SELECT title FROM ttrss_feeds WHERE id = '$feed'");
|
||||
|
||||
$registered_title = pg_fetch_result($result, 0, "title");
|
||||
|
||||
if (!$registered_title) {
|
||||
|
||||
$feed_title = $rss->channel["title"];
|
||||
pg_query("UPDATE ttrss_feeds SET title = '$feed_title' WHERE id = '$feed'");
|
||||
}
|
||||
|
@ -54,10 +59,17 @@
|
|||
$rss_1_date = $item['dc']['date'];
|
||||
$atom_date = $item['issued'];
|
||||
|
||||
$no_orig_date = 'false';
|
||||
|
||||
if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date);
|
||||
if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date);
|
||||
if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date);
|
||||
if ($entry_timestamp == "") $entry_timestamp = 0;
|
||||
// if ($rss_3_date != "") $entry_timestamp = strtotime($rss_3_date);
|
||||
|
||||
if ($entry_timestamp == "") {
|
||||
$entry_timestamp = time();
|
||||
$no_orig_date = 'true';
|
||||
}
|
||||
|
||||
if (!$entry_timestamp) continue;
|
||||
|
||||
|
@ -77,7 +89,7 @@
|
|||
|
||||
$result = pg_query($link, "
|
||||
SELECT
|
||||
id,unread,md5_hash,last_read,
|
||||
id,unread,md5_hash,last_read,no_orig_date,title,
|
||||
EXTRACT(EPOCH FROM updated) as updated_timestamp
|
||||
FROM
|
||||
ttrss_entries
|
||||
|
@ -89,11 +101,12 @@
|
|||
$entry_timestamp = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
|
||||
|
||||
$query = "INSERT INTO ttrss_entries
|
||||
(title, guid, link, updated, content, feed_id, md5_hash)
|
||||
(title, guid, link, updated, content, feed_id,
|
||||
md5_hash, no_orig_date)
|
||||
VALUES
|
||||
('$entry_title', '$entry_guid', '$entry_link',
|
||||
'$entry_timestamp', '$entry_content', '$feed',
|
||||
'$content_md5')";
|
||||
'$content_md5', $no_orig_date)";
|
||||
|
||||
$result = pg_query($link, $query);
|
||||
|
||||
|
@ -108,33 +121,41 @@
|
|||
|
||||
$unread = pg_fetch_result($result, 0, "unread");
|
||||
$md5_hash = pg_fetch_result($result, 0, "md5_hash");
|
||||
$no_orig_date = pg_fetch_result($result, 0, "no_orig_date");
|
||||
$orig_title = pg_fetch_result($result, 0, "title");
|
||||
|
||||
// if ($md5_hash != $content_md5 && CONTENT_CHECK_MD5)
|
||||
// $unread = "true";
|
||||
// disable update detection for posts which didn't have correct
|
||||
// publishment date, because they will always register as updated
|
||||
// sadly this doesn't catch feed generators which input current date
|
||||
// in posts all the time (some planets do this)
|
||||
|
||||
if (!$last_read || $md5_hash != $content_md5) {
|
||||
$last_read = 'null';
|
||||
if ($no_orig_date != 't' && (!$last_read || $md5_hash != $content_md5)) {
|
||||
$last_read_qpart = 'last_read = null,';
|
||||
} else {
|
||||
$last_read = "'$last_read'";
|
||||
$last_read_qpart = '';
|
||||
}
|
||||
|
||||
// if ($unread || !CONTENT_CHECK_MD5) {
|
||||
// $updated_query_part = "updated = '$entry_timestamp',";
|
||||
// }
|
||||
// mark post as updated on title change
|
||||
// maybe we should mark it as unread instead?
|
||||
|
||||
// if ($updated_timestamp > $entry_timestamp) {
|
||||
// $unread = "true";
|
||||
// print "$updated_timestamp : $entry_timestamp<br>";
|
||||
// }
|
||||
if ($orig_title != $entry_title) {
|
||||
$last_read_qpart = 'last_read = null,';
|
||||
}
|
||||
|
||||
// don't bother updating timestamps on posts with broken pubDate
|
||||
|
||||
if ($no_orig_date != 't') {
|
||||
$update_timestamp_qpart = "updated = '$entry_timestamp_fmt',";
|
||||
}
|
||||
|
||||
$query = "UPDATE ttrss_entries
|
||||
SET
|
||||
title ='$entry_title',
|
||||
link = '$entry_link',
|
||||
updated = '$entry_timestamp_fmt',
|
||||
$update_timestamp_qpart
|
||||
$last_read_qpart
|
||||
content = '$entry_content',
|
||||
md5_hash = '$content_md5',
|
||||
last_read = $last_read,
|
||||
unread = '$unread'
|
||||
WHERE
|
||||
id = '$entry_id'";
|
||||
|
@ -151,6 +172,8 @@
|
|||
$result = pg_query($link, "UPDATE ttrss_feeds SET last_updated = NOW()");
|
||||
}
|
||||
|
||||
pg_query("COMMIT");
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -34,5 +34,6 @@ create table ttrss_entries (id serial not null primary key,
|
|||
md5_hash varchar(200) not null unique,
|
||||
content text not null,
|
||||
last_read timestamp,
|
||||
unread boolean default true);
|
||||
no_orig_date boolean not null default false,
|
||||
unread boolean not null default true);
|
||||
|
||||
|
|
Loading…
Reference in a new issue