Use transactions in the feed update process; better handle feeds without entry dates (schema updated)
This commit is contained in:
parent
9d9c9dd645
commit
b82af8c382
3 changed files with 55 additions and 23 deletions
|
@ -17,6 +17,14 @@
|
||||||
|
|
||||||
$subop = $_GET["subop"];
|
$subop = $_GET["subop"];
|
||||||
|
|
||||||
|
if ($subop == "forceUpdateAll") {
|
||||||
|
update_all_feeds($link, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($subop == "updateAll") {
|
||||||
|
update_all_feeds($link, false);
|
||||||
|
}
|
||||||
|
|
||||||
if ($subop == "catchupPage") {
|
if ($subop == "catchupPage") {
|
||||||
|
|
||||||
$ids = split(",", $_GET["ids"]);
|
$ids = split(",", $_GET["ids"]);
|
||||||
|
|
|
@ -3,6 +3,8 @@
|
||||||
|
|
||||||
function update_all_feeds($link, $fetch) {
|
function update_all_feeds($link, $fetch) {
|
||||||
|
|
||||||
|
pg_query("BEGIN");
|
||||||
|
|
||||||
if (!$fetch) {
|
if (!$fetch) {
|
||||||
|
|
||||||
$result = pg_query($link, "SELECT feed_url,id FROM ttrss_feeds WHERE
|
$result = pg_query($link, "SELECT feed_url,id FROM ttrss_feeds WHERE
|
||||||
|
@ -21,6 +23,8 @@
|
||||||
$num_unread += update_rss_feed($link, $line["feed_url"], $line["id"]);
|
$num_unread += update_rss_feed($link, $line["feed_url"], $line["id"]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pg_query("COMMIT");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function update_rss_feed($link, $feed_url, $feed) {
|
function update_rss_feed($link, $feed_url, $feed) {
|
||||||
|
@ -31,12 +35,13 @@
|
||||||
|
|
||||||
if ($rss) {
|
if ($rss) {
|
||||||
|
|
||||||
|
pg_query("BEGIN");
|
||||||
|
|
||||||
$result = pg_query("SELECT title FROM ttrss_feeds WHERE id = '$feed'");
|
$result = pg_query("SELECT title FROM ttrss_feeds WHERE id = '$feed'");
|
||||||
|
|
||||||
$registered_title = pg_fetch_result($result, 0, "title");
|
$registered_title = pg_fetch_result($result, 0, "title");
|
||||||
|
|
||||||
if (!$registered_title) {
|
if (!$registered_title) {
|
||||||
|
|
||||||
$feed_title = $rss->channel["title"];
|
$feed_title = $rss->channel["title"];
|
||||||
pg_query("UPDATE ttrss_feeds SET title = '$feed_title' WHERE id = '$feed'");
|
pg_query("UPDATE ttrss_feeds SET title = '$feed_title' WHERE id = '$feed'");
|
||||||
}
|
}
|
||||||
|
@ -54,10 +59,17 @@
|
||||||
$rss_1_date = $item['dc']['date'];
|
$rss_1_date = $item['dc']['date'];
|
||||||
$atom_date = $item['issued'];
|
$atom_date = $item['issued'];
|
||||||
|
|
||||||
|
$no_orig_date = 'false';
|
||||||
|
|
||||||
if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date);
|
if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date);
|
||||||
if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date);
|
if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date);
|
||||||
if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date);
|
if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date);
|
||||||
if ($entry_timestamp == "") $entry_timestamp = 0;
|
// if ($rss_3_date != "") $entry_timestamp = strtotime($rss_3_date);
|
||||||
|
|
||||||
|
if ($entry_timestamp == "") {
|
||||||
|
$entry_timestamp = time();
|
||||||
|
$no_orig_date = 'true';
|
||||||
|
}
|
||||||
|
|
||||||
if (!$entry_timestamp) continue;
|
if (!$entry_timestamp) continue;
|
||||||
|
|
||||||
|
@ -77,7 +89,7 @@
|
||||||
|
|
||||||
$result = pg_query($link, "
|
$result = pg_query($link, "
|
||||||
SELECT
|
SELECT
|
||||||
id,unread,md5_hash,last_read,
|
id,unread,md5_hash,last_read,no_orig_date,title,
|
||||||
EXTRACT(EPOCH FROM updated) as updated_timestamp
|
EXTRACT(EPOCH FROM updated) as updated_timestamp
|
||||||
FROM
|
FROM
|
||||||
ttrss_entries
|
ttrss_entries
|
||||||
|
@ -89,11 +101,12 @@
|
||||||
$entry_timestamp = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
|
$entry_timestamp = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
|
||||||
|
|
||||||
$query = "INSERT INTO ttrss_entries
|
$query = "INSERT INTO ttrss_entries
|
||||||
(title, guid, link, updated, content, feed_id, md5_hash)
|
(title, guid, link, updated, content, feed_id,
|
||||||
|
md5_hash, no_orig_date)
|
||||||
VALUES
|
VALUES
|
||||||
('$entry_title', '$entry_guid', '$entry_link',
|
('$entry_title', '$entry_guid', '$entry_link',
|
||||||
'$entry_timestamp', '$entry_content', '$feed',
|
'$entry_timestamp', '$entry_content', '$feed',
|
||||||
'$content_md5')";
|
'$content_md5', $no_orig_date)";
|
||||||
|
|
||||||
$result = pg_query($link, $query);
|
$result = pg_query($link, $query);
|
||||||
|
|
||||||
|
@ -108,33 +121,41 @@
|
||||||
|
|
||||||
$unread = pg_fetch_result($result, 0, "unread");
|
$unread = pg_fetch_result($result, 0, "unread");
|
||||||
$md5_hash = pg_fetch_result($result, 0, "md5_hash");
|
$md5_hash = pg_fetch_result($result, 0, "md5_hash");
|
||||||
|
$no_orig_date = pg_fetch_result($result, 0, "no_orig_date");
|
||||||
|
$orig_title = pg_fetch_result($result, 0, "title");
|
||||||
|
|
||||||
// if ($md5_hash != $content_md5 && CONTENT_CHECK_MD5)
|
// disable update detection for posts which didn't have correct
|
||||||
// $unread = "true";
|
// publication date, because they will always register as updated
|
||||||
|
// sadly this doesn't catch feed generators which input current date
|
||||||
|
// in posts all the time (some planets do this)
|
||||||
|
|
||||||
if (!$last_read || $md5_hash != $content_md5) {
|
if ($no_orig_date != 't' && (!$last_read || $md5_hash != $content_md5)) {
|
||||||
$last_read = 'null';
|
$last_read_qpart = 'last_read = null,';
|
||||||
} else {
|
} else {
|
||||||
$last_read = "'$last_read'";
|
$last_read_qpart = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
// if ($unread || !CONTENT_CHECK_MD5) {
|
// mark post as updated on title change
|
||||||
// $updated_query_part = "updated = '$entry_timestamp',";
|
// maybe we should mark it as unread instead?
|
||||||
// }
|
|
||||||
|
|
||||||
// if ($updated_timestamp > $entry_timestamp) {
|
if ($orig_title != $entry_title) {
|
||||||
// $unread = "true";
|
$last_read_qpart = 'last_read = null,';
|
||||||
// print "$updated_timestamp : $entry_timestamp<br>";
|
}
|
||||||
// }
|
|
||||||
|
// don't bother updating timestamps on posts with broken pubDate
|
||||||
|
|
||||||
|
if ($no_orig_date != 't') {
|
||||||
|
$update_timestamp_qpart = "updated = '$entry_timestamp_fmt',";
|
||||||
|
}
|
||||||
|
|
||||||
$query = "UPDATE ttrss_entries
|
$query = "UPDATE ttrss_entries
|
||||||
SET
|
SET
|
||||||
title ='$entry_title',
|
title ='$entry_title',
|
||||||
link = '$entry_link',
|
link = '$entry_link',
|
||||||
updated = '$entry_timestamp_fmt',
|
$update_timestamp_qpart
|
||||||
|
$last_read_qpart
|
||||||
content = '$entry_content',
|
content = '$entry_content',
|
||||||
md5_hash = '$content_md5',
|
md5_hash = '$content_md5',
|
||||||
last_read = $last_read,
|
|
||||||
unread = '$unread'
|
unread = '$unread'
|
||||||
WHERE
|
WHERE
|
||||||
id = '$entry_id'";
|
id = '$entry_id'";
|
||||||
|
@ -151,6 +172,8 @@
|
||||||
$result = pg_query($link, "UPDATE ttrss_feeds SET last_updated = NOW()");
|
$result = pg_query($link, "UPDATE ttrss_feeds SET last_updated = NOW()");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pg_query("COMMIT");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,5 +34,6 @@ create table ttrss_entries (id serial not null primary key,
|
||||||
md5_hash varchar(200) not null unique,
|
md5_hash varchar(200) not null unique,
|
||||||
content text not null,
|
content text not null,
|
||||||
last_read timestamp,
|
last_read timestamp,
|
||||||
unread boolean default true);
|
no_orig_date boolean not null default false,
|
||||||
|
unread boolean not null default true);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue