Cleaned up the headline update process; fixed the bug mentioned in the previous patch

This commit is contained in:
Andrew Dolgov 2005-08-23 11:51:29 +01:00
parent cb0bd8bd0f
commit 466001c4fa
2 changed files with 68 additions and 59 deletions

View file

@@ -17,10 +17,8 @@
$result = pg_query($link, "SELECT feed_url,id FROM ttrss_feeds"); $result = pg_query($link, "SELECT feed_url,id FROM ttrss_feeds");
} }
$num_unread = 0;
while ($line = pg_fetch_assoc($result)) { while ($line = pg_fetch_assoc($result)) {
$num_unread += update_rss_feed($link, $line["feed_url"], $line["id"]); update_rss_feed($link, $line["feed_url"], $line["id"]);
} }
pg_query("COMMIT"); pg_query("COMMIT");
@@ -33,7 +31,6 @@
$rss = fetch_rss($feed_url); $rss = fetch_rss($feed_url);
error_reporting (E_ERROR | E_WARNING | E_PARSE); error_reporting (E_ERROR | E_WARNING | E_PARSE);
$num_unread = 0;
if ($rss) { if ($rss) {
@@ -46,8 +43,6 @@
pg_query("UPDATE ttrss_feeds SET title = '$feed_title' WHERE id = '$feed'"); pg_query("UPDATE ttrss_feeds SET title = '$feed_title' WHERE id = '$feed'");
} }
pg_query("BEGIN");
foreach ($rss->items as $item) { foreach ($rss->items as $item) {
$entry_guid = $item["id"]; $entry_guid = $item["id"];
@@ -55,25 +50,26 @@
if (!$entry_guid) $entry_guid = $item["guid"]; if (!$entry_guid) $entry_guid = $item["guid"];
if (!$entry_guid) $entry_guid = $item["link"]; if (!$entry_guid) $entry_guid = $item["link"];
if (!$entry_guid) continue;
$entry_timestamp = ""; $entry_timestamp = "";
$rss_2_date = $item['pubdate']; $rss_2_date = $item['pubdate'];
$rss_1_date = $item['dc']['date']; $rss_1_date = $item['dc']['date'];
$atom_date = $item['issued']; $atom_date = $item['issued'];
$no_orig_date = 'false';
if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date); if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date);
if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date); if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date);
if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date); if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date);
// if ($rss_3_date != "") $entry_timestamp = strtotime($rss_3_date);
if ($entry_timestamp == "") { if ($entry_timestamp == "") {
$entry_timestamp = time(); $entry_timestamp = time();
$no_orig_date = 'true'; $no_orig_date = 'true';
} else {
$no_orig_date = 'false';
} }
if (!$entry_timestamp) continue; $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
$entry_title = $item["title"]; $entry_title = $item["title"];
$entry_link = $item["link"]; $entry_link = $item["link"];
@@ -82,18 +78,16 @@
if (!$entry_link) continue; if (!$entry_link) continue;
$entry_content = $item["description"]; $entry_content = $item["description"];
if (!$entry_content) $entry_content = $item["content:escaped"];
if (!$entry_content) $entry_content = $item["content"]; if (!$entry_content) $entry_content = $item["content"];
if (!$entry_content) continue; if (!$entry_content) continue;
$entry_content = pg_escape_string($entry_content); $content_hash = "SHA1:" . sha1(strip_tags($entry_content));
$entry_title = pg_escape_string($entry_title);
$content_md5 = md5(strip_tags($entry_content));
$result = pg_query($link, " $result = pg_query($link, "
SELECT SELECT
id,unread,md5_hash,last_read,no_orig_date,title, id,last_read,no_orig_date,title,feed_id,content_hash,
EXTRACT(EPOCH FROM updated) as updated_timestamp EXTRACT(EPOCH FROM updated) as updated_timestamp
FROM FROM
ttrss_entries ttrss_entries
@@ -102,71 +96,84 @@
if (pg_num_rows($result) == 0) { if (pg_num_rows($result) == 0) {
$entry_timestamp = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp); $entry_content = pg_escape_string($entry_content);
$entry_title = pg_escape_string($entry_title);
$entry_link = pg_escape_string($entry_link);
$query = "INSERT INTO ttrss_entries $query = "INSERT
(title, guid, link, updated, content, feed_id, INTO ttrss_entries
md5_hash, no_orig_date) (title,
guid,
link,
updated,
content,
content_hash,
feed_id,
no_orig_date)
VALUES VALUES
('$entry_title', '$entry_guid', '$entry_link', ('$entry_title',
'$entry_timestamp', '$entry_content', '$feed', '$entry_guid',
'$content_md5', $no_orig_date)"; '$entry_link',
'$entry_timestamp_fmt',
'$entry_content',
'$content_hash',
'$feed',
$no_orig_date)";
$result = pg_query($link, $query); $result = pg_query($link, $query);
if ($result) ++$num_unread;
} else { } else {
$entry_id = pg_fetch_result($result, 0, "id"); $orig_entry_id = pg_fetch_result($result, 0, "id");
$updated_timestamp = pg_fetch_result($result, 0, "updated_timestamp"); $orig_feed_id = pg_fetch_result($result, 0, "feed_id");
$entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
$last_read = pg_fetch_result($result, 0, "last_read");
$unread = pg_fetch_result($result, 0, "unread"); if ($orig_feed_id != $feed) {
$md5_hash = pg_fetch_result($result, 0, "md5_hash"); // print "<p>Update from different feed ($orig_feed_id, $feed): $entry_guid [$entry_title]";
$no_orig_date = pg_fetch_result($result, 0, "no_orig_date"); continue;
}
$orig_timestamp = pg_fetch_result($result, 0, "updated_timestamp");
$orig_content_hash = pg_fetch_result($result, 0, "content_hash");
$orig_last_read = pg_fetch_result($result, 0, "last_read");
$orig_no_orig_date = pg_fetch_result($result, 0, "no_orig_date");
$orig_title = pg_fetch_result($result, 0, "title"); $orig_title = pg_fetch_result($result, 0, "title");
if ($content_md5 != $md5_hash) { if ($orig_title != $entry_title) {
$update_md5_qpart = "md5_hash = '$content_md5',";
$last_read_qpart = 'last_read = null,'; $last_read_qpart = 'last_read = null,';
$update_content_qpart = "content = '$entry_content',";
} }
if ($orig_title != $entry_title) { if ($orig_content_hash != $content_hash) {
print "[$orig_title] : [$entry_title]"; $last_read_qpart = 'last_read = null,';
$entry_title_qpart = "title ='$entry_title',";
} }
if ($orig_timestamp < $entry_timestamp) {
$last_read_qpart = 'last_read = null,';
}
$entry_content = pg_escape_string($entry_content);
$entry_title = pg_escape_string($entry_title);
$entry_link = pg_escape_string($entry_link);
$query = "UPDATE ttrss_entries $query = "UPDATE ttrss_entries
SET SET
$entry_title_qpart
link = '$entry_link',
$update_timestamp_qpart
$last_read_qpart $last_read_qpart
$update_md5_qpart title = '$entry_title',
$update_content_qpart link = '$entry_link',
unread = '$unread' updated = '$entry_timestamp_fmt',
content = '$entry_content',
content_hash = '$content_hash'
WHERE WHERE
id = '$entry_id'"; id = '$orig_entry_id'";
print "<pre>".htmlspecialchars($query)."</pre>";
$result = pg_query($link, $query); $result = pg_query($link, $query);
if ($result) ++$num_unread;
} }
} }
if ($result) { if ($result) {
$result = pg_query($link, "UPDATE ttrss_feeds SET last_updated = NOW()"); $result = pg_query($link, "UPDATE ttrss_feeds SET last_updated = NOW()");
} }
pg_query("COMMIT");
} }
} }

View file

@@ -10,6 +10,8 @@ insert into ttrss_feeds (title,feed_url) values ('Footnotes', 'http://gnomedeskt
insert into ttrss_feeds (title,feed_url) values ('Freedesktop.org', 'http://planet.freedesktop.org/rss20.xml'); insert into ttrss_feeds (title,feed_url) values ('Freedesktop.org', 'http://planet.freedesktop.org/rss20.xml');
insert into ttrss_feeds (title,feed_url) values ('Planet Debian', 'http://planet.debian.org/rss20.xml'); insert into ttrss_feeds (title,feed_url) values ('Planet Debian', 'http://planet.debian.org/rss20.xml');
insert into ttrss_feeds (title,feed_url) values ('Planet GNOME', 'http://planet.gnome.org/rss20.xml'); insert into ttrss_feeds (title,feed_url) values ('Planet GNOME', 'http://planet.gnome.org/rss20.xml');
insert into ttrss_feeds (title,feed_url) values ('Planet Ubuntu', 'http://planet.ubuntulinux.org/rss20.xml');
insert into ttrss_feeds (title,feed_url) values ('Monologue', 'http://www.go-mono.com/monologue/index.rss'); insert into ttrss_feeds (title,feed_url) values ('Monologue', 'http://www.go-mono.com/monologue/index.rss');
insert into ttrss_feeds (title,feed_url) values ('Latest Linux Kernel Versions', insert into ttrss_feeds (title,feed_url) values ('Latest Linux Kernel Versions',
@@ -30,8 +32,8 @@ create table ttrss_entries (id serial not null primary key,
title varchar(250) not null, title varchar(250) not null,
guid varchar(300) not null unique, guid varchar(300) not null unique,
link varchar(300) not null unique, link varchar(300) not null unique,
md5_hash varchar(200) not null,
content text not null, content text not null,
content_hash varchar(250) not null,
last_read timestamp, last_read timestamp,
no_orig_date boolean not null default false, no_orig_date boolean not null default false,
unread boolean not null default true); unread boolean not null default true);