cleaned up headline update process, fixed bug mentioned in previous patch
This commit is contained in:
parent
cb0bd8bd0f
commit
466001c4fa
2 changed files with 68 additions and 59 deletions
109
functions.php
109
functions.php
|
@ -17,10 +17,8 @@
|
||||||
$result = pg_query($link, "SELECT feed_url,id FROM ttrss_feeds");
|
$result = pg_query($link, "SELECT feed_url,id FROM ttrss_feeds");
|
||||||
}
|
}
|
||||||
|
|
||||||
$num_unread = 0;
|
|
||||||
|
|
||||||
while ($line = pg_fetch_assoc($result)) {
|
while ($line = pg_fetch_assoc($result)) {
|
||||||
$num_unread += update_rss_feed($link, $line["feed_url"], $line["id"]);
|
update_rss_feed($link, $line["feed_url"], $line["id"]);
|
||||||
}
|
}
|
||||||
|
|
||||||
pg_query("COMMIT");
|
pg_query("COMMIT");
|
||||||
|
@ -33,7 +31,6 @@
|
||||||
$rss = fetch_rss($feed_url);
|
$rss = fetch_rss($feed_url);
|
||||||
error_reporting (E_ERROR | E_WARNING | E_PARSE);
|
error_reporting (E_ERROR | E_WARNING | E_PARSE);
|
||||||
|
|
||||||
$num_unread = 0;
|
|
||||||
|
|
||||||
if ($rss) {
|
if ($rss) {
|
||||||
|
|
||||||
|
@ -46,8 +43,6 @@
|
||||||
pg_query("UPDATE ttrss_feeds SET title = '$feed_title' WHERE id = '$feed'");
|
pg_query("UPDATE ttrss_feeds SET title = '$feed_title' WHERE id = '$feed'");
|
||||||
}
|
}
|
||||||
|
|
||||||
pg_query("BEGIN");
|
|
||||||
|
|
||||||
foreach ($rss->items as $item) {
|
foreach ($rss->items as $item) {
|
||||||
|
|
||||||
$entry_guid = $item["id"];
|
$entry_guid = $item["id"];
|
||||||
|
@ -55,25 +50,26 @@
|
||||||
if (!$entry_guid) $entry_guid = $item["guid"];
|
if (!$entry_guid) $entry_guid = $item["guid"];
|
||||||
if (!$entry_guid) $entry_guid = $item["link"];
|
if (!$entry_guid) $entry_guid = $item["link"];
|
||||||
|
|
||||||
|
if (!$entry_guid) continue;
|
||||||
|
|
||||||
$entry_timestamp = "";
|
$entry_timestamp = "";
|
||||||
|
|
||||||
$rss_2_date = $item['pubdate'];
|
$rss_2_date = $item['pubdate'];
|
||||||
$rss_1_date = $item['dc']['date'];
|
$rss_1_date = $item['dc']['date'];
|
||||||
$atom_date = $item['issued'];
|
$atom_date = $item['issued'];
|
||||||
|
|
||||||
$no_orig_date = 'false';
|
|
||||||
|
|
||||||
if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date);
|
if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date);
|
||||||
if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date);
|
if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date);
|
||||||
if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date);
|
if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date);
|
||||||
// if ($rss_3_date != "") $entry_timestamp = strtotime($rss_3_date);
|
|
||||||
|
|
||||||
if ($entry_timestamp == "") {
|
if ($entry_timestamp == "") {
|
||||||
$entry_timestamp = time();
|
$entry_timestamp = time();
|
||||||
$no_orig_date = 'true';
|
$no_orig_date = 'true';
|
||||||
|
} else {
|
||||||
|
$no_orig_date = 'false';
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!$entry_timestamp) continue;
|
$entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
|
||||||
|
|
||||||
$entry_title = $item["title"];
|
$entry_title = $item["title"];
|
||||||
$entry_link = $item["link"];
|
$entry_link = $item["link"];
|
||||||
|
@ -82,18 +78,16 @@
|
||||||
if (!$entry_link) continue;
|
if (!$entry_link) continue;
|
||||||
|
|
||||||
$entry_content = $item["description"];
|
$entry_content = $item["description"];
|
||||||
|
if (!$entry_content) $entry_content = $item["content:escaped"];
|
||||||
if (!$entry_content) $entry_content = $item["content"];
|
if (!$entry_content) $entry_content = $item["content"];
|
||||||
|
|
||||||
if (!$entry_content) continue;
|
if (!$entry_content) continue;
|
||||||
|
|
||||||
$entry_content = pg_escape_string($entry_content);
|
$content_hash = "SHA1:" . sha1(strip_tags($entry_content));
|
||||||
$entry_title = pg_escape_string($entry_title);
|
|
||||||
|
|
||||||
$content_md5 = md5(strip_tags($entry_content));
|
|
||||||
|
|
||||||
$result = pg_query($link, "
|
$result = pg_query($link, "
|
||||||
SELECT
|
SELECT
|
||||||
id,unread,md5_hash,last_read,no_orig_date,title,
|
id,last_read,no_orig_date,title,feed_id,content_hash,
|
||||||
EXTRACT(EPOCH FROM updated) as updated_timestamp
|
EXTRACT(EPOCH FROM updated) as updated_timestamp
|
||||||
FROM
|
FROM
|
||||||
ttrss_entries
|
ttrss_entries
|
||||||
|
@ -102,71 +96,84 @@
|
||||||
|
|
||||||
if (pg_num_rows($result) == 0) {
|
if (pg_num_rows($result) == 0) {
|
||||||
|
|
||||||
$entry_timestamp = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
|
$entry_content = pg_escape_string($entry_content);
|
||||||
|
$entry_title = pg_escape_string($entry_title);
|
||||||
|
$entry_link = pg_escape_string($entry_link);
|
||||||
|
|
||||||
$query = "INSERT INTO ttrss_entries
|
$query = "INSERT
|
||||||
(title, guid, link, updated, content, feed_id,
|
INTO ttrss_entries
|
||||||
md5_hash, no_orig_date)
|
(title,
|
||||||
|
guid,
|
||||||
|
link,
|
||||||
|
updated,
|
||||||
|
content,
|
||||||
|
content_hash,
|
||||||
|
feed_id,
|
||||||
|
no_orig_date)
|
||||||
VALUES
|
VALUES
|
||||||
('$entry_title', '$entry_guid', '$entry_link',
|
('$entry_title',
|
||||||
'$entry_timestamp', '$entry_content', '$feed',
|
'$entry_guid',
|
||||||
'$content_md5', $no_orig_date)";
|
'$entry_link',
|
||||||
|
'$entry_timestamp_fmt',
|
||||||
|
'$entry_content',
|
||||||
|
'$content_hash',
|
||||||
|
'$feed',
|
||||||
|
$no_orig_date)";
|
||||||
|
|
||||||
$result = pg_query($link, $query);
|
$result = pg_query($link, $query);
|
||||||
|
|
||||||
if ($result) ++$num_unread;
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
$entry_id = pg_fetch_result($result, 0, "id");
|
$orig_entry_id = pg_fetch_result($result, 0, "id");
|
||||||
$updated_timestamp = pg_fetch_result($result, 0, "updated_timestamp");
|
$orig_feed_id = pg_fetch_result($result, 0, "feed_id");
|
||||||
$entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
|
|
||||||
$last_read = pg_fetch_result($result, 0, "last_read");
|
|
||||||
|
|
||||||
$unread = pg_fetch_result($result, 0, "unread");
|
if ($orig_feed_id != $feed) {
|
||||||
$md5_hash = pg_fetch_result($result, 0, "md5_hash");
|
// print "<p>Update from different feed ($orig_feed_id, $feed): $entry_guid [$entry_title]";
|
||||||
$no_orig_date = pg_fetch_result($result, 0, "no_orig_date");
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
$orig_timestamp = pg_fetch_result($result, 0, "updated_timestamp");
|
||||||
|
$orig_content_hash = pg_fetch_result($result, 0, "content_hash");
|
||||||
|
$orig_last_read = pg_fetch_result($result, 0, "last_read");
|
||||||
|
$orig_no_orig_date = pg_fetch_result($result, 0, "no_orig_date");
|
||||||
$orig_title = pg_fetch_result($result, 0, "title");
|
$orig_title = pg_fetch_result($result, 0, "title");
|
||||||
|
|
||||||
if ($content_md5 != $md5_hash) {
|
if ($orig_title != $entry_title) {
|
||||||
$update_md5_qpart = "md5_hash = '$content_md5',";
|
|
||||||
$last_read_qpart = 'last_read = null,';
|
$last_read_qpart = 'last_read = null,';
|
||||||
$update_content_qpart = "content = '$entry_content',";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($orig_title != $entry_title) {
|
if ($orig_content_hash != $content_hash) {
|
||||||
print "[$orig_title] : [$entry_title]";
|
$last_read_qpart = 'last_read = null,';
|
||||||
$entry_title_qpart = "title ='$entry_title',";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($orig_timestamp < $entry_timestamp) {
|
||||||
|
$last_read_qpart = 'last_read = null,';
|
||||||
|
}
|
||||||
|
|
||||||
|
$entry_content = pg_escape_string($entry_content);
|
||||||
|
$entry_title = pg_escape_string($entry_title);
|
||||||
|
$entry_link = pg_escape_string($entry_link);
|
||||||
|
|
||||||
$query = "UPDATE ttrss_entries
|
$query = "UPDATE ttrss_entries
|
||||||
SET
|
SET
|
||||||
$entry_title_qpart
|
|
||||||
link = '$entry_link',
|
|
||||||
$update_timestamp_qpart
|
|
||||||
$last_read_qpart
|
$last_read_qpart
|
||||||
$update_md5_qpart
|
title = '$entry_title',
|
||||||
$update_content_qpart
|
link = '$entry_link',
|
||||||
unread = '$unread'
|
updated = '$entry_timestamp_fmt',
|
||||||
|
content = '$entry_content',
|
||||||
|
content_hash = '$content_hash'
|
||||||
WHERE
|
WHERE
|
||||||
id = '$entry_id'";
|
id = '$orig_entry_id'";
|
||||||
|
|
||||||
print "<pre>".htmlspecialchars($query)."</pre>";
|
|
||||||
|
|
||||||
$result = pg_query($link, $query);
|
$result = pg_query($link, $query);
|
||||||
|
|
||||||
if ($result) ++$num_unread;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($result) {
|
if ($result) {
|
||||||
$result = pg_query($link, "UPDATE ttrss_feeds SET last_updated = NOW()");
|
$result = pg_query($link, "UPDATE ttrss_feeds SET last_updated = NOW()");
|
||||||
}
|
}
|
||||||
|
|
||||||
pg_query("COMMIT");
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,6 +10,8 @@ insert into ttrss_feeds (title,feed_url) values ('Footnotes', 'http://gnomedeskt
|
||||||
insert into ttrss_feeds (title,feed_url) values ('Freedesktop.org', 'http://planet.freedesktop.org/rss20.xml');
|
insert into ttrss_feeds (title,feed_url) values ('Freedesktop.org', 'http://planet.freedesktop.org/rss20.xml');
|
||||||
insert into ttrss_feeds (title,feed_url) values ('Planet Debian', 'http://planet.debian.org/rss20.xml');
|
insert into ttrss_feeds (title,feed_url) values ('Planet Debian', 'http://planet.debian.org/rss20.xml');
|
||||||
insert into ttrss_feeds (title,feed_url) values ('Planet GNOME', 'http://planet.gnome.org/rss20.xml');
|
insert into ttrss_feeds (title,feed_url) values ('Planet GNOME', 'http://planet.gnome.org/rss20.xml');
|
||||||
|
insert into ttrss_feeds (title,feed_url) values ('Planet Ubuntu', 'http://planet.ubuntulinux.org/rss20.xml');
|
||||||
|
|
||||||
insert into ttrss_feeds (title,feed_url) values ('Monologue', 'http://www.go-mono.com/monologue/index.rss');
|
insert into ttrss_feeds (title,feed_url) values ('Monologue', 'http://www.go-mono.com/monologue/index.rss');
|
||||||
|
|
||||||
insert into ttrss_feeds (title,feed_url) values ('Latest Linux Kernel Versions',
|
insert into ttrss_feeds (title,feed_url) values ('Latest Linux Kernel Versions',
|
||||||
|
@ -30,8 +32,8 @@ create table ttrss_entries (id serial not null primary key,
|
||||||
title varchar(250) not null,
|
title varchar(250) not null,
|
||||||
guid varchar(300) not null unique,
|
guid varchar(300) not null unique,
|
||||||
link varchar(300) not null unique,
|
link varchar(300) not null unique,
|
||||||
md5_hash varchar(200) not null,
|
|
||||||
content text not null,
|
content text not null,
|
||||||
|
content_hash varchar(250) not null,
|
||||||
last_read timestamp,
|
last_read timestamp,
|
||||||
no_orig_date boolean not null default false,
|
no_orig_date boolean not null default false,
|
||||||
unread boolean not null default true);
|
unread boolean not null default true);
|
||||||
|
|
Loading…
Reference in a new issue