more fts stuff for simple index
This commit is contained in:
parent
b4f544d389
commit
e854442e1f
6 changed files with 103 additions and 42 deletions
|
@ -282,17 +282,10 @@
|
|||
|
||||
function search_to_sql($search) {
|
||||
|
||||
/*if (DB_TYPE == "pgsql") {
|
||||
$search_escaped = db_escape_string($search);
|
||||
|
||||
return array("(to_tsvector('english', SUBSTR(ttrss_entries.title, 0, 200) || ' ' || SUBSTR(content, 0, 800))
|
||||
@@ to_tsquery('$search_escaped'))", explode(" ", $search));
|
||||
}*/
|
||||
|
||||
$keywords = str_getcsv($search, " ");
|
||||
$query_keywords = array();
|
||||
$search_words = array();
|
||||
$search_query_leftover = "";
|
||||
$search_query_leftover = array();
|
||||
|
||||
foreach ($keywords as $k) {
|
||||
if (strpos($k, "-") === 0) {
|
||||
|
@ -390,23 +383,27 @@
|
|||
|
||||
array_push($query_keywords, "(".SUBSTRING_FOR_DATE."(updated,1,LENGTH('$k')) $not = '$k')");
|
||||
} else {
|
||||
$search_query_leftover .= $k . " ";
|
||||
|
||||
if (DB_TYPE == "pgsql") {
|
||||
$k = mb_strtolower($k);
|
||||
array_push($search_query_leftover, $not ? "!$k" : $k);
|
||||
} else {
|
||||
array_push($query_keywords, "(UPPER(ttrss_entries.title) $not LIKE UPPER('%$k%')
|
||||
OR UPPER(ttrss_entries.content) $not LIKE UPPER('%$k%'))");
|
||||
}
|
||||
|
||||
if (!$not) array_push($search_words, $k);
|
||||
|
||||
/*array_push($query_keywords, "(UPPER(ttrss_entries.title) $not LIKE UPPER('%$k%')
|
||||
OR UPPER(ttrss_entries.content) $not LIKE UPPER('%$k%'))");
|
||||
if (!$not) array_push($search_words, $k);*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ($search_query_leftover) {
|
||||
$search_query_leftover = db_escape_string($search_query_leftover);
|
||||
if (count($search_query_leftover) > 0) {
|
||||
$search_query_leftover = db_escape_string(implode(" & ", $search_query_leftover));
|
||||
|
||||
array_push($query_keywords,
|
||||
"(to_tsvector('simple', SUBSTR(ttrss_entries.title, 0, 200) || ' ' || SUBSTR(content, 0, 800))
|
||||
@@ to_tsquery('$search_query_leftover'))");
|
||||
if (DB_TYPE == "pgsql") {
|
||||
array_push($query_keywords,
|
||||
"(tsvector_combined @@ '$search_query_leftover'::tsquery)");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -713,29 +710,30 @@
|
|||
if ($feed == -3)
|
||||
$first_id_query_strategy_part = "true";
|
||||
|
||||
// if previous topmost article id changed that means our current pagination is no longer valid
|
||||
$query = "SELECT DISTINCT
|
||||
ttrss_feeds.title,
|
||||
date_entered,
|
||||
guid,
|
||||
ttrss_entries.id,
|
||||
ttrss_entries.title,
|
||||
updated,
|
||||
score,
|
||||
marked,
|
||||
published,
|
||||
last_marked,
|
||||
last_published
|
||||
FROM
|
||||
$from_qpart
|
||||
WHERE
|
||||
$feed_check_qpart
|
||||
ttrss_user_entries.ref_id = ttrss_entries.id AND
|
||||
ttrss_user_entries.owner_uid = '$owner_uid' AND
|
||||
$search_query_part
|
||||
$start_ts_query_part
|
||||
$since_id_part
|
||||
$first_id_query_strategy_part ORDER BY $order_by LIMIT 1";
|
||||
if (!$search) {
|
||||
// if previous topmost article id changed that means our current pagination is no longer valid
|
||||
$query = "SELECT DISTINCT
|
||||
ttrss_feeds.title,
|
||||
date_entered,
|
||||
guid,
|
||||
ttrss_entries.id,
|
||||
ttrss_entries.title,
|
||||
updated,
|
||||
score,
|
||||
marked,
|
||||
published,
|
||||
last_marked,
|
||||
last_published
|
||||
FROM
|
||||
$from_qpart
|
||||
WHERE
|
||||
$feed_check_qpart
|
||||
ttrss_user_entries.ref_id = ttrss_entries.id AND
|
||||
ttrss_user_entries.owner_uid = '$owner_uid' AND
|
||||
$search_query_part
|
||||
$start_ts_query_part
|
||||
$since_id_part
|
||||
$first_id_query_strategy_part ORDER BY $order_by LIMIT 1";
|
||||
|
||||
if ($_REQUEST["debug"]) {
|
||||
print $query;
|
||||
|
@ -743,12 +741,13 @@
|
|||
|
||||
$result = db_query($query);
|
||||
if ($result && db_num_rows($result) > 0) {
|
||||
$first_id = (int) db_fetch_result($result, 0, "id");
|
||||
$first_id = (int)db_fetch_result($result, 0, "id");
|
||||
|
||||
if ($offset > 0 && $first_id && $check_first_id && $first_id != $check_first_id) {
|
||||
return array(-1, $feed_title, $feed_site_url, $last_error, $last_updated, $search_words, $first_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$query = "SELECT DISTINCT
|
||||
date_entered,
|
||||
|
|
|
@ -984,11 +984,22 @@
|
|||
|
||||
_debug("RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
|
||||
|
||||
// Optional SET fragment for the UPDATE below: on PostgreSQL it refreshes the
// full-text search column from the entry title plus the tag-stripped content
// (capped at 1000000 characters); on any other backend it stays empty so the
// statement is unchanged.
// NOTE(review): $entry_title / $entry_content are interpolated directly into
// the UPDATE below, so they look already escaped at this point -- confirm that
// escaping the combined string again here does not double-escape it.
$tsvector_qpart = "";

if (DB_TYPE == "pgsql") {
	$tsvector_combined = db_escape_string(
		mb_substr($entry_title . ' ' . strip_tags($entry_content), 0, 1000000));

	// The trailing comma is required: the fragment is spliced in between
	// other column assignments.
	$tsvector_qpart = "tsvector_combined = to_tsvector('simple', '$tsvector_combined'),";
}
|
||||
|
||||
db_query("UPDATE ttrss_entries
|
||||
SET title = '$entry_title',
|
||||
content = '$entry_content',
|
||||
content_hash = '$entry_current_hash',
|
||||
updated = '$entry_timestamp_fmt',
|
||||
$tsvector_qpart
|
||||
num_comments = '$num_comments',
|
||||
plugin_data = '$entry_plugin_data',
|
||||
author = '$entry_author',
|
||||
|
|
|
@ -142,12 +142,14 @@ create table ttrss_entries (id serial not null primary key,
|
|||
num_comments integer not null default 0,
|
||||
comments varchar(250) not null default '',
|
||||
plugin_data text,
|
||||
tsvector_combined tsvector,
|
||||
lang varchar(2),
|
||||
author varchar(250) not null default '');
|
||||
|
||||
-- create index ttrss_entries_title_index on ttrss_entries(title);
|
||||
create index ttrss_entries_date_entered_index on ttrss_entries(date_entered);
|
||||
create index ttrss_entries_updated_idx on ttrss_entries(updated);
|
||||
create index ttrss_entries_tsvector_combined_idx on ttrss_entries using gin(tsvector_combined);
|
||||
|
||||
create table ttrss_user_entries (
|
||||
int_id serial not null primary key,
|
||||
|
|
5
schema/versions/mysql/128.sql
Normal file
5
schema/versions/mysql/128.sql
Normal file
|
@ -0,0 +1,5 @@
|
|||
-- Schema version 128 (MySQL): this script makes no structural change and
-- only records the new schema version.
BEGIN;

UPDATE ttrss_version
	SET schema_version = 128;

COMMIT;
|
8
schema/versions/pgsql/128.sql
Normal file
8
schema/versions/pgsql/128.sql
Normal file
|
@ -0,0 +1,8 @@
|
|||
-- Schema version 128 (PostgreSQL): add the full-text search column to
-- ttrss_entries together with a GIN index over it, then record the new
-- schema version. Everything runs in one transaction.
BEGIN;

-- Nullable on purpose: existing rows start out without a search vector.
ALTER TABLE ttrss_entries
	ADD COLUMN tsvector_combined tsvector;

CREATE INDEX ttrss_entries_tsvector_combined_idx
	ON ttrss_entries USING gin(tsvector_combined);

UPDATE ttrss_version
	SET schema_version = 128;

COMMIT;
|
36
update.php
36
update.php
|
@ -33,6 +33,7 @@
|
|||
"update-schema",
|
||||
"convert-filters",
|
||||
"force-update",
|
||||
"update-search-idx",
|
||||
"list-plugins",
|
||||
"help");
|
||||
|
||||
|
@ -80,6 +81,7 @@
|
|||
print " --log FILE - log messages to FILE\n";
|
||||
print " --indexes - recreate missing schema indexes\n";
|
||||
print " --update-schema - update database schema\n";
|
||||
print " --update-search-idx - update PostgreSQL fulltext search index\n";
|
||||
print " --convert-filters - convert type1 filters to type2\n";
|
||||
print " --force-update - force update of all feeds\n";
|
||||
print " --list-plugins - list all available plugins\n";
|
||||
|
@ -330,6 +332,40 @@
|
|||
|
||||
}
|
||||
|
||||
// --update-search-idx: backfill the full-text search column
// (tsvector_combined) for every entry that does not have one yet.
//
// Entries are processed in ascending id order, $limit rows per batch, using
// the last processed id as a cursor. An OFFSET must not be used here: each
// batch fills tsvector_combined and thereby removes its rows from the
// "IS NULL" result set, so advancing an OFFSET as well would skip that many
// still-unprocessed rows on every pass and leave part of the table unindexed.
// The cursor also guarantees forward progress, so the loop always terminates.
if (isset($options["update-search-idx"])) {
	echo "Generating search index...\n";

	$result = db_query("SELECT COUNT(id) AS count FROM ttrss_entries");
	$count = db_fetch_result($result, 0, "count");

	print "Total entries: $count.\n";

	$offset = 0;   // rows handed out so far; used for progress output only
	$limit = 1000; // batch size
	$last_id = 0;  // cursor; ttrss_entries.id is a serial column, so ids are > 0

	while (true) {
		$result = db_query("SELECT id, title, content FROM ttrss_entries
			WHERE tsvector_combined IS NULL AND id > $last_id
			ORDER BY id LIMIT $limit");

		if (db_num_rows($result) == 0) {
			echo "All done.\n";
			break;
		}

		echo "Offset $offset...\n";

		while ($line = db_fetch_assoc($result)) {
			// Cast before interpolating the id into SQL.
			$last_id = (int) $line["id"];

			// Index the title plus the tag-stripped content, capped at
			// 1000000 characters.
			$tsvector_combined = db_escape_string(
				mb_substr($line['title'] . ' ' . strip_tags($line['content']), 0, 1000000));

			db_query("UPDATE ttrss_entries SET tsvector_combined = to_tsvector('simple', '$tsvector_combined') WHERE id = $last_id");
		}

		$offset += $limit;
	}
}
|
||||
|
||||
if (isset($options["list-plugins"])) {
|
||||
$tmppluginhost = new PluginHost();
|
||||
$tmppluginhost->load_all($tmppluginhost::KIND_ALL);
|
||||
|
|
Loading…
Reference in a new issue