Преглед на файлове

more fts stuff for simple index

Andrew Dolgov преди 8 години
родител
ревизия
e854442e1f
променени са 6 файла, в които са добавени 103 реда и са изтрити 42 реда
  1. 41 42
      include/functions2.php
  2. 11 0
      include/rssfuncs.php
  3. 2 0
      schema/ttrss_schema_pgsql.sql
  4. 5 0
      schema/versions/mysql/128.sql
  5. 8 0
      schema/versions/pgsql/128.sql
  6. 36 0
      update.php

+ 41 - 42
include/functions2.php

@@ -282,17 +282,10 @@
 
 	function search_to_sql($search) {
 
-		/*if (DB_TYPE == "pgsql") {
-			$search_escaped = db_escape_string($search);
-
-			return array("(to_tsvector('english', SUBSTR(ttrss_entries.title, 0, 200) || ' ' || SUBSTR(content, 0, 800))
-				@@ to_tsquery('$search_escaped'))", explode(" ", $search));
-		}*/
-
 		$keywords = str_getcsv($search, " ");
 		$query_keywords = array();
 		$search_words = array();
-		$search_query_leftover = "";
+		$search_query_leftover = array();
 
 		foreach ($keywords as $k) {
 			if (strpos($k, "-") === 0) {
@@ -390,23 +383,27 @@
 
 					array_push($query_keywords, "(".SUBSTRING_FOR_DATE."(updated,1,LENGTH('$k')) $not = '$k')");
 				} else {
-					$search_query_leftover .= $k . " ";
 
-					if (!$not) array_push($search_words, $k);
+					if (DB_TYPE == "pgsql") {
+						$k = mb_strtolower($k);
+						array_push($search_query_leftover, $not ? "!$k" : $k);
+					} else {
+						array_push($query_keywords, "(UPPER(ttrss_entries.title) $not LIKE UPPER('%$k%')
+							OR UPPER(ttrss_entries.content) $not LIKE UPPER('%$k%'))");
+					}
 
-					/*array_push($query_keywords, "(UPPER(ttrss_entries.title) $not LIKE UPPER('%$k%')
-						OR UPPER(ttrss_entries.content) $not LIKE UPPER('%$k%'))");
-					if (!$not) array_push($search_words, $k);*/
+					if (!$not) array_push($search_words, $k);
 				}
 			}
 		}
 
-		if ($search_query_leftover) {
-			$search_query_leftover = db_escape_string($search_query_leftover);
+		if (count($search_query_leftover) > 0) {
+			$search_query_leftover = db_escape_string(implode(" & ", $search_query_leftover));
 
-			array_push($query_keywords,
-				"(to_tsvector('simple', SUBSTR(ttrss_entries.title, 0, 200) || ' ' || SUBSTR(content, 0, 800))
-				@@ to_tsquery('$search_query_leftover'))");
+			if (DB_TYPE == "pgsql") {
+				array_push($query_keywords,
+					"(tsvector_combined @@ '$search_query_leftover'::tsquery)");
+			}
 
 		}
 
@@ -713,29 +710,30 @@
 				if ($feed == -3)
 					$first_id_query_strategy_part = "true";
 
-				// if previous topmost article id changed that means our current pagination is no longer valid
-				$query = "SELECT DISTINCT
-						ttrss_feeds.title,
-						date_entered,
-						guid,
-						ttrss_entries.id,
-						ttrss_entries.title,
-						updated,
-						score,
-						marked,
-						published,
-						last_marked,
-						last_published
-					FROM
-						$from_qpart
-					WHERE
-					$feed_check_qpart
-					ttrss_user_entries.ref_id = ttrss_entries.id AND
-					ttrss_user_entries.owner_uid = '$owner_uid' AND
-					$search_query_part
-					$start_ts_query_part
-					$since_id_part
-					$first_id_query_strategy_part ORDER BY $order_by LIMIT 1";
+				if (!$search) {
+					// if previous topmost article id changed that means our current pagination is no longer valid
+					$query = "SELECT DISTINCT
+							ttrss_feeds.title,
+							date_entered,
+							guid,
+							ttrss_entries.id,
+							ttrss_entries.title,
+							updated,
+							score,
+							marked,
+							published,
+							last_marked,
+							last_published
+						FROM
+							$from_qpart
+						WHERE
+						$feed_check_qpart
+						ttrss_user_entries.ref_id = ttrss_entries.id AND
+						ttrss_user_entries.owner_uid = '$owner_uid' AND
+						$search_query_part
+						$start_ts_query_part
+						$since_id_part
+						$first_id_query_strategy_part ORDER BY $order_by LIMIT 1";
 
 					if ($_REQUEST["debug"]) {
 						print $query;
@@ -743,12 +741,13 @@
 
 					$result = db_query($query);
 					if ($result && db_num_rows($result) > 0) {
-						$first_id = (int) db_fetch_result($result, 0, "id");
+						$first_id = (int)db_fetch_result($result, 0, "id");
 
 						if ($offset > 0 && $first_id && $check_first_id && $first_id != $check_first_id) {
 							return array(-1, $feed_title, $feed_site_url, $last_error, $last_updated, $search_words, $first_id);
 						}
 					}
+				}
 
 				$query = "SELECT DISTINCT
 						date_entered,

+ 11 - 0
include/rssfuncs.php

@@ -984,11 +984,22 @@
 
 					_debug("RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
 
+					if (DB_TYPE == "pgsql") {
+						$tsvector_combined = db_escape_string(mb_substr($entry_title . ' ' . strip_tags($entry_content),
+							0, 1000000));
+
+						$tsvector_qpart = "tsvector_combined = to_tsvector('simple', '$tsvector_combined'),";
+
+					} else {
+						$tsvector_qpart = "";
+					}
+
 					db_query("UPDATE ttrss_entries
 						SET title = '$entry_title',
 							content = '$entry_content',
 							content_hash = '$entry_current_hash',
 							updated = '$entry_timestamp_fmt',
+							$tsvector_qpart
 							num_comments = '$num_comments',
 							plugin_data = '$entry_plugin_data',
 							author = '$entry_author',

+ 2 - 0
schema/ttrss_schema_pgsql.sql

@@ -142,12 +142,14 @@ create table ttrss_entries (id serial not null primary key,
 	num_comments integer not null default 0,
 	comments varchar(250) not null default '',
 	plugin_data text,
+	tsvector_combined tsvector,
 	lang varchar(2),
 	author varchar(250) not null default '');
 
 -- create index ttrss_entries_title_index on ttrss_entries(title);
 create index ttrss_entries_date_entered_index on ttrss_entries(date_entered);
 create index ttrss_entries_updated_idx on ttrss_entries(updated);
+create index ttrss_entries_tsvector_combined_idx on ttrss_entries using gin(tsvector_combined);
 
 create table ttrss_user_entries (
 	int_id serial not null primary key,

+ 5 - 0
schema/versions/mysql/128.sql

@@ -0,0 +1,5 @@
+BEGIN;
+
+UPDATE ttrss_version SET schema_version = 128;
+
+COMMIT;

+ 8 - 0
schema/versions/pgsql/128.sql

@@ -0,0 +1,8 @@
+BEGIN;
+
+alter table ttrss_entries add column tsvector_combined tsvector;
+create index ttrss_entries_tsvector_combined_idx on ttrss_entries using gin(tsvector_combined);
+
+UPDATE ttrss_version SET schema_version = 128;
+
+COMMIT;

+ 36 - 0
update.php

@@ -33,6 +33,7 @@
 			"update-schema",
 			"convert-filters",
 			"force-update",
+			"update-search-idx",
 			"list-plugins",
 			"help");
 
@@ -80,6 +81,7 @@
 		print "  --log FILE           - log messages to FILE\n";
 		print "  --indexes            - recreate missing schema indexes\n";
 		print "  --update-schema      - update database schema\n";
+		print "  --update-search-idx  - update PostgreSQL fulltext search index\n";
 		print "  --convert-filters    - convert type1 filters to type2\n";
 		print "  --force-update       - force update of all feeds\n";
 		print "  --list-plugins       - list all available plugins\n";
@@ -330,6 +332,40 @@
 
 	}
 
+	if (isset($options["update-search-idx"])) {
+		echo "Generating search index...\n";
+
+		$result = db_query("SELECT COUNT(id) AS count FROM ttrss_entries");
+		$count = db_fetch_result($result, 0, "count");
+
+		print "Total entries: $count.\n";
+
+		$offset = 0;
+		$limit = 1000;
+
+		while (true) {
+			$result = db_query("SELECT id, title, content FROM ttrss_entries WHERE tsvector_combined IS NULL ORDER BY id LIMIT $limit OFFSET $offset");
+
+			if (db_num_rows($result) != 0) {
+				echo "Offset $offset...\n";
+
+				while ($line = db_fetch_assoc($result)) {
+					$tsvector_combined = db_escape_string(mb_substr($line['title'] . ' ' . strip_tags($line['content']),
+						0, 1000000));
+
+					db_query("UPDATE ttrss_entries SET tsvector_combined = to_tsvector('simple', '$tsvector_combined') WHERE id = " . $line["id"]);
+				}
+
+				$offset += $limit;
+			} else {
+				echo "All done.\n";
+				break;
+			}
+
+		}
+
+	}
+
 	if (isset($options["list-plugins"])) {
 		$tmppluginhost = new PluginHost();
 		$tmppluginhost->load_all($tmppluginhost::KIND_ALL);