implement per-feed stemming language setting

This commit is contained in:
Andrew Dolgov 2015-08-04 13:32:52 +03:00
parent e854442e1f
commit df659891ac
8 changed files with 57 additions and 8 deletions

View file

@ -1,5 +1,7 @@
<?php
class Pref_Feeds extends Handler_Protected {
public static $feed_languages = array("English", "Danish", "Dutch", "Finnish", "French", "German", "Hungarian", "Italian", "Norwegian",
"Portuguese", "Russian", "Spanish", "Swedish", "Turkish", "Simple");
function csrf_ignore($method) {
$csrf_ignored = array("index", "getfeedtree", "add", "editcats", "editfeed",
@ -593,6 +595,18 @@ class Pref_Feeds extends Handler_Protected {
'dojoType="dijit.form.Select"');
}
/* FTS Stemming Language */
if (DB_TYPE == "pgsql") {
$feed_language = $this->dbh->fetch_result($result, 0, "feed_language");
print "<hr/>";
print __('Language:') . " ";
print_select("feed_language", $feed_language, $this::$feed_languages,
'dojoType="dijit.form.Select"');
}
print "</div>";
print "<div class=\"dlgSec\">".__("Update")."</div>";
@ -807,6 +821,18 @@ class Pref_Feeds extends Handler_Protected {
}
/* FTS Stemming Language */
if (DB_TYPE == "pgsql") {
print "<hr/>";
print __('Language:') . " ";
print_select("feed_language", "", $this::$feed_languages,
'disabled="1" dojoType="dijit.form.Select"');
$this->batch_edit_cbox("feed_language");
}
print "</div>";
print "<div class=\"dlgSec\">".__("Update")."</div>";
@ -938,6 +964,8 @@ class Pref_Feeds extends Handler_Protected {
$mark_unread_on_update = checkbox_to_sql_bool(
$this->dbh->escape_string($_POST["mark_unread_on_update"]));
$feed_language = $this->dbh->escape_string(trim($_POST["feed_language"]));
if (strlen(FEED_CRYPT_KEY) > 0) {
require_once "crypt.php";
$auth_pass = substr(encrypt_string($auth_pass), 0, 250);
@ -976,7 +1004,8 @@ class Pref_Feeds extends Handler_Protected {
hide_images = $hide_images,
include_in_digest = $include_in_digest,
always_display_enclosures = $always_display_enclosures,
mark_unread_on_update = $mark_unread_on_update
mark_unread_on_update = $mark_unread_on_update,
feed_language = '$feed_language'
WHERE id = '$feed_id' AND owner_uid = " . $_SESSION["uid"]);
PluginHost::getInstance()->run_hooks(PluginHost::HOOK_PREFS_SAVE_FEED,
@ -1051,6 +1080,10 @@ class Pref_Feeds extends Handler_Protected {
$qpart = $category_qpart_nocomma;
break;
case "feed_language":
$qpart = "feed_language = '$feed_language'";
break;
}
if ($qpart) {

View file

@ -402,7 +402,7 @@
if (DB_TYPE == "pgsql") {
array_push($query_keywords,
"(tsvector_combined @@ '$search_query_leftover'::tsquery)");
"(tsvector_combined @@ to_tsquery('english', '$search_query_leftover'))");
}
}

View file

@ -306,6 +306,7 @@
feed_url,auth_pass,cache_images,
mark_unread_on_update, owner_uid,
pubsub_state, auth_pass_encrypted,
feed_language,
(SELECT max(date_entered) FROM
ttrss_entries, ttrss_user_entries where ref_id = id AND feed_id = '$feed') AS last_article_timestamp
FROM ttrss_feeds WHERE id = '$feed'");
@ -340,6 +341,8 @@
$cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
$fetch_url = db_fetch_result($result, 0, "feed_url");
$feed_language = db_escape_string(mb_strtolower(db_fetch_result($result, 0, "feed_language")));
if (!$feed_language) $feed_language = 'english';
$feed = db_escape_string($feed);
@ -463,6 +466,7 @@
// We use local pluginhost here because we need to load different per-user feed plugins
$pluginhost->run_hooks(PluginHost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss);
_debug("language: $feed_language", $debug_enabled);
_debug("processing feed data...", $debug_enabled);
// db_query("BEGIN");
@ -988,7 +992,7 @@
$tsvector_combined = db_escape_string(mb_substr($entry_title . ' ' . strip_tags($entry_content),
0, 1000000));
$tsvector_qpart = "tsvector_combined = to_tsvector('simple', '$tsvector_combined'),";
$tsvector_qpart = "tsvector_combined = to_tsvector('$feed_language', '$tsvector_combined'),";
} else {
$tsvector_qpart = "";

View file

@ -127,6 +127,7 @@ create table ttrss_feeds (id integer not null auto_increment primary key,
view_settings varchar(250) not null default '',
pubsub_state integer not null default 0,
favicon_last_checked datetime default null,
feed_language varchar(100) not null default '',
foreign key (owner_uid) references ttrss_users(id) ON DELETE CASCADE,
foreign key (cat_id) references ttrss_feed_categories(id) ON DELETE SET NULL,
foreign key (parent_feed) references ttrss_feeds(id) ON DELETE SET NULL) ENGINE=InnoDB DEFAULT CHARSET=UTF8;

View file

@ -96,6 +96,7 @@ create table ttrss_feeds (id serial not null primary key,
view_settings varchar(250) not null default '',
pubsub_state integer not null default 0,
favicon_last_checked timestamp default null,
feed_language varchar(100) not null default '',
auth_pass_encrypted boolean not null default false);
create index ttrss_feeds_owner_uid_index on ttrss_feeds(owner_uid);

View file

@ -1,5 +1,10 @@
-- Schema migration to version 128 (MySQL): adds the per-feed full-text-search
-- stemming language column, ttrss_feeds.feed_language.
BEGIN;
-- Step 1: add the column without a NOT NULL constraint; existing rows are
-- filled in by the next statement before the constraint is applied.
alter table ttrss_feeds add column feed_language varchar(100);
-- Step 2: backfill every existing feed with the empty string. The feed updater
-- falls back to 'english' when this value is empty.
update ttrss_feeds set feed_language = '';
-- Step 3: redefine the column under the same name and type, now NOT NULL.
alter table ttrss_feeds change feed_language feed_language varchar(100) not null;
-- Step 4: new rows default to the empty string, matching the column definition
-- in the base schema.
alter table ttrss_feeds alter column feed_language set default '';
-- Record that the database is now at schema version 128.
UPDATE ttrss_version SET schema_version = 128;
COMMIT;

View file

@ -3,6 +3,11 @@ BEGIN;
-- Full-text search storage: a tsvector column on entries and a GIN index over it.
alter table ttrss_entries add column tsvector_combined tsvector;
create index ttrss_entries_tsvector_combined_idx on ttrss_entries using gin(tsvector_combined);
-- Per-feed stemming language column. It is added without constraints first,
-- backfilled with the empty string, and only then made NOT NULL with an
-- empty-string default (the same end state as the base schema definition).
-- The feed updater falls back to 'english' when the value is empty.
alter table ttrss_feeds add column feed_language varchar(100);
update ttrss_feeds set feed_language = '';
alter table ttrss_feeds alter column feed_language set not null;
alter table ttrss_feeds alter column feed_language set default '';
-- Record that the database is now at schema version 128.
UPDATE ttrss_version SET schema_version = 128;
COMMIT;

View file

@ -33,7 +33,7 @@
"update-schema",
"convert-filters",
"force-update",
"update-search-idx",
"gen-search-idx",
"list-plugins",
"help");
@ -81,7 +81,7 @@
print " --log FILE - log messages to FILE\n";
print " --indexes - recreate missing schema indexes\n";
print " --update-schema - update database schema\n";
print " --update-search-idx - update PostgreSQL fulltext search index\n";
print " --gen-search-idx - generate basic PostgreSQL fulltext search index\n";
print " --convert-filters - convert type1 filters to type2\n";
print " --force-update - force update of all feeds\n";
print " --list-plugins - list all available plugins\n";
@ -332,8 +332,8 @@
}
if (isset($options["update-search-idx"])) {
echo "Generating search index...\n";
if (isset($options["gen-search-idx"])) {
echo "Generating search index (stemming set to English)...\n";
$result = db_query("SELECT COUNT(id) AS count FROM ttrss_entries");
$count = db_fetch_result($result, 0, "count");
@ -353,7 +353,7 @@
$tsvector_combined = db_escape_string(mb_substr($line['title'] . ' ' . strip_tags($line['content']),
0, 1000000));
db_query("UPDATE ttrss_entries SET tsvector_combined = to_tsvector('simple', '$tsvector_combined') WHERE id = " . $line["id"]);
db_query("UPDATE ttrss_entries SET tsvector_combined = to_tsvector('english', '$tsvector_combined') WHERE id = " . $line["id"]);
}
$offset += $limit;