From c3fc5e470e24cd44b8eae50b4af70c9c7fab0e93 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 9 Nov 2010 14:49:41 +0300 Subject: [PATCH] allow filtering by article tags (bump schema) --- functions.php | 208 +++++++++++++++++++--------------- sanity_check.php | 2 +- schema/ttrss_schema_mysql.sql | 3 +- schema/ttrss_schema_pgsql.sql | 3 +- schema/versions/mysql/71.sql | 3 + schema/versions/pgsql/71.sql | 3 + 6 files changed, 130 insertions(+), 92 deletions(-) create mode 100644 schema/versions/mysql/71.sql create mode 100644 schema/versions/pgsql/71.sql diff --git a/functions.php b/functions.php index d9754a2f..54847cc7 100644 --- a/functions.php +++ b/functions.php @@ -954,6 +954,10 @@ if (!$num_comments) $num_comments = 0; + if (defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug']) { + _debug("update_rss_feed: looking for tags [1]..."); + } + // parse entries into tags if ($use_simplepie) { @@ -1013,68 +1017,25 @@ } } - // enclosures + if (defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug']) { + _debug("update_rss_feed: looking for tags [2]..."); + } - $enclosures = array(); + /* taaaags */ + // , // - if ($use_simplepie) { - $encs = $item->get_enclosures(); + $entry_tags = null; - if (is_array($encs)) { - foreach ($encs as $e) { - $e_item = array( - $e->link, $e->type, $e->length); - - array_push($enclosures, $e_item); - } - } + preg_match_all("/([^<]+)<\/a>/i", + $entry_content_unescaped, $entry_tags); - } else { - // + $entry_tags = $entry_tags[1]; - $e_ctr = $item['enclosure#']; + $entry_tags = array_merge($entry_tags, $additional_tags); - if ($e_ctr > 0) { - $e_item = array($item['enclosure@url'], - $item['enclosure@type'], - $item['enclosure@length']); - - array_push($enclosures, $e_item); - - for ($i = 0; $i <= $e_ctr; $i++ ) { - - if ($item["enclosure#$i@url"]) { - $e_item = array($item["enclosure#$i@url"], - $item["enclosure#$i@type"], - $item["enclosure#$i@length"]); - array_push($enclosures, $e_item); - } - } - } - - // - // can there be many of those? yes -fox - - $m_ctr = $item['media']['content#']; - - if ($m_ctr > 0) { - $e_item = array($item['media']['content@url'], - $item['media']['content@medium'], - $item['media']['content@length']); - - array_push($enclosures, $e_item); - - for ($i = 0; $i <= $m_ctr; $i++ ) { - - if ($item["media"]["content#$i@url"]) { - $e_item = array($item["media"]["content#$i@url"], - $item["media"]["content#$i@medium"], - $item["media"]["content#$i@length"]); - array_push($enclosures, $e_item); - } - } - - } + if (defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug']) { + _debug("update_rss_feed: unfiltered tags found:"); + print_r($entry_tags); } # sanitize content @@ -1176,8 +1137,10 @@ // error_reporting(0); + /* Collect article tags here so we could filter by them: */ + $article_filters = get_article_filters($filters, $entry_title, - $entry_content, $entry_link, $entry_timestamp, $entry_author); + $entry_content, $entry_link, $entry_timestamp, $entry_author, $entry_tags); if (defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug']) { _debug("update_rss_feed: article filters: "); @@ -1324,7 +1287,73 @@ _debug("update_rss_feed: looking for enclosures..."); } + // enclosures + + $enclosures = array(); + + if ($use_simplepie) { + $encs = $item->get_enclosures(); + + if (is_array($encs)) { + foreach ($encs as $e) { + $e_item = array( + $e->link, $e->type, $e->length); + + array_push($enclosures, $e_item); + } + } + + } else { + // + + $e_ctr = $item['enclosure#']; + + if ($e_ctr > 0) { + $e_item = array($item['enclosure@url'], + $item['enclosure@type'], + $item['enclosure@length']); + + array_push($enclosures, $e_item); + + for ($i = 0; $i <= $e_ctr; $i++ ) { + + if ($item["enclosure#$i@url"]) { + $e_item = array($item["enclosure#$i@url"], + $item["enclosure#$i@type"], + $item["enclosure#$i@length"]); + array_push($enclosures, $e_item); + } + } + } + + // + // can there be many of those? yes -fox + + $m_ctr = $item['media']['content#']; + + if ($m_ctr > 0) { + $e_item = array($item['media']['content@url'], + $item['media']['content@medium'], + $item['media']['content@length']); + + array_push($enclosures, $e_item); + + for ($i = 0; $i <= $m_ctr; $i++ ) { + + if ($item["media"]["content#$i@url"]) { + $e_item = array($item["media"]["content#$i@url"], + $item["media"]["content#$i@medium"], + $item["media"]["content#$i@length"]); + array_push($enclosures, $e_item); + } + } + + } + } + + if (defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug']) { + _debug("update_rss_feed: article enclosures:"); print_r($enclosures); } @@ -1347,25 +1376,7 @@ db_query($link, "COMMIT"); - if (defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug']) { - _debug("update_rss_feed: looking for tags..."); - } - - /* taaaags */ - // , // - - $entry_tags = null; - - preg_match_all("/([^<]+)<\/a>/i", - $entry_content_unescaped, $entry_tags); - -/* print "


$entry_title : $entry_content_unescaped
"; - print_r($entry_tags); - print "

"; */ - - $entry_tags = $entry_tags[1]; - - # check for manual tags + // check for manual tags (we have to do it here since they're loaded from filters) foreach ($article_filters as $f) { if ($f[0] == "tag") { @@ -1380,29 +1391,33 @@ } } + // Skip boring tags + $boring_tags = trim_array(split(",", mb_strtolower(get_pref($link, 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8'))); - if ($additional_tags && is_array($additional_tags)) { - foreach ($additional_tags as $tag) { - if (tag_is_valid($tag) && - array_search($tag, $boring_tags) === FALSE) { - array_push($entry_tags, $tag); + $filtered_tags = array(); + + if ($entry_tags && is_array($entry_tags)) { + foreach ($entry_tags as $tag) { + if (array_search($tag, $boring_tags) === false) { + array_push($filtered_tags, $tag); } } } -// print "

TAGS: "; print_r($entry_tags); print "

"; - if (defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug']) { - print_r($entry_tags); + _debug("update_rss_feed: filtered article tags:"); + print_r($filtered_tags); } - if (count($entry_tags) > 0) { + // Save article tags in the database + + if (count($filtered_tags) > 0) { db_query($link, "BEGIN"); - foreach ($entry_tags as $tag) { + foreach ($filtered_tags as $tag) { $tag = sanitize_tag($tag); $tag = db_escape_string($tag); @@ -1413,8 +1428,6 @@ WHERE tag_name = '$tag' AND post_int_id = '$entry_int_id' AND owner_uid = '$owner_uid' LIMIT 1"); - // print db_fetch_result($result, 0, "id"); - if ($result && db_num_rows($result) == 0) { db_query($link, "INSERT INTO ttrss_tags @@ -1501,7 +1514,7 @@ print ""; } - function get_article_filters($filters, $title, $content, $link, $timestamp, $author) { + function get_article_filters($filters, $title, $content, $link, $timestamp, $author, $tags) { $matches = array(); if ($filters["title"]) { @@ -1598,6 +1611,23 @@ } } + if ($filters["tag"]) { + + $tag_string = join(",", $tags); + + foreach ($filters["tag"] as $filter) { + $reg_exp = $filter["reg_exp"]; + $inverse = $filter["inverse"]; + + if ((!$inverse && preg_match("/$reg_exp/i", $tag_string)) || + ($inverse && !preg_match("/$reg_exp/i", $tag_string))) { + + array_push($matches, array($filter["action"], $filter["action_param"])); + } + } + } + + return $matches; } diff --git a/sanity_check.php b/sanity_check.php index 0f539bde..b5432199 100644 --- a/sanity_check.php +++ b/sanity_check.php @@ -2,7 +2,7 @@ require_once "functions.php"; define('EXPECTED_CONFIG_VERSION', 19); - define('SCHEMA_VERSION', 70); + define('SCHEMA_VERSION', 71); if (!file_exists("config.php")) { print "Fatal Error: You forgot to copy diff --git a/schema/ttrss_schema_mysql.sql b/schema/ttrss_schema_mysql.sql index 03ea5e61..da73cedd 100644 --- a/schema/ttrss_schema_mysql.sql +++ b/schema/ttrss_schema_mysql.sql @@ -187,6 +187,7 @@ insert into ttrss_filter_types (id,name,description) values (4, 'link', insert into ttrss_filter_types (id,name,description) values (5, 'date', 'Article Date'); insert into ttrss_filter_types (id,name,description) values (6, 'author', 'Author'); +insert into ttrss_filter_types (id,name,description) values (7, 'tag', 'Article Tags'); create table ttrss_filter_actions (id integer not null primary key, name varchar(120) unique not null, @@ -243,7 +244,7 @@ create table ttrss_tags (id integer primary key auto_increment, create table ttrss_version (schema_version int not null) TYPE=InnoDB DEFAULT CHARSET=UTF8; -insert into ttrss_version values (70); +insert into ttrss_version values (71); create table ttrss_enclosures (id integer primary key auto_increment, content_url text not null, diff --git a/schema/ttrss_schema_pgsql.sql b/schema/ttrss_schema_pgsql.sql index 2d1f33c6..3583639c 100644 --- a/schema/ttrss_schema_pgsql.sql +++ b/schema/ttrss_schema_pgsql.sql @@ -169,6 +169,7 @@ insert into ttrss_filter_types (id,name,description) values (4, 'link', insert into ttrss_filter_types (id,name,description) values (5, 'date', 'Article Date'); insert into ttrss_filter_types (id,name,description) values (6, 'author', 'Author'); +insert into ttrss_filter_types (id,name,description) values (7, 'tag', 'Article Tags'); create table ttrss_filter_actions (id integer not null primary key, name varchar(120) unique not null, @@ -215,7 +216,7 @@ create index ttrss_tags_owner_uid_index on ttrss_tags(owner_uid); create table ttrss_version (schema_version int not null); -insert into ttrss_version values (70); +insert into ttrss_version values (71); create table ttrss_enclosures (id serial not null primary key, content_url text not null, diff --git a/schema/versions/mysql/71.sql b/schema/versions/mysql/71.sql new file mode 100644 index 00000000..50ac7820 --- /dev/null +++ b/schema/versions/mysql/71.sql @@ -0,0 +1,3 @@ +insert into ttrss_filter_types (id,name,description) values (7, 'tag', 'Article Tags'); + +update ttrss_version set schema_version = 71; diff --git a/schema/versions/pgsql/71.sql b/schema/versions/pgsql/71.sql new file mode 100644 index 00000000..50ac7820 --- /dev/null +++ b/schema/versions/pgsql/71.sql @@ -0,0 +1,3 @@ +insert into ttrss_filter_types (id,name,description) values (7, 'tag', 'Article Tags'); + +update ttrss_version set schema_version = 71;