use ngram tokens instead of whole words for matching

This commit is contained in:
Andrew Dolgov 2015-06-17 18:15:41 +03:00
parent 4947c02e1a
commit 3dcd00e4c4

View file

@@ -16,6 +16,7 @@ class Af_Sort_Bayes extends Plugin {
function init($host) { function init($host) {
require_once __DIR__ . "/lib/class.naivebayesian.php"; require_once __DIR__ . "/lib/class.naivebayesian.php";
require_once __DIR__ . "/lib/class.naivebayesian_ngram.php";
require_once __DIR__ . "/lib/class.naivebayesianstorage.php"; require_once __DIR__ . "/lib/class.naivebayesianstorage.php";
$this->host = $host; $this->host = $host;
@@ -36,7 +37,7 @@ class Af_Sort_Bayes extends Plugin {
$category = $train_up ? "GOOD" : "NEUTRAL"; $category = $train_up ? "GOOD" : "NEUTRAL";
$nbs = new NaiveBayesianStorage($_SESSION["uid"]); $nbs = new NaiveBayesianStorage($_SESSION["uid"]);
$nb = new NaiveBayesian($nbs); $nb = new NaiveBayesianNgram($nbs);
$result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " . $result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " .
$article_id . " AND owner_uid = " . $_SESSION["uid"]); $article_id . " AND owner_uid = " . $_SESSION["uid"]);
@@ -202,7 +203,7 @@ class Af_Sort_Bayes extends Plugin {
$owner_uid = $article["owner_uid"]; $owner_uid = $article["owner_uid"];
$nbs = new NaiveBayesianStorage($owner_uid); $nbs = new NaiveBayesianStorage($owner_uid);
$nb = new NaiveBayesian($nbs); $nb = new NaiveBayesianNgram($nbs);
$categories = $nbs->getCategories(); $categories = $nbs->getCategories();
@@ -227,7 +228,7 @@ class Af_Sort_Bayes extends Plugin {
$bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"])); $bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));
if ($count_neutral >= 3000 && $count_good >= 1000) { if ($count_neutral >= 20000 && $count_good >= 10000) {
// enable automatic categorization // enable automatic categorization
$result = $nb->categorize($bayes_content); $result = $nb->categorize($bayes_content);
@@ -261,7 +262,7 @@ class Af_Sort_Bayes extends Plugin {
$this->dbh->query("COMMIT"); $this->dbh->query("COMMIT");
$nbs = new NaiveBayesianStorage($_SESSION["uid"]); $nbs = new NaiveBayesianStorage($_SESSION["uid"]);
$nb = new NaiveBayesian($nbs); $nb = new NaiveBayesianNgram($nbs);
$nb->updateProbabilities(); $nb->updateProbabilities();
} }