use ngram tokens instead of whole words for matching
This commit is contained in:
parent
4947c02e1a
commit
3dcd00e4c4
1 changed file with 5 additions and 4 deletions
|
@ -16,6 +16,7 @@ class Af_Sort_Bayes extends Plugin {
|
||||||
|
|
||||||
function init($host) {
|
function init($host) {
|
||||||
require_once __DIR__ . "/lib/class.naivebayesian.php";
|
require_once __DIR__ . "/lib/class.naivebayesian.php";
|
||||||
|
require_once __DIR__ . "/lib/class.naivebayesian_ngram.php";
|
||||||
require_once __DIR__ . "/lib/class.naivebayesianstorage.php";
|
require_once __DIR__ . "/lib/class.naivebayesianstorage.php";
|
||||||
|
|
||||||
$this->host = $host;
|
$this->host = $host;
|
||||||
|
@ -36,7 +37,7 @@ class Af_Sort_Bayes extends Plugin {
|
||||||
$category = $train_up ? "GOOD" : "NEUTRAL";
|
$category = $train_up ? "GOOD" : "NEUTRAL";
|
||||||
|
|
||||||
$nbs = new NaiveBayesianStorage($_SESSION["uid"]);
|
$nbs = new NaiveBayesianStorage($_SESSION["uid"]);
|
||||||
$nb = new NaiveBayesian($nbs);
|
$nb = new NaiveBayesianNgram($nbs);
|
||||||
|
|
||||||
$result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " .
|
$result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " .
|
||||||
$article_id . " AND owner_uid = " . $_SESSION["uid"]);
|
$article_id . " AND owner_uid = " . $_SESSION["uid"]);
|
||||||
|
@ -202,7 +203,7 @@ class Af_Sort_Bayes extends Plugin {
|
||||||
$owner_uid = $article["owner_uid"];
|
$owner_uid = $article["owner_uid"];
|
||||||
|
|
||||||
$nbs = new NaiveBayesianStorage($owner_uid);
|
$nbs = new NaiveBayesianStorage($owner_uid);
|
||||||
$nb = new NaiveBayesian($nbs);
|
$nb = new NaiveBayesianNgram($nbs);
|
||||||
|
|
||||||
$categories = $nbs->getCategories();
|
$categories = $nbs->getCategories();
|
||||||
|
|
||||||
|
@ -227,7 +228,7 @@ class Af_Sort_Bayes extends Plugin {
|
||||||
|
|
||||||
$bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));
|
$bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));
|
||||||
|
|
||||||
if ($count_neutral >= 3000 && $count_good >= 1000) {
|
if ($count_neutral >= 20000 && $count_good >= 10000) {
|
||||||
// enable automatic categorization
|
// enable automatic categorization
|
||||||
|
|
||||||
$result = $nb->categorize($bayes_content);
|
$result = $nb->categorize($bayes_content);
|
||||||
|
@ -261,7 +262,7 @@ class Af_Sort_Bayes extends Plugin {
|
||||||
$this->dbh->query("COMMIT");
|
$this->dbh->query("COMMIT");
|
||||||
|
|
||||||
$nbs = new NaiveBayesianStorage($_SESSION["uid"]);
|
$nbs = new NaiveBayesianStorage($_SESSION["uid"]);
|
||||||
$nb = new NaiveBayesian($nbs);
|
$nb = new NaiveBayesianNgram($nbs);
|
||||||
$nb->updateProbabilities();
|
$nb->updateProbabilities();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue