Browse Source

Improved Bayesian

Loweel 4 years ago
parent
commit
6e42e94826
3 changed files with 40 additions and 43 deletions
  1. 9 4
      file.go
  2. 28 38
      matrix.go
  3. 3 1
      whitelist.txt

+ 9 - 4
file.go

@@ -61,7 +61,7 @@ func handlepanic() {
 
 func saveBayesToFile() {
 
-	var tmpJSON string
+	var tmpJSON, tmpBayes string
 	var DumpJson = new(dumpStruct)
 
 	DumpJson.Update = time.Now().String()
@@ -113,6 +113,14 @@ func saveBayesToFile() {
 		tmpJSON = e.Error()
 	}
 
+	if tmpB, er := json.MarshalIndent(Classifier.bayez.Matrix, "", " "); er == nil {
+		tmpBayes = fmt.Sprintf("%s", tmpB)
+	} else {
+		tmpBayes = er.Error()
+	}
+
+	tmpJSON = fmt.Sprintf("%s\n%s", tmpJSON, tmpBayes)
+
 	dumpfile := os.Getenv("DUMPFILE")
 	if dumpfile == "" {
 		dumpfile = "bayes.json"
@@ -144,6 +152,3 @@ func init() {
 	go jsonEngine()
 	log.Printf("FIle Engine Started")
 }
-
-
-

+ 28 - 38
matrix.go

@@ -7,9 +7,11 @@ import (
 	"os"
 	"sort"
 	"strconv"
-	"strings"
+
 	"sync"
 	"time"
+
+	"github.com/lytics/multibayes"
 )
 
 //ByControlPlane contains all the channels we need.
@@ -28,6 +30,7 @@ type ByClassifier struct {
 	BAD   sync.Map
 	MEH   sync.Map
 	STATS sync.Map
+	bayez *multibayes.Classifier
 }
 
 //AddStats adds the statistics after proper blocking.
@@ -88,43 +91,7 @@ func (c *ByClassifier) IsGOOD(key string) {
 //Posterior calculates the posterior probabilities in pseudo-bayes.
 func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
 
-	headers := strings.Fields(sanitizeHeaders(hdr))
-
-	var result = make(map[string]float64)
-	result["BAD"] = 0
-	result["GOOD"] = 0
-
-	var tmpResGood, tmpResBad, tmpResMeh, tmpTotal float64
-
-	for _, token := range headers {
-
-		if _, ok := c.BAD.Load(token); ok {
-			tmpResBad++
-			tmpTotal++
-		}
-
-		if _, ok := c.GOOD.Load(token); ok {
-			tmpResGood++
-			tmpTotal++
-		}
-
-		if _, ok := c.MEH.Load(token); ok {
-			tmpResMeh++
-			tmpTotal++
-		}
-
-	}
-
-	if tmpTotal == 0 {
-		tmpTotal = 1
-	}
-
-	log.Printf("Bad Tokens: %f, Good Tokens %f , Total %f\n", tmpResBad, tmpResGood, tmpTotal)
-
-	result["BAD"] = (tmpResBad + tmpResMeh) / tmpTotal
-	result["GOOD"] = (tmpResGood + tmpResMeh) / tmpTotal
-
-	return result
+	return c.bayez.Posterior(hdr)
 
 }
 
@@ -139,6 +106,26 @@ func (c *ByClassifier) Janitor(size int) {
 
 	sortMap(&c.MEH, size)
 
+	c.bayez = nil // mark it for garbage collection.
+
+	c.bayez = multibayes.NewClassifier()
+	c.bayez.MinClassSize = 0
+
+	c.BAD.Range(func(key interface{}, value interface{}) bool {
+		c.bayez.Add(key.(string), []string{"BAD"})
+		return true
+	})
+
+	c.GOOD.Range(func(key interface{}, value interface{}) bool {
+		c.bayez.Add(key.(string), []string{"GOOD"})
+		return true
+	})
+
+	c.MEH.Range(func(key interface{}, value interface{}) bool {
+		c.bayez.Add(key.(string), []string{"GOOD", "BAD"})
+		return true
+	})
+
 	log.Println("Janitor Finished.")
 
 }
@@ -167,6 +154,9 @@ func (c *ByClassifier) enroll() {
 	ControPlane.GoodTokens = make(chan string, 2048)
 	ControPlane.StatsTokens = make(chan string, 2048)
 
+	c.bayez = multibayes.NewClassifier()
+	c.bayez.MinClassSize = 0
+
 	c.readInitList("blacklist.txt", "BAD")
 	c.readInitList("whitelist.txt", "GOOD")
 	c.MEH.Store("Dildo", time.Now().UnixNano())

+ 3 - 1
whitelist.txt

@@ -1,2 +1,4 @@
 guns
-cors
+cors
+/favicon.ico
+favicon