|
@@ -7,9 +7,11 @@ import (
|
|
|
"os"
|
|
|
"sort"
|
|
|
"strconv"
|
|
|
- "strings"
|
|
|
+
|
|
|
"sync"
|
|
|
"time"
|
|
|
+
|
|
|
+ "github.com/lytics/multibayes"
|
|
|
)
|
|
|
|
|
|
//ByControlPlane contains all the channels we need.
|
|
@@ -28,6 +30,7 @@ type ByClassifier struct {
|
|
|
BAD sync.Map
|
|
|
MEH sync.Map
|
|
|
STATS sync.Map
|
|
|
+ bayez *multibayes.Classifier
|
|
|
}
|
|
|
|
|
|
//AddStats adds the statistics after proper blocking.
|
|
@@ -88,43 +91,7 @@ func (c *ByClassifier) IsGOOD(key string) {
|
|
|
//Posterior calculates the posterior probabilities in pseudo-bayes.
|
|
|
func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
|
|
|
|
|
|
- headers := strings.Fields(sanitizeHeaders(hdr))
|
|
|
-
|
|
|
- var result = make(map[string]float64)
|
|
|
- result["BAD"] = 0
|
|
|
- result["GOOD"] = 0
|
|
|
-
|
|
|
- var tmpResGood, tmpResBad, tmpResMeh, tmpTotal float64
|
|
|
-
|
|
|
- for _, token := range headers {
|
|
|
-
|
|
|
- if _, ok := c.BAD.Load(token); ok {
|
|
|
- tmpResBad++
|
|
|
- tmpTotal++
|
|
|
- }
|
|
|
-
|
|
|
- if _, ok := c.GOOD.Load(token); ok {
|
|
|
- tmpResGood++
|
|
|
- tmpTotal++
|
|
|
- }
|
|
|
-
|
|
|
- if _, ok := c.MEH.Load(token); ok {
|
|
|
- tmpResMeh++
|
|
|
- tmpTotal++
|
|
|
- }
|
|
|
-
|
|
|
- }
|
|
|
-
|
|
|
- if tmpTotal == 0 {
|
|
|
- tmpTotal = 1
|
|
|
- }
|
|
|
-
|
|
|
- log.Printf("Bad Tokens: %f, Good Tokens %f , Total %f\n", tmpResBad, tmpResGood, tmpTotal)
|
|
|
-
|
|
|
- result["BAD"] = (tmpResBad + tmpResMeh) / tmpTotal
|
|
|
- result["GOOD"] = (tmpResGood + tmpResMeh) / tmpTotal
|
|
|
-
|
|
|
- return result
|
|
|
+ return c.bayez.Posterior(hdr)
|
|
|
|
|
|
}
|
|
|
|
|
@@ -139,6 +106,26 @@ func (c *ByClassifier) Janitor(size int) {
|
|
|
|
|
|
sortMap(&c.MEH, size)
|
|
|
|
|
|
+ c.bayez = nil // mark it for garbage collection.
|
|
|
+
|
|
|
+ c.bayez = multibayes.NewClassifier()
|
|
|
+ c.bayez.MinClassSize = 0
|
|
|
+
|
|
|
+ c.BAD.Range(func(key interface{}, value interface{}) bool {
|
|
|
+ c.bayez.Add(key.(string), []string{"BAD"})
|
|
|
+ return true
|
|
|
+ })
|
|
|
+
|
|
|
+ c.GOOD.Range(func(key interface{}, value interface{}) bool {
|
|
|
+ c.bayez.Add(key.(string), []string{"GOOD"})
|
|
|
+ return true
|
|
|
+ })
|
|
|
+
|
|
|
+ c.MEH.Range(func(key interface{}, value interface{}) bool {
|
|
|
+ c.bayez.Add(key.(string), []string{"GOOD", "BAD"})
|
|
|
+ return true
|
|
|
+ })
|
|
|
+
|
|
|
log.Println("Janitor Finished.")
|
|
|
|
|
|
}
|
|
@@ -167,6 +154,9 @@ func (c *ByClassifier) enroll() {
|
|
|
ControPlane.GoodTokens = make(chan string, 2048)
|
|
|
ControPlane.StatsTokens = make(chan string, 2048)
|
|
|
|
|
|
+ c.bayez = multibayes.NewClassifier()
|
|
|
+ c.bayez.MinClassSize = 0
|
|
|
+
|
|
|
c.readInitList("blacklist.txt", "BAD")
|
|
|
c.readInitList("whitelist.txt", "GOOD")
|
|
|
c.MEH.Store("Dildo", time.Now().UnixNano())
|