|
@@ -2,13 +2,12 @@ package main
|
|
|
|
|
|
import (
|
|
|
"bufio"
|
|
|
- "fmt"
|
|
|
+
|
|
|
"log"
|
|
|
+ "math"
|
|
|
"os"
|
|
|
-
|
|
|
+ "strings"
|
|
|
"sync"
|
|
|
-
|
|
|
- "github.com/lytics/multibayes"
|
|
|
)
|
|
|
|
|
|
//ByControlPlane contains all the channels we need.
|
|
@@ -18,14 +17,23 @@ type ByControlPlane struct {
|
|
|
StatsTokens chan string
|
|
|
}
|
|
|
|
|
|
+type bScore struct {
|
|
|
+ BadScore float64
|
|
|
+ GoodScore float64
|
|
|
+}
|
|
|
+
|
|
|
+type bMap struct {
|
|
|
+ bScores map[string]bScore
|
|
|
+ busy sync.Mutex
|
|
|
+}
|
|
|
+
|
|
|
//ControPlane is the variabile
|
|
|
var ControPlane ByControlPlane
|
|
|
|
|
|
//ByClassifier is the structure containing our Pseudo-Bayes classifier.
|
|
|
type ByClassifier struct {
|
|
|
- STATS sync.Map
|
|
|
- bayez *multibayes.Classifier
|
|
|
- busy sync.Mutex
|
|
|
+ STATS sync.Map
|
|
|
+ Matrix bMap
|
|
|
}
|
|
|
|
|
|
//AddStats adds the statistics after proper blocking.
|
|
@@ -44,39 +52,94 @@ func (c *ByClassifier) AddStats(action string) {
|
|
|
//IsBAD inserts a bad key in the right place.
|
|
|
func (c *ByClassifier) IsBAD(key string) {
|
|
|
|
|
|
- c.busy.Lock()
|
|
|
- defer c.busy.Unlock()
|
|
|
+ c.Matrix.busy.Lock()
|
|
|
+ defer c.Matrix.busy.Unlock()
|
|
|
|
|
|
- c.bayez.Add(key, []string{"BAD"})
|
|
|
+ var t bScore
|
|
|
+
|
|
|
+ if val, ok := c.Matrix.bScores[key]; ok {
|
|
|
+ t.BadScore = val.BadScore + 1
|
|
|
+ t.GoodScore = val.GoodScore
|
|
|
+ } else {
|
|
|
+ t.BadScore = 1
|
|
|
+ t.GoodScore = 0
|
|
|
+ }
|
|
|
+
|
|
|
+ c.Matrix.bScores[key] = t
|
|
|
|
|
|
}
|
|
|
|
|
|
//IsGOOD inserts the key in the right place.
|
|
|
func (c *ByClassifier) IsGOOD(key string) {
|
|
|
|
|
|
- c.busy.Lock()
|
|
|
- defer c.busy.Unlock()
|
|
|
+ c.Matrix.busy.Lock()
|
|
|
+ defer c.Matrix.busy.Unlock()
|
|
|
+
|
|
|
+ var t bScore
|
|
|
+
|
|
|
+ if val, ok := c.Matrix.bScores[key]; ok {
|
|
|
+ t.GoodScore = val.GoodScore + 1
|
|
|
+ t.BadScore = val.BadScore
|
|
|
+ } else {
|
|
|
+ t.BadScore = 0
|
|
|
+ t.GoodScore = 1
|
|
|
+ }
|
|
|
|
|
|
- c.bayez.Add(key, []string{"GOOD"})
|
|
|
+ c.Matrix.bScores[key] = t
|
|
|
|
|
|
}
|
|
|
|
|
|
//Posterior calculates the posterior probabilities in pseudo-bayes.
|
|
|
-func (c *ByClassifier) Posterior(hdr string) (ff map[string]float64) {
|
|
|
+func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
|
|
|
+
|
|
|
+ c.Matrix.busy.Lock()
|
|
|
+ defer c.Matrix.busy.Unlock()
|
|
|
+
|
|
|
+ tokens := strings.Fields(sanitizeHeaders(hdr))
|
|
|
+ lenTokens := float64(len(tokens))
|
|
|
|
|
|
- defer func() {
|
|
|
+ ff := make(map[string]float64)
|
|
|
|
|
|
- if a := recover(); a != nil {
|
|
|
- fmt.Println("OPS!: Recovering from:", a)
|
|
|
- ff = make(map[string]float64)
|
|
|
- ff["BAD"] = 0.5
|
|
|
- ff["GOOD"] = 0.5
|
|
|
+ if lenTokens == 0 {
|
|
|
+ ff["BAD"] = 0.5
|
|
|
+ ff["GOOD"] = 0.5
|
|
|
+ return ff
|
|
|
+ }
|
|
|
+
|
|
|
+ log.Println("Start classification of: ", tokens)
|
|
|
+
|
|
|
+ var hBadM, hGoodM float64
|
|
|
+
|
|
|
+ for _, tk := range tokens {
|
|
|
+
|
|
|
+ if val, ok := c.Matrix.bScores[tk]; ok {
|
|
|
+ log.Println("Classifier found: ", tk)
|
|
|
+ if val.BadScore > 0 {
|
|
|
+ hBadM += val.BadScore * math.Log2(val.BadScore)
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ if val.GoodScore > 0 {
|
|
|
+ hGoodM += val.GoodScore * math.Log2(val.GoodScore)
|
|
|
+
|
|
|
+ }
|
|
|
}
|
|
|
- }()
|
|
|
|
|
|
- c.busy.Lock()
|
|
|
- ff = c.bayez.Posterior(hdr)
|
|
|
- defer c.busy.Unlock()
|
|
|
+ }
|
|
|
+
|
|
|
+ hBadM = math.Log2(lenTokens) - (hBadM / lenTokens)
|
|
|
+ hGoodM = math.Log2(lenTokens) - (hGoodM / lenTokens)
|
|
|
+
|
|
|
+ if math.Abs(hGoodM) >= math.Abs(hBadM) {
|
|
|
+ ff["GOOD"] = 1
|
|
|
+ ff["BAD"] = 0
|
|
|
+ } else {
|
|
|
+ ff["GOOD"] = 0
|
|
|
+ ff["BAD"] = 1
|
|
|
+ }
|
|
|
+
|
|
|
+ log.Println("Entropies: ", ff)
|
|
|
+
|
|
|
return ff
|
|
|
|
|
|
}
|
|
@@ -87,10 +150,9 @@ func (c *ByClassifier) enroll() {
|
|
|
ControPlane.GoodTokens = make(chan string, 2048)
|
|
|
ControPlane.StatsTokens = make(chan string, 2048)
|
|
|
|
|
|
- c.busy.Lock()
|
|
|
- c.bayez = multibayes.NewClassifier()
|
|
|
- c.bayez.MinClassSize = 0
|
|
|
- c.busy.Unlock()
|
|
|
+ c.Matrix.busy.Lock()
|
|
|
+ c.Matrix.bScores = make(map[string]bScore)
|
|
|
+ c.Matrix.busy.Unlock()
|
|
|
|
|
|
c.readInitList("blacklist.txt", "BAD")
|
|
|
c.readInitList("whitelist.txt", "GOOD")
|