|
@@ -1,13 +1,17 @@
|
|
|
package main
|
|
|
|
|
|
import (
|
|
|
+ "bufio"
|
|
|
"fmt"
|
|
|
"log"
|
|
|
+ "os"
|
|
|
"sort"
|
|
|
"strconv"
|
|
|
- "strings"
|
|
|
+
|
|
|
"sync"
|
|
|
"time"
|
|
|
+
|
|
|
+ "github.com/lytics/multibayes"
|
|
|
)
|
|
|
|
|
|
//ByControlPlane contains all the channels we need.
|
|
@@ -26,6 +30,7 @@ type ByClassifier struct {
|
|
|
BAD sync.Map
|
|
|
MEH sync.Map
|
|
|
STATS sync.Map
|
|
|
+ bayez *multibayes.Classifier
|
|
|
}
|
|
|
|
|
|
//AddStats adds the statistics after proper blocking.
|
|
@@ -46,21 +51,19 @@ func (c *ByClassifier) IsBAD(key string) {
|
|
|
|
|
|
if _, ok := c.MEH.Load(key); ok {
|
|
|
c.MEH.Store(key, time.Now().UnixNano())
|
|
|
+ log.Println("Updated BAD into MEH: ", key)
|
|
|
return
|
|
|
}
|
|
|
|
|
|
if _, ok := c.GOOD.Load(key); ok {
|
|
|
c.MEH.Store(key, time.Now().UnixNano())
|
|
|
c.GOOD.Delete(key)
|
|
|
- return
|
|
|
- }
|
|
|
-
|
|
|
- if _, ok := c.BAD.Load(key); ok {
|
|
|
- c.BAD.Store(key, time.Now().UnixNano())
|
|
|
+ log.Println("Moved to MEH from GOOD: ", key)
|
|
|
return
|
|
|
}
|
|
|
|
|
|
c.BAD.Store(key, time.Now().UnixNano())
|
|
|
+ log.Println("Stored into BAD: ", key)
|
|
|
|
|
|
}
|
|
|
|
|
@@ -69,59 +72,36 @@ func (c *ByClassifier) IsGOOD(key string) {
|
|
|
|
|
|
if _, ok := c.MEH.Load(key); ok {
|
|
|
c.MEH.Store(key, time.Now().UnixNano())
|
|
|
+ log.Println("Updated GOOD into MEH: ", key)
|
|
|
return
|
|
|
}
|
|
|
|
|
|
if _, ok := c.BAD.Load(key); ok {
|
|
|
c.MEH.Store(key, time.Now().UnixNano())
|
|
|
c.BAD.Delete(key)
|
|
|
- return
|
|
|
- }
|
|
|
-
|
|
|
- if _, ok := c.GOOD.Load(key); ok {
|
|
|
- c.GOOD.Store(key, time.Now().UnixNano())
|
|
|
+ log.Println("Moved to MEH from BAD: ", key)
|
|
|
return
|
|
|
}
|
|
|
|
|
|
c.GOOD.Store(key, time.Now().UnixNano())
|
|
|
+ log.Println("Stored into GOOD: ", key)
|
|
|
|
|
|
}
|
|
|
|
|
|
//Posterior calculates the posterior probabilities in pseudo-bayes.
|
|
|
-func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
|
|
|
-
|
|
|
- headers := strings.Fields(sanitizeHeaders(hdr))
|
|
|
+func (c *ByClassifier) Posterior(hdr string) (ff map[string]float64) {
|
|
|
|
|
|
- var result = make(map[string]float64)
|
|
|
- result["BAD"] = 0
|
|
|
- result["GOOD"] = 0
|
|
|
+ defer func() {
|
|
|
|
|
|
- var tmpResGood, tmpResBad, tmpTotal float64
|
|
|
-
|
|
|
- for _, token := range headers {
|
|
|
-
|
|
|
- if _, ok := c.BAD.Load(token); ok {
|
|
|
- tmpResBad++
|
|
|
- tmpTotal++
|
|
|
+ if a := recover(); a != nil {
|
|
|
+ fmt.Println("OPS!: Recovering from:", a)
|
|
|
+ ff = make(map[string]float64)
|
|
|
+ ff["BAD"] = 0.5
|
|
|
+ ff["GOOD"] = 0.5
|
|
|
}
|
|
|
+ }()
|
|
|
|
|
|
- if _, ok := c.GOOD.Load(token); ok {
|
|
|
- tmpResGood++
|
|
|
- tmpTotal++
|
|
|
- }
|
|
|
-
|
|
|
- }
|
|
|
-
|
|
|
- if tmpTotal == 0 {
|
|
|
- tmpTotal = 1
|
|
|
- }
|
|
|
-
|
|
|
- log.Printf("Bad Tokens: %f, Good Tokens %f , Total %f\n", tmpResBad, tmpResGood, tmpTotal)
|
|
|
-
|
|
|
- result["BAD"] = tmpResBad / tmpTotal
|
|
|
- result["GOOD"] = tmpResGood / tmpTotal
|
|
|
-
|
|
|
- return result
|
|
|
+ return c.bayez.Posterior(hdr)
|
|
|
|
|
|
}
|
|
|
|
|
@@ -140,6 +120,37 @@ func (c *ByClassifier) Janitor(size int) {
|
|
|
|
|
|
}
|
|
|
|
|
|
+//RefreshBayes refresh the bayesian using values we stored
|
|
|
+func (c *ByClassifier) RefreshBayes() {
|
|
|
+
|
|
|
+ log.Println("RefreshBayes Thread started")
|
|
|
+
|
|
|
+ ticker := time.NewTicker(5 * time.Minute)
|
|
|
+
|
|
|
+ for ; true; <-ticker.C {
|
|
|
+
|
|
|
+ c.bayez = multibayes.NewClassifier()
|
|
|
+ c.bayez.MinClassSize = 0
|
|
|
+
|
|
|
+ c.BAD.Range(func(key interface{}, value interface{}) bool {
|
|
|
+ c.bayez.Add(key.(string), []string{"BAD"})
|
|
|
+ return true
|
|
|
+ })
|
|
|
+
|
|
|
+ c.GOOD.Range(func(key interface{}, value interface{}) bool {
|
|
|
+ c.bayez.Add(key.(string), []string{"GOOD"})
|
|
|
+ return true
|
|
|
+ })
|
|
|
+
|
|
|
+ c.MEH.Range(func(key interface{}, value interface{}) bool {
|
|
|
+ c.bayez.Add(key.(string), []string{"GOOD", "BAD"})
|
|
|
+ return true
|
|
|
+ })
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
//CleanThread is the Janitor thread
|
|
|
func (c *ByClassifier) CleanThread() {
|
|
|
|
|
@@ -164,8 +175,11 @@ func (c *ByClassifier) enroll() {
|
|
|
ControPlane.GoodTokens = make(chan string, 2048)
|
|
|
ControPlane.StatsTokens = make(chan string, 2048)
|
|
|
|
|
|
- c.IsBAD("Penis")
|
|
|
- c.IsGOOD("Gun")
|
|
|
+ c.bayez = multibayes.NewClassifier()
|
|
|
+ c.bayez.MinClassSize = 0
|
|
|
+
|
|
|
+ c.readInitList("blacklist.txt", "BAD")
|
|
|
+ c.readInitList("whitelist.txt", "GOOD")
|
|
|
c.MEH.Store("Dildo", time.Now().UnixNano())
|
|
|
|
|
|
go c.readBadTokens()
|
|
@@ -174,6 +188,7 @@ func (c *ByClassifier) enroll() {
|
|
|
|
|
|
log.Println("Classifier populated...")
|
|
|
go c.CleanThread()
|
|
|
+ go c.RefreshBayes()
|
|
|
log.Println("Janitor Started")
|
|
|
|
|
|
}
|
|
@@ -197,7 +212,7 @@ func sortMap(unsorted *sync.Map, size int) {
|
|
|
|
|
|
sort.Slice(tempCont, func(i, j int) bool { return tempCont[i].Num > tempCont[j].Num })
|
|
|
|
|
|
- if len(tempCont) > size {
|
|
|
+ if size > 0 && len(tempCont) > size {
|
|
|
tempCont = tempCont[:size]
|
|
|
}
|
|
|
|
|
@@ -217,6 +232,7 @@ func (c *ByClassifier) readBadTokens() {
|
|
|
log.Println("Start reading BAD tokens")
|
|
|
|
|
|
for token := range ControPlane.BadTokens {
|
|
|
+ log.Println("Received BAD Token: ", token)
|
|
|
c.IsBAD(token)
|
|
|
}
|
|
|
|
|
@@ -227,6 +243,7 @@ func (c *ByClassifier) readGoodTokens() {
|
|
|
log.Println("Start reading GOOD tokens")
|
|
|
|
|
|
for token := range ControPlane.GoodTokens {
|
|
|
+ log.Println("Received GOOD Token: ", token)
|
|
|
c.IsGOOD(token)
|
|
|
}
|
|
|
|
|
@@ -241,3 +258,29 @@ func (c *ByClassifier) readStatsTokens() {
|
|
|
}
|
|
|
|
|
|
}
|
|
|
+
|
|
|
+func (c *ByClassifier) readInitList(filePath, class string) {
|
|
|
+
|
|
|
+ inFile, err := os.Open(filePath)
|
|
|
+ if err != nil {
|
|
|
+ log.Println(err.Error() + `: ` + filePath)
|
|
|
+ return
|
|
|
+ }
|
|
|
+ defer inFile.Close()
|
|
|
+
|
|
|
+ scanner := bufio.NewScanner(inFile)
|
|
|
+ for scanner.Scan() {
|
|
|
+
|
|
|
+ if len(scanner.Text()) > 3 {
|
|
|
+ switch class {
|
|
|
+ case "BAD":
|
|
|
+ log.Println("Loading into Blacklist: ", scanner.Text()) // the line
|
|
|
+ c.IsBAD(scanner.Text())
|
|
|
+ case "GOOD":
|
|
|
+ log.Println("Loading into Whitelist: ", scanner.Text()) // the line
|
|
|
+ c.IsGOOD(scanner.Text())
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+}
|