123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275 |
- package main
- import (
- "bufio"
- "fmt"
- "log"
- "os"
- "sort"
- "strconv"
- "strings"
- "sync"
- "time"
- )
// ByControlPlane groups the string channels used to hand tokens to the
// classifier: one for tokens reported as bad, one for tokens reported as
// good, and one for statistics events.
type ByControlPlane struct {
	BadTokens, GoodTokens, StatsTokens chan string
}
- //ControPlane is the variabile
- var ControPlane ByControlPlane
// ByClassifier holds the tables of the pseudo-Bayes classifier.
//
// GOOD, BAD and MEH map a token (string) to the UnixNano timestamp (int64)
// of the moment it was last stored there. STATS maps an action name (string)
// to an int64 counter. The zero value is ready to use.
type ByClassifier struct {
	GOOD, BAD, MEH, STATS sync.Map
}
- //AddStats adds the statistics after proper blocking.
- func (c *ByClassifier) AddStats(action string) {
- var one int64 = 1
- if v, ok := c.STATS.Load(action); ok {
- c.STATS.Store(action, v.(int64)+1)
- } else {
- c.STATS.Store(action, one)
- }
- }
- //IsBAD inserts a bad key in the right place.
- func (c *ByClassifier) IsBAD(key string) {
- if _, ok := c.MEH.Load(key); ok {
- c.MEH.Store(key, time.Now().UnixNano())
- log.Println("Updated BAD into MEH: ", key)
- return
- }
- if _, ok := c.GOOD.Load(key); ok {
- c.MEH.Store(key, time.Now().UnixNano())
- c.GOOD.Delete(key)
- log.Println("Moved to MEH from GOOD: ", key)
- return
- }
- c.BAD.Store(key, time.Now().UnixNano())
- log.Println("Stored into BAD: ", key)
- }
- //IsGOOD inserts the key in the right place.
- func (c *ByClassifier) IsGOOD(key string) {
- if _, ok := c.MEH.Load(key); ok {
- c.MEH.Store(key, time.Now().UnixNano())
- log.Println("Updated GOOD into MEH: ", key)
- return
- }
- if _, ok := c.BAD.Load(key); ok {
- c.MEH.Store(key, time.Now().UnixNano())
- c.BAD.Delete(key)
- log.Println("Moved to MEH from BAD: ", key)
- return
- }
- c.GOOD.Store(key, time.Now().UnixNano())
- log.Println("Stored into GOOD: ", key)
- }
- //Posterior calculates the posterior probabilities in pseudo-bayes.
- func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
- headers := strings.Fields(sanitizeHeaders(hdr))
- var result = make(map[string]float64)
- result["BAD"] = 0
- result["GOOD"] = 0
- var tmpResGood, tmpResBad, tmpResMeh, tmpTotal float64
- for _, token := range headers {
- if _, ok := c.BAD.Load(token); ok {
- tmpResBad++
- tmpTotal++
- }
- if _, ok := c.GOOD.Load(token); ok {
- tmpResGood++
- tmpTotal++
- }
- if _, ok := c.MEH.Load(token); ok {
- tmpResMeh++
- tmpTotal++
- }
- }
- if tmpTotal == 0 {
- tmpTotal = 1
- }
- log.Printf("Bad Tokens: %f, Good Tokens %f , Total %f\n", tmpResBad, tmpResGood, tmpTotal)
- result["BAD"] = (tmpResBad + tmpResMeh) / tmpTotal
- result["GOOD"] = (tmpResGood + tmpResMeh) / tmpTotal
- return result
- }
- //Janitor keeps the maps under a certain size, keeping the biggest values.
- func (c *ByClassifier) Janitor(size int) {
- log.Println("Janitor Running")
- sortMap(&c.BAD, size)
- sortMap(&c.GOOD, size)
- sortMap(&c.MEH, size)
- log.Println("Janitor Finished.")
- }
- //CleanThread is the Janitor thread
- func (c *ByClassifier) CleanThread() {
- for {
- MaxSize, err := strconv.Atoi(fmt.Sprintf("%d", Maturity))
- if err != nil {
- MaxSize = 1000
- log.Println("Maxsize converted to: ", MaxSize)
- }
- log.Println("Janitor Maxsize is now:", MaxSize)
- time.Sleep(10 * time.Second)
- c.Janitor(MaxSize)
- }
- }
- func (c *ByClassifier) enroll() {
- ControPlane.BadTokens = make(chan string, 2048)
- ControPlane.GoodTokens = make(chan string, 2048)
- ControPlane.StatsTokens = make(chan string, 2048)
- c.readInitList("blacklist.txt", "BAD")
- c.readInitList("whitelist.txt", "GOOD")
- c.MEH.Store("Dildo", time.Now().UnixNano())
- go c.readBadTokens()
- go c.readGoodTokens()
- go c.readStatsTokens()
- log.Println("Classifier populated...")
- go c.CleanThread()
- log.Println("Janitor Started")
- }
// sortMap trims unsorted so that it holds at most size entries, keeping the
// entries with the largest int64 values (for the classifier tables: the most
// recently updated tokens).
//
// A non-positive size, or a map that already fits within size, leaves the
// map untouched. Keys must be strings and values int64; anything else makes
// the type assertions panic.
//
// Only the entries that fall beyond the cut-off are deleted. The map is never
// emptied and refilled, so concurrent readers do not observe a transiently
// empty table, entries stored by other goroutines while the janitor runs are
// not wiped, and fresher timestamps are not overwritten with stale ones.
func sortMap(unsorted *sync.Map, size int) {
	if size <= 0 {
		return
	}

	type entry struct {
		name string
		num  int64
	}
	var entries []entry
	unsorted.Range(func(key, value interface{}) bool {
		entries = append(entries, entry{name: key.(string), num: value.(int64)})
		return true
	})
	if len(entries) <= size {
		return
	}

	// Largest values first; everything from index size onwards is evicted.
	sort.Slice(entries, func(i, j int) bool { return entries[i].num > entries[j].num })
	for _, e := range entries[size:] {
		unsorted.Delete(e.name)
	}
}
- func (c *ByClassifier) readBadTokens() {
- log.Println("Start reading BAD tokens")
- for token := range ControPlane.BadTokens {
- log.Println("Received BAD Token: ", token)
- c.IsBAD(token)
- }
- }
- func (c *ByClassifier) readGoodTokens() {
- log.Println("Start reading GOOD tokens")
- for token := range ControPlane.GoodTokens {
- log.Println("Received GOOD Token: ", token)
- c.IsGOOD(token)
- }
- }
- func (c *ByClassifier) readStatsTokens() {
- log.Println("Start reading STATS tokens")
- for token := range ControPlane.StatsTokens {
- c.AddStats(token)
- }
- }
- func (c *ByClassifier) readInitList(filePath, class string) {
- inFile, err := os.Open(filePath)
- if err != nil {
- log.Println(err.Error() + `: ` + filePath)
- return
- }
- defer inFile.Close()
- scanner := bufio.NewScanner(inFile)
- for scanner.Scan() {
- if len(scanner.Text()) > 3 {
- switch class {
- case "BAD":
- log.Println("Loading into Blacklist: ", scanner.Text()) // the line
- c.IsBAD(scanner.Text())
- case "GOOD":
- log.Println("Loading into Whitelist: ", scanner.Text()) // the line
- c.IsGOOD(scanner.Text())
- }
- }
- }
- }
|