package main

import (
	"bufio"
	"log"
	"os"
	"strings"
	"sync"
)

// ByControlPlane groups the channels used to feed the classifier: tokens to
// be recorded as bad, tokens to be recorded as good, and action names to be
// counted in the statistics.
type ByControlPlane struct {
	BadTokens   chan string
	GoodTokens  chan string
	StatsTokens chan string
}

// bScore holds the bad and good counters accumulated for a single token.
type bScore struct {
	BadScore  float64
	GoodScore float64
}

// bMap is the token score table. busy must be held for every access to
// bScores.
type bMap struct {
	bScores map[string]bScore
	busy    sync.Mutex
}

// ControPlane is the package-level set of control channels. Its channels are
// created by (*ByClassifier).enroll.
var ControPlane ByControlPlane

// ByClassifier is the structure containing our pseudo-Bayes classifier.
type ByClassifier struct {
	// STATS maps an action name to its int64 occurrence count.
	STATS sync.Map
	// Matrix is the per-token bad/good score table.
	Matrix bMap
	// statsMu serializes the read-modify-write cycle in AddStats; sync.Map
	// alone only makes the individual Load and Store calls safe.
	statsMu sync.Mutex
}

// AddStats increments the counter stored in STATS for action, starting it at
// 1 the first time the action is seen. The whole increment runs under a
// mutex so that concurrent callers never lose an update.
func (c *ByClassifier) AddStats(action string) {
	c.statsMu.Lock()
	defer c.statsMu.Unlock()

	v, _ := c.STATS.Load(action)
	// count is zero when the action is new; it is also zero if something
	// other than an int64 was stored under this key, in which case the
	// counter restarts instead of panicking on the type assertion.
	count, _ := v.(int64)
	c.STATS.Store(action, count+1)
}

// IsBAD records one more bad observation for key: it increments the key's
// BadScore and leaves its GoodScore untouched, creating the entry on first
// use. The score map is allocated on demand, so calling IsBAD before enroll
// does not panic.
func (c *ByClassifier) IsBAD(key string) {
	c.Matrix.busy.Lock()
	defer c.Matrix.busy.Unlock()

	if c.Matrix.bScores == nil {
		c.Matrix.bScores = make(map[string]bScore)
	}
	// A missing key yields the zero bScore, which is exactly the starting
	// point for a new token.
	score := c.Matrix.bScores[key]
	score.BadScore++
	c.Matrix.bScores[key] = score
}

// IsGOOD inserts the key in the right place.
func (c *ByClassifier) IsGOOD(key string) { c.Matrix.busy.Lock() defer c.Matrix.busy.Unlock() var t bScore if val, ok := c.Matrix.bScores[key]; ok { t.GoodScore = val.GoodScore + 1 t.BadScore = val.BadScore } else { t.BadScore = 0 t.GoodScore = 1 } c.Matrix.bScores[key] = t } //Posterior calculates Shannon based entropy using bad and good as different distributions func (c *ByClassifier) Posterior(hdr string) map[string]float64 { c.Matrix.busy.Lock() defer c.Matrix.busy.Unlock() tokens := strings.Fields(sanitizeHeaders(hdr)) lenTokens := float64(len(tokens)) ff := make(map[string]float64) if lenTokens == 0 { ff["BAD"] = 0.5 ff["GOOD"] = 0.5 return ff } log.Println("Start classification of: ", tokens) var hBadM, hGoodM float64 for _, tk := range tokens { if val, ok := c.Matrix.bScores[tk]; ok { if val.BadScore > 0 { hBadM += val.BadScore } if val.GoodScore > 0 { hGoodM += val.GoodScore } } } maxScore := float64(ProxyFlow.seniority) * lenTokens if maxScore == 0 { maxScore = 1 } ff["BAD"] = hBadM / maxScore ff["GOOD"] = hGoodM / maxScore log.Println("Entropies: ", ff) return ff } func (c *ByClassifier) enroll() { ControPlane.BadTokens = make(chan string, 2048) ControPlane.GoodTokens = make(chan string, 2048) ControPlane.StatsTokens = make(chan string, 2048) c.Matrix.busy.Lock() c.Matrix.bScores = make(map[string]bScore) c.Matrix.busy.Unlock() c.readInitList("blacklist.txt", "BAD") c.readInitList("whitelist.txt", "GOOD") go c.readBadTokens() go c.readGoodTokens() go c.readStatsTokens() log.Println("Classifier populated...") } func (c *ByClassifier) readBadTokens() { log.Println("Start reading BAD tokens") for token := range ControPlane.BadTokens { log.Println("Received BAD Token: ", token) c.IsBAD(token) } } func (c *ByClassifier) readGoodTokens() { log.Println("Start reading GOOD tokens") for token := range ControPlane.GoodTokens { log.Println("Received GOOD Token: ", token) c.IsGOOD(token) } } func (c *ByClassifier) readStatsTokens() { log.Println("Start reading 
STATS tokens") for token := range ControPlane.StatsTokens { c.AddStats(token) } } func (c *ByClassifier) readInitList(filePath, class string) { inFile, err := os.Open(filePath) if err != nil { log.Println(err.Error() + `: ` + filePath) return } defer inFile.Close() scanner := bufio.NewScanner(inFile) for scanner.Scan() { if len(scanner.Text()) > 3 { switch class { case "BAD": log.Println("Loading into Blacklist: ", scanner.Text()) // the line c.IsBAD(scanner.Text()) case "GOOD": log.Println("Loading into Whitelist: ", scanner.Text()) // the line c.IsGOOD(scanner.Text()) } } } }