package main import ( "bufio" "log" "os" "strings" "sync" "time" ) const ( Good Class = "GOOD" Bad Class = "BAD" ) //ByControlPlane contains all the channels we need. type ByControlPlane struct { BadTokens chan string GoodTokens chan string StatsTokens chan string } type safeClassifier struct { bayez *Classifier busy sync.Mutex } //ControPlane is the variabile var ControPlane ByControlPlane //ByClassifier is the structure containing our Pseudo-Bayes classifier. type ByClassifier struct { STATS sync.Map Learning safeClassifier Working safeClassifier Generation int64 } //AddStats adds the statistics after proper blocking. func (c *ByClassifier) AddStats(action string) { var one int64 = 1 if v, ok := c.STATS.Load(action); ok { c.STATS.Store(action, v.(int64)+1) } else { c.STATS.Store(action, one) } } //IsBAD inserts a bad key in the right place. func (c *ByClassifier) IsBAD(key string) { k := strings.Fields(key) log.Println("BAD Received", k) c.Learning.busy.Lock() defer c.Learning.busy.Unlock() c.Learning.bayez.Learn(k, Bad) log.Println("BAD Learned", k) } //IsGOOD inserts the key in the right place. func (c *ByClassifier) IsGOOD(key string) { k := strings.Fields(key) log.Println("GOOD Received", k) c.Learning.busy.Lock() defer c.Learning.busy.Unlock() c.Learning.bayez.Learn(k, Good) log.Println("GOOD Learned", k) } //Posterior calculates Shannon based entropy using bad and good as different distributions func (c *ByClassifier) Posterior(hdr string) map[string]float64 { tokens := sanitizeHeaders(hdr) ff := make(map[string]float64) if c.Generation == 0 { ff["BAD"] = 0.5 ff["GOOD"] = 0.5 return ff } log.Println("Posterior locking the Working Bayesian") c.Working.busy.Lock() defer c.Working.busy.Unlock() log.Println("Going to calculate the Scores") scores, _, strict, err := c.Working.bayez.SafeProbScores(strings.Fields(tokens)) log.Println("Scores calculated") if err == ErrUnderflow { ff["BAD"] = 0.5 ff["GOOD"] = 0.5 return ff } if !strict { ff["BAD"] = 0.5 ff["GOOD"] = 0.5 return ff } ff["GOOD"] = scores[0] ff["BAD"] = scores[1] return ff } func (c *ByClassifier) enroll() { ControPlane.BadTokens = make(chan string, 2048) ControPlane.GoodTokens = make(chan string, 2048) ControPlane.StatsTokens = make(chan string, 2048) c.Generation = 0 c.Learning.bayez = NewClassifierTfIdf(Good, Bad) c.Working.bayez = NewClassifierTfIdf(Good, Bad) c.readInitList("blacklist.txt", "BAD") c.readInitList("whitelist.txt", "GOOD") go c.readBadTokens() go c.readGoodTokens() go c.readStatsTokens() go c.updateLearners() log.Println("Classifier populated...") } func (c *ByClassifier) readBadTokens() { log.Println("Start reading BAD tokens") for token := range ControPlane.BadTokens { log.Println("Received BAD Token: ", token) c.IsBAD(token) } } func (c *ByClassifier) readGoodTokens() { log.Println("Start reading GOOD tokens") for token := range ControPlane.GoodTokens { log.Println("Received GOOD Token: ", token) c.IsGOOD(token) } } func (c *ByClassifier) readStatsTokens() { log.Println("Start reading STATS tokens") for token := range ControPlane.StatsTokens { c.AddStats(token) } } func (c *ByClassifier) readInitList(filePath, class string) { inFile, err := os.Open(filePath) if err != nil { log.Println(err.Error() + `: ` + filePath) return } defer inFile.Close() scanner := bufio.NewScanner(inFile) for scanner.Scan() { if len(scanner.Text()) > 3 { switch class { case "BAD": log.Println("Loading into Blacklist: ", scanner.Text()) // the line c.IsBAD(scanner.Text()) case "GOOD": log.Println("Loading into Whitelist: ", scanner.Text()) // the line c.IsGOOD(scanner.Text()) } } } } func (c *ByClassifier) updateLearners() { log.Println("Bayes Updater Start...") ticker := time.NewTicker(10 * time.Second) for ; true; <-ticker.C { var currentGen int64 log.Println("Maturity is:", Maturity) log.Println("Seniority is:", ProxyFlow.seniority) if Maturity > 0 { currentGen = ProxyFlow.seniority / Maturity } else { currentGen = 0 } log.Println("Current Generation is: ", currentGen) log.Println("Working Generation is: ", c.Generation) if currentGen > c.Generation { c.Learning.busy.Lock() c.Working.busy.Lock() c.Working.bayez = c.Learning.bayez c.Working.bayez.ConvertTermsFreqToTfIdf() c.Learning.bayez = NewClassifierTfIdf(Good, Bad) c.Generation = currentGen log.Println("Generation Updated to: ", c.Generation) c.Learning.busy.Unlock() c.Working.busy.Unlock() } } }