123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254 |
- package main
- import (
- "log"
- "sort"
- "strings"
- "time"
- "github.com/n3integration/classifier/naive"
- )
// ByControlPlane bundles the channels through which tokens are handed to the
// classifier's reader goroutines.
type ByControlPlane struct {
	// BadTokens carries tokens that readBadTokens passes to IsBAD.
	BadTokens chan string
	// GoodTokens carries tokens that readGoodTokens passes to IsGOOD.
	GoodTokens chan string
	// StatsTokens carries action names that readStatsTokens passes to AddStats.
	StatsTokens chan string
}
- //Controlplane is the variabile
- var ControPlane ByControlPlane
// ByClassifier holds the state of the pseudo-Bayes classifier: three token
// score tables plus a table of action counters. None of the maps is guarded
// by a lock in this type.
type ByClassifier struct {
	// GOOD scores tokens that IsGOOD recorded and IsBAD has not seen.
	GOOD map[string]float64
	// BAD scores tokens that IsBAD recorded and IsGOOD has not seen.
	BAD map[string]float64
	// MEH scores tokens that were reported through both IsGOOD and IsBAD.
	MEH map[string]float64
	// STATS counts how many times AddStats was called for each action.
	STATS map[string]int64
}
- //AddStats adds the statistics after proper blocking.
- func (c *ByClassifier) AddStats(action string) {
- if _, ok := c.STATS[action]; ok {
- c.STATS[action]++
- } else {
- c.STATS[action] = 1
- }
- }
- //IsBAD inserts a bad key in the right place.
- func (c *ByClassifier) IsBAD(key string) {
- if _, ok := c.MEH[key]; ok {
- c.MEH[key]++
- return
- }
- if score, ok := c.GOOD[key]; ok {
- c.MEH[key] = score + 1
- delete(c.GOOD, key)
- return
- }
- if _, ok := c.BAD[key]; ok {
- c.BAD[key]++
- return
- }
- c.BAD[key] = 1
- }
- //IsGOOD inserts the key in the right place.
- func (c *ByClassifier) IsGOOD(key string) {
- if _, ok := c.MEH[key]; ok {
- c.MEH[key]++
- return
- }
- if score, ok := c.BAD[key]; ok {
- c.MEH[key] = score + 1
- delete(c.BAD, key)
- return
- }
- if _, ok := c.GOOD[key]; ok {
- c.GOOD[key]++
- return
- }
- c.GOOD[key] = 1
- }
- //Posterior calculates the posterior probabilities in pseudo-bayes.
- func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
- headers := strings.Fields(sanitizeHeaders(hdr))
- var result = make(map[string]float64)
- result["BAD"] = 0
- result["GOOD"] = 0
- var tmpResGood, tmpResBad float64
- for _, token := range headers {
- if _, ok := c.BAD[token]; ok {
- tmpResBad += c.BAD[token]
- }
- if _, ok := c.GOOD[token]; ok {
- tmpResGood += c.GOOD[token]
- }
- }
- tmpTotal := tmpResBad + tmpResGood
- if tmpTotal == 0 {
- tmpTotal = 1
- }
- result["BAD"] = tmpResBad / tmpTotal
- result["GOOD"] = tmpResGood / tmpTotal
- log.Println(c.Bayes(hdr))
- return result
- }
- //Janitor keeps the maps under a certain size, keeping the biggest values.
- func (c *ByClassifier) Janitor(size int) {
- log.Println("Janitor Running")
- c.BAD = sortMap(c.BAD, size)
- c.GOOD = sortMap(c.GOOD, size)
- c.MEH = sortMap(c.MEH, size)
- log.Println("Janitor Finished.")
- }
- //CleanThread is the Janitor thread
- func (c *ByClassifier) CleanThread() {
- for {
- time.Sleep(10 * time.Minute)
- c.Janitor(1024)
- }
- }
- func (c *ByClassifier) enroll() {
- c.BAD = make(map[string]float64)
- c.GOOD = make(map[string]float64)
- c.MEH = make(map[string]float64)
- c.STATS = make(map[string]int64)
- ControPlane.BadTokens = make(chan string, 2048)
- ControPlane.GoodTokens = make(chan string, 2048)
- ControPlane.StatsTokens = make(chan string, 2048)
- c.IsBAD("Penis")
- c.IsGOOD("Gun")
- c.MEH["meh"] = 0
- go c.readBadTokens()
- go c.readGoodTokens()
- go c.readStatsTokens()
- log.Println("Classifier populated...")
- go c.CleanThread()
- log.Println("Janitor Started")
- }
// sortMap returns a new map holding at most size entries of unsorted, keeping
// the entries with the highest values. The input map is not modified.
//
// Entries with equal values are ranked by key in ascending order, so the
// outcome of a truncation is deterministic. A negative size is treated as 0
// and yields an empty map. A nil or empty input yields an empty, non-nil map.
func sortMap(unsorted map[string]float64, size int) map[string]float64 {
	if size < 0 {
		// Slicing with a negative bound would panic.
		size = 0
	}

	type entry struct {
		name  string
		score float64
	}
	ranked := make([]entry, 0, len(unsorted))
	for name, score := range unsorted {
		ranked = append(ranked, entry{name: name, score: score})
	}

	// Highest score first; sort.Slice is not stable and map iteration order is
	// random, so ties need an explicit secondary key to be reproducible.
	sort.Slice(ranked, func(i, j int) bool {
		if ranked[i].score != ranked[j].score {
			return ranked[i].score > ranked[j].score
		}
		return ranked[i].name < ranked[j].name
	})

	if len(ranked) > size {
		ranked = ranked[:size]
	}

	kept := make(map[string]float64, len(ranked))
	for _, e := range ranked {
		kept[e.name] = e.score
	}
	return kept
}
- func (c *ByClassifier) readBadTokens() {
- log.Println("Start reading BAD tokens")
- for token := range ControPlane.BadTokens {
- c.IsBAD(token)
- }
- }
- func (c *ByClassifier) readGoodTokens() {
- log.Println("Start reading GOOD tokens")
- for token := range ControPlane.GoodTokens {
- c.IsGOOD(token)
- }
- }
- func (c *ByClassifier) readStatsTokens() {
- log.Println("Start reading STATS tokens")
- for token := range ControPlane.StatsTokens {
- c.AddStats(token)
- }
- }
- func (c *ByClassifier) Bayes(hdr string) string {
- classifier := naive.New()
- headers := sanitizeHeaders(hdr)
- for k, _ := range c.BAD {
- classifier.TrainString(k, "BAD")
- }
- for k, _ := range c.GOOD {
- classifier.TrainString(k, "GOOD")
- }
- if classification, err := classifier.ClassifyString(headers); err == nil {
- return classification // ham
- } else {
- log.Println("error: ", err)
- }
- return ""
- }
|