123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276 |
- package main
- import (
- "bufio"
- "log"
- "os"
- "strings"
- "sync"
- "time"
- )
// ByControlPlane bundles the channels used to drive the classifier: text to
// learn as bad, text to learn as good, and names of statistics counters to
// increment.
type ByControlPlane struct {
	BadTokens, GoodTokens, StatsTokens chan string
}
// Mutex-guarded map containers. Callers lock busy before touching the map.
type (
	// safeClassifier maps a token to the label it was learned with.
	safeClassifier struct {
		sMap map[string]string
		busy sync.Mutex
	}

	// safeStats maps a counter name to the number of times it was recorded.
	safeStats struct {
		stats map[string]int64
		busy  sync.Mutex
	}
)
- //ControPlane is the variabile
- var ControPlane ByControlPlane
- //ByClassifier is the structure containing our Pseudo-Bayes classifier.
- type ByClassifier struct {
- STATS safeStats
- Learning safeClassifier
- Working safeClassifier
- Generation int64
- }
- //AddStats adds the statistics after proper blocking.
- func (c *ByClassifier) AddStats(action string) {
- c.STATS.busy.Lock()
- defer c.STATS.busy.Unlock()
- if _, exists := c.STATS.stats[action]; exists {
- c.STATS.stats[action]++
- } else {
- c.STATS.stats[action] = 1
- }
- }
- //IsBAD inserts a bad key in the right place.
- func (c *ByClassifier) IsBAD(key string) {
- log.Println("BAD Received", key)
- k := strings.Fields(key)
- c.Learning.busy.Lock()
- defer c.Learning.busy.Unlock()
- for _, tk := range k {
- if kind, exists := c.Learning.sMap[tk]; exists {
- switch kind {
- case "BAD":
- log.Println("Word was known as bad:", tk)
- case "GOOD":
- c.Learning.sMap[tk] = "MEH"
- log.Println("So sad, work was known as good", tk)
- case "MEH":
- log.Println("Word was known as ambiguos:", tk)
- }
- } else {
- c.Learning.sMap[tk] = "BAD"
- }
- }
- log.Println("BAD Learned", key)
- }
- //IsGOOD inserts the key in the right place.
- func (c *ByClassifier) IsGOOD(key string) {
- k := strings.Fields(key)
- log.Println("GOOD Received", key)
- c.Learning.busy.Lock()
- defer c.Learning.busy.Unlock()
- for _, tk := range k {
- if kind, exists := c.Learning.sMap[tk]; exists {
- switch kind {
- case "GOOD":
- log.Println("Word was known as good: ", tk)
- case "BAD":
- c.Learning.sMap[tk] = "MEH"
- log.Println("So sad, work was known as bad: ", tk)
- case "MEH":
- log.Println("Word was known as ambiguos: ", tk)
- }
- } else {
- c.Learning.sMap[tk] = "GOOD"
- }
- }
- log.Println("GOOD Learned", key)
- }
- //Posterior calculates Shannon based entropy using bad and good as different distributions
- func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
- tokens := strings.Fields(sanitizeHeaders(hdr))
- ff := make(map[string]float64)
- if c.Generation == 0 || len(tokens) == 0 {
- ff["BAD"] = 0.5
- ff["GOOD"] = 0.5
- return ff
- }
- log.Println("Posterior locking the Working Bayesian")
- c.Working.busy.Lock()
- defer c.Working.busy.Unlock()
- var totalGood, totalBad float64
- for _, tk := range tokens {
- if kind, exists := c.Working.sMap[tk]; exists {
- switch kind {
- case "BAD":
- totalBad++
- case "GOOD":
- totalGood++
- }
- }
- }
- ff["GOOD"] = 1 - (totalBad / float64(len(tokens)))
- ff["BAD"] = 1 - (totalGood / float64(len(tokens)))
- return ff
- }
- func (c *ByClassifier) enroll() {
- ControPlane.BadTokens = make(chan string, 2048)
- ControPlane.GoodTokens = make(chan string, 2048)
- ControPlane.StatsTokens = make(chan string, 2048)
- c.Generation = 0
- c.Learning.sMap = make(map[string]string)
- c.Working.sMap = make(map[string]string)
- c.STATS.stats = make(map[string]int64)
- c.readInitList("blacklist.txt", "BAD")
- c.readInitList("whitelist.txt", "GOOD")
- go c.readBadTokens()
- go c.readGoodTokens()
- go c.readStatsTokens()
- go c.updateLearners()
- log.Println("Classifier populated...")
- }
- func (c *ByClassifier) readBadTokens() {
- log.Println("Start reading BAD tokens")
- for token := range ControPlane.BadTokens {
- log.Println("Received BAD Token: ", token)
- c.IsBAD(token)
- }
- }
- func (c *ByClassifier) readGoodTokens() {
- log.Println("Start reading GOOD tokens")
- for token := range ControPlane.GoodTokens {
- log.Println("Received GOOD Token: ", token)
- c.IsGOOD(token)
- }
- }
- func (c *ByClassifier) readStatsTokens() {
- log.Println("Start reading STATS tokens")
- for token := range ControPlane.StatsTokens {
- c.AddStats(token)
- }
- }
- func (c *ByClassifier) readInitList(filePath, class string) {
- inFile, err := os.Open(filePath)
- if err != nil {
- log.Println(err.Error() + `: ` + filePath)
- return
- }
- defer inFile.Close()
- scanner := bufio.NewScanner(inFile)
- for scanner.Scan() {
- if len(scanner.Text()) > 3 {
- switch class {
- case "BAD":
- log.Println("Loading into Blacklist: ", scanner.Text()) // the line
- c.IsBAD(scanner.Text())
- case "GOOD":
- log.Println("Loading into Whitelist: ", scanner.Text()) // the line
- c.IsGOOD(scanner.Text())
- }
- }
- }
- }
- func (c *ByClassifier) updateLearners() {
- log.Println("Bayes Updater Start...")
- ticker := time.NewTicker(10 * time.Second)
- for ; true; <-ticker.C {
- var currentGen int64
- log.Println("Maturity is:", Maturity)
- log.Println("Seniority is:", ProxyFlow.seniority)
- if Maturity > 0 {
- currentGen = ProxyFlow.seniority / Maturity
- } else {
- currentGen = 0
- }
- log.Println("Current Generation is: ", currentGen)
- log.Println("Working Generation is: ", c.Generation)
- if currentGen > c.Generation {
- c.Learning.busy.Lock()
- c.Working.busy.Lock()
- c.Working.sMap = c.Learning.sMap
- c.Learning.sMap = make(map[string]string)
- c.Generation = currentGen
- log.Println("Generation Updated to: ", c.Generation)
- ControPlane.StatsTokens <- "GENERATION"
- c.Learning.busy.Unlock()
- c.Working.busy.Unlock()
- }
- }
- }
|