matrix.go 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. package main
  2. import (
  3. "bufio"
  4. "log"
  5. "os"
  6. "strings"
  7. "sync"
  8. "time"
  9. )
  10. const (
  11. Good Class = "GOOD"
  12. Bad Class = "BAD"
  13. )
  // ByControlPlane bundles the string channels used to feed the classifier:
  //
  //   - BadTokens carries text to be learned as Bad (drained by readBadTokens),
  //   - GoodTokens carries text to be learned as Good (drained by readGoodTokens),
  //   - StatsTokens carries action names to be counted (drained by readStatsTokens).
  //
  // The zero value holds nil channels; enroll is what allocates them.
  type ByControlPlane struct {
  	BadTokens, GoodTokens, StatsTokens chan string
  }
  20. type safeClassifier struct {
  21. bayez *Classifier
  22. busy sync.Mutex
  23. }
  24. //ControPlane is the variabile
  25. var ControPlane ByControlPlane
  26. //ByClassifier is the structure containing our Pseudo-Bayes classifier.
  27. type ByClassifier struct {
  28. STATS sync.Map
  29. Learning safeClassifier
  30. Working safeClassifier
  31. Generation int64
  32. }
  33. //AddStats adds the statistics after proper blocking.
  34. func (c *ByClassifier) AddStats(action string) {
  35. var one int64 = 1
  36. if v, ok := c.STATS.Load(action); ok {
  37. c.STATS.Store(action, v.(int64)+1)
  38. } else {
  39. c.STATS.Store(action, one)
  40. }
  41. }
  42. //IsBAD inserts a bad key in the right place.
  43. func (c *ByClassifier) IsBAD(key string) {
  44. k := strings.Fields(key)
  45. log.Println("BAD Received", k)
  46. c.Learning.busy.Lock()
  47. defer c.Learning.busy.Unlock()
  48. c.Learning.bayez.Learn(k, Bad)
  49. log.Println("BAD Learned", k)
  50. }
  51. //IsGOOD inserts the key in the right place.
  52. func (c *ByClassifier) IsGOOD(key string) {
  53. k := strings.Fields(key)
  54. log.Println("GOOD Received", k)
  55. c.Learning.busy.Lock()
  56. defer c.Learning.busy.Unlock()
  57. c.Learning.bayez.Learn(k, Good)
  58. log.Println("GOOD Learned", k)
  59. }
  60. //Posterior calculates Shannon based entropy using bad and good as different distributions
  61. func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
  62. tokens := sanitizeHeaders(hdr)
  63. ff := make(map[string]float64)
  64. if c.Generation == 0 {
  65. ff["BAD"] = 0.5
  66. ff["GOOD"] = 0.5
  67. return ff
  68. }
  69. log.Println("Posterior locking the Working Bayesian")
  70. c.Working.busy.Lock()
  71. defer c.Working.busy.Unlock()
  72. log.Println("Going to calculate the Scores")
  73. scores, _, strict, err := c.Working.bayez.SafeProbScores(strings.Fields(tokens))
  74. log.Println("Scores calculated")
  75. if err == ErrUnderflow {
  76. ff["BAD"] = 0.5
  77. ff["GOOD"] = 0.5
  78. return ff
  79. }
  80. if !strict {
  81. ff["BAD"] = 0.5
  82. ff["GOOD"] = 0.5
  83. return ff
  84. }
  85. ff["GOOD"] = scores[0]
  86. ff["BAD"] = scores[1]
  87. return ff
  88. }
  89. func (c *ByClassifier) enroll() {
  90. ControPlane.BadTokens = make(chan string, 2048)
  91. ControPlane.GoodTokens = make(chan string, 2048)
  92. ControPlane.StatsTokens = make(chan string, 2048)
  93. c.Generation = 0
  94. c.Learning.bayez = NewClassifierTfIdf(Good, Bad)
  95. c.Working.bayez = NewClassifierTfIdf(Good, Bad)
  96. c.readInitList("blacklist.txt", "BAD")
  97. c.readInitList("whitelist.txt", "GOOD")
  98. go c.readBadTokens()
  99. go c.readGoodTokens()
  100. go c.readStatsTokens()
  101. go c.updateLearners()
  102. log.Println("Classifier populated...")
  103. }
  104. func (c *ByClassifier) readBadTokens() {
  105. log.Println("Start reading BAD tokens")
  106. for token := range ControPlane.BadTokens {
  107. log.Println("Received BAD Token: ", token)
  108. c.IsBAD(token)
  109. }
  110. }
  111. func (c *ByClassifier) readGoodTokens() {
  112. log.Println("Start reading GOOD tokens")
  113. for token := range ControPlane.GoodTokens {
  114. log.Println("Received GOOD Token: ", token)
  115. c.IsGOOD(token)
  116. }
  117. }
  118. func (c *ByClassifier) readStatsTokens() {
  119. log.Println("Start reading STATS tokens")
  120. for token := range ControPlane.StatsTokens {
  121. c.AddStats(token)
  122. }
  123. }
  124. func (c *ByClassifier) readInitList(filePath, class string) {
  125. inFile, err := os.Open(filePath)
  126. if err != nil {
  127. log.Println(err.Error() + `: ` + filePath)
  128. return
  129. }
  130. defer inFile.Close()
  131. scanner := bufio.NewScanner(inFile)
  132. for scanner.Scan() {
  133. if len(scanner.Text()) > 3 {
  134. switch class {
  135. case "BAD":
  136. log.Println("Loading into Blacklist: ", scanner.Text()) // the line
  137. c.IsBAD(scanner.Text())
  138. case "GOOD":
  139. log.Println("Loading into Whitelist: ", scanner.Text()) // the line
  140. c.IsGOOD(scanner.Text())
  141. }
  142. }
  143. }
  144. }
  145. func (c *ByClassifier) updateLearners() {
  146. log.Println("Bayes Updater Start...")
  147. ticker := time.NewTicker(10 * time.Second)
  148. for ; true; <-ticker.C {
  149. var currentGen int64
  150. log.Println("Maturity is:", Maturity)
  151. log.Println("Seniority is:", ProxyFlow.seniority)
  152. if Maturity > 0 {
  153. currentGen = ProxyFlow.seniority / Maturity
  154. } else {
  155. currentGen = 0
  156. }
  157. log.Println("Current Generation is: ", currentGen)
  158. log.Println("Working Generation is: ", c.Generation)
  159. if currentGen > c.Generation {
  160. c.Learning.busy.Lock()
  161. c.Working.busy.Lock()
  162. c.Working.bayez = c.Learning.bayez
  163. c.Working.bayez.ConvertTermsFreqToTfIdf()
  164. c.Learning.bayez = NewClassifierTfIdf(Good, Bad)
  165. c.Generation = currentGen
  166. log.Println("Generation Updated to: ", c.Generation)
  167. c.Learning.busy.Unlock()
  168. c.Working.busy.Unlock()
  169. }
  170. }
  171. }