matrix.go 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. package main
  2. import (
  3. "bufio"
  4. "log"
  5. "os"
  6. "strings"
  7. "sync"
  8. "time"
  9. )
  10. //ByControlPlane contains all the channels we need.
  11. type ByControlPlane struct {
  12. BadTokens chan string
  13. GoodTokens chan string
  14. StatsTokens chan string
  15. }
  16. type safeClassifier struct {
  17. sMap map[string]string
  18. busy sync.Mutex
  19. }
  20. type safeStats struct {
  21. stats map[string]int64
  22. busy sync.Mutex
  23. }
// ControPlane is the package-level set of channels feeding the classifier.
// Its channels are created by (*ByClassifier).enroll; until then they are nil.
var ControPlane ByControlPlane
  26. //ByClassifier is the structure containing our Pseudo-Bayes classifier.
  27. type ByClassifier struct {
  28. STATS safeStats
  29. Learning safeClassifier
  30. Working safeClassifier
  31. Generation int64
  32. }
  33. //AddStats adds the statistics after proper blocking.
  34. func (c *ByClassifier) AddStats(action string) {
  35. c.STATS.busy.Lock()
  36. defer c.STATS.busy.Unlock()
  37. if _, exists := c.STATS.stats[action]; exists {
  38. c.STATS.stats[action]++
  39. } else {
  40. c.STATS.stats[action] = 1
  41. }
  42. }
  43. //IsBAD inserts a bad key in the right place.
  44. func (c *ByClassifier) IsBAD(key string) {
  45. log.Println("BAD Received", key)
  46. k := strings.Fields(key)
  47. c.Learning.busy.Lock()
  48. defer c.Learning.busy.Unlock()
  49. for _, tk := range k {
  50. if kind, exists := c.Learning.sMap[tk]; exists {
  51. switch kind {
  52. case "BAD":
  53. log.Println("Word was known as bad:", tk)
  54. case "GOOD":
  55. c.Learning.sMap[tk] = "MEH"
  56. log.Println("So sad, work was known as good", tk)
  57. case "MEH":
  58. log.Println("Word was known as ambiguos:", tk)
  59. }
  60. } else {
  61. c.Learning.sMap[tk] = "BAD"
  62. }
  63. }
  64. log.Println("BAD Learned", key)
  65. }
  66. //IsGOOD inserts the key in the right place.
  67. func (c *ByClassifier) IsGOOD(key string) {
  68. k := strings.Fields(key)
  69. log.Println("GOOD Received", key)
  70. c.Learning.busy.Lock()
  71. defer c.Learning.busy.Unlock()
  72. for _, tk := range k {
  73. if kind, exists := c.Learning.sMap[tk]; exists {
  74. switch kind {
  75. case "GOOD":
  76. log.Println("Word was known as good: ", tk)
  77. case "BAD":
  78. c.Learning.sMap[tk] = "MEH"
  79. log.Println("So sad, work was known as bad: ", tk)
  80. case "MEH":
  81. log.Println("Word was known as ambiguos: ", tk)
  82. }
  83. } else {
  84. c.Learning.sMap[tk] = "GOOD"
  85. }
  86. }
  87. log.Println("GOOD Learned", key)
  88. }
  89. //Posterior calculates Shannon based entropy using bad and good as different distributions
  90. func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
  91. tokens := strings.Fields(sanitizeHeaders(hdr))
  92. ff := make(map[string]float64)
  93. if c.Generation == 0 || len(tokens) == 0 {
  94. ff["BAD"] = 0.5
  95. ff["GOOD"] = 0.5
  96. return ff
  97. }
  98. log.Println("Posterior locking the Working Bayesian")
  99. c.Working.busy.Lock()
  100. defer c.Working.busy.Unlock()
  101. var totalGood, totalBad float64
  102. for _, tk := range tokens {
  103. if kind, exists := c.Working.sMap[tk]; exists {
  104. switch kind {
  105. case "BAD":
  106. totalBad++
  107. case "GOOD":
  108. totalGood++
  109. }
  110. }
  111. }
  112. ff["GOOD"] = 1 - (totalBad / float64(len(tokens)))
  113. ff["BAD"] = 1 - (totalGood / float64(len(tokens)))
  114. return ff
  115. }
  116. func (c *ByClassifier) enroll() {
  117. ControPlane.BadTokens = make(chan string, 2048)
  118. ControPlane.GoodTokens = make(chan string, 2048)
  119. ControPlane.StatsTokens = make(chan string, 2048)
  120. c.Generation = 0
  121. c.Learning.sMap = make(map[string]string)
  122. c.Working.sMap = make(map[string]string)
  123. c.STATS.stats = make(map[string]int64)
  124. c.readInitList("blacklist.txt", "BAD")
  125. c.readInitList("whitelist.txt", "GOOD")
  126. go c.readBadTokens()
  127. go c.readGoodTokens()
  128. go c.readStatsTokens()
  129. go c.updateLearners()
  130. log.Println("Classifier populated...")
  131. }
  132. func (c *ByClassifier) readBadTokens() {
  133. log.Println("Start reading BAD tokens")
  134. for token := range ControPlane.BadTokens {
  135. log.Println("Received BAD Token: ", token)
  136. c.IsBAD(token)
  137. }
  138. }
  139. func (c *ByClassifier) readGoodTokens() {
  140. log.Println("Start reading GOOD tokens")
  141. for token := range ControPlane.GoodTokens {
  142. log.Println("Received GOOD Token: ", token)
  143. c.IsGOOD(token)
  144. }
  145. }
  146. func (c *ByClassifier) readStatsTokens() {
  147. log.Println("Start reading STATS tokens")
  148. for token := range ControPlane.StatsTokens {
  149. c.AddStats(token)
  150. }
  151. }
  152. func (c *ByClassifier) readInitList(filePath, class string) {
  153. inFile, err := os.Open(filePath)
  154. if err != nil {
  155. log.Println(err.Error() + `: ` + filePath)
  156. return
  157. }
  158. defer inFile.Close()
  159. scanner := bufio.NewScanner(inFile)
  160. for scanner.Scan() {
  161. if len(scanner.Text()) > 3 {
  162. switch class {
  163. case "BAD":
  164. log.Println("Loading into Blacklist: ", scanner.Text()) // the line
  165. c.IsBAD(scanner.Text())
  166. case "GOOD":
  167. log.Println("Loading into Whitelist: ", scanner.Text()) // the line
  168. c.IsGOOD(scanner.Text())
  169. }
  170. }
  171. }
  172. }
  173. func (c *ByClassifier) updateLearners() {
  174. log.Println("Bayes Updater Start...")
  175. ticker := time.NewTicker(10 * time.Second)
  176. for ; true; <-ticker.C {
  177. var currentGen int64
  178. log.Println("Maturity is:", Maturity)
  179. log.Println("Seniority is:", ProxyFlow.seniority)
  180. if Maturity > 0 {
  181. currentGen = ProxyFlow.seniority / Maturity
  182. } else {
  183. currentGen = 0
  184. }
  185. log.Println("Current Generation is: ", currentGen)
  186. log.Println("Working Generation is: ", c.Generation)
  187. if currentGen > c.Generation || float64(len(c.Learning.sMap)) > ProxyFlow.collection {
  188. c.Learning.busy.Lock()
  189. c.Working.busy.Lock()
  190. c.Working.sMap = c.Learning.sMap
  191. c.Learning.sMap = make(map[string]string)
  192. c.Generation = currentGen
  193. log.Println("Generation Updated to: ", c.Generation)
  194. ControPlane.StatsTokens <- "GENERATION"
  195. c.Learning.busy.Unlock()
  196. c.Working.busy.Unlock()
  197. }
  198. }
  199. }