matrix.go 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. package main
  2. import (
  3. "bufio"
  4. "log"
  5. "os"
  6. "strings"
  7. "sync"
  8. )
  9. //ByControlPlane contains all the channels we need.
  10. type ByControlPlane struct {
  11. BadTokens chan string
  12. GoodTokens chan string
  13. StatsTokens chan string
  14. }
  15. type bScore struct {
  16. BadScore float64
  17. GoodScore float64
  18. }
  19. type bMap struct {
  20. bScores map[string]bScore
  21. busy sync.Mutex
  22. }
  23. //ControPlane is the variabile
  24. var ControPlane ByControlPlane
  25. //ByClassifier is the structure containing our Pseudo-Bayes classifier.
  26. type ByClassifier struct {
  27. STATS sync.Map
  28. Matrix bMap
  29. bReg float64
  30. gReg float64
  31. }
  32. //AddStats adds the statistics after proper blocking.
  33. func (c *ByClassifier) AddStats(action string) {
  34. var one int64 = 1
  35. if v, ok := c.STATS.Load(action); ok {
  36. c.STATS.Store(action, v.(int64)+1)
  37. } else {
  38. c.STATS.Store(action, one)
  39. }
  40. }
  41. //IsBAD inserts a bad key in the right place.
  42. func (c *ByClassifier) IsBAD(key string) {
  43. c.Matrix.busy.Lock()
  44. defer c.Matrix.busy.Unlock()
  45. var t bScore
  46. if val, ok := c.Matrix.bScores[key]; ok {
  47. t.BadScore = val.BadScore + 1
  48. t.GoodScore = val.GoodScore
  49. } else {
  50. t.BadScore = 1
  51. t.GoodScore = 0
  52. }
  53. c.Matrix.bScores[key] = t
  54. c.bReg++
  55. }
  56. //IsGOOD inserts the key in the right place.
  57. func (c *ByClassifier) IsGOOD(key string) {
  58. c.Matrix.busy.Lock()
  59. defer c.Matrix.busy.Unlock()
  60. var t bScore
  61. if val, ok := c.Matrix.bScores[key]; ok {
  62. t.GoodScore = val.GoodScore + 1
  63. t.BadScore = val.BadScore
  64. } else {
  65. t.BadScore = 0
  66. t.GoodScore = 1
  67. }
  68. c.Matrix.bScores[key] = t
  69. c.gReg++
  70. }
  71. //Posterior calculates Shannon based entropy using bad and good as different distributions
  72. func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
  73. c.Matrix.busy.Lock()
  74. defer c.Matrix.busy.Unlock()
  75. tokens := strings.Fields(sanitizeHeaders(hdr))
  76. lenTokens := float64(len(tokens))
  77. ff := make(map[string]float64)
  78. if lenTokens == 0 {
  79. ff["BAD"] = 0.5
  80. ff["GOOD"] = 0.5
  81. return ff
  82. }
  83. log.Println("Start classification of: ", tokens)
  84. var hBadM, hGoodM float64
  85. for _, tk := range tokens {
  86. if val, ok := c.Matrix.bScores[tk]; ok {
  87. if val.BadScore > 0 {
  88. hBadM += val.BadScore
  89. }
  90. if val.GoodScore > 0 {
  91. hGoodM += val.GoodScore
  92. }
  93. }
  94. }
  95. ff["BAD"] = hBadM / (c.bReg * lenTokens)
  96. ff["GOOD"] = hGoodM / (c.gReg * lenTokens)
  97. log.Println("Entropies: ", ff)
  98. return ff
  99. }
  100. func (c *ByClassifier) enroll() {
  101. ControPlane.BadTokens = make(chan string, 2048)
  102. ControPlane.GoodTokens = make(chan string, 2048)
  103. ControPlane.StatsTokens = make(chan string, 2048)
  104. c.Matrix.busy.Lock()
  105. c.Matrix.bScores = make(map[string]bScore)
  106. c.Matrix.busy.Unlock()
  107. c.bReg = 0
  108. c.gReg = 0
  109. c.readInitList("blacklist.txt", "BAD")
  110. c.readInitList("whitelist.txt", "GOOD")
  111. go c.readBadTokens()
  112. go c.readGoodTokens()
  113. go c.readStatsTokens()
  114. log.Println("Classifier populated...")
  115. }
  116. func (c *ByClassifier) readBadTokens() {
  117. log.Println("Start reading BAD tokens")
  118. for token := range ControPlane.BadTokens {
  119. log.Println("Received BAD Token: ", token)
  120. c.IsBAD(token)
  121. }
  122. }
  123. func (c *ByClassifier) readGoodTokens() {
  124. log.Println("Start reading GOOD tokens")
  125. for token := range ControPlane.GoodTokens {
  126. log.Println("Received GOOD Token: ", token)
  127. c.IsGOOD(token)
  128. }
  129. }
  130. func (c *ByClassifier) readStatsTokens() {
  131. log.Println("Start reading STATS tokens")
  132. for token := range ControPlane.StatsTokens {
  133. c.AddStats(token)
  134. }
  135. }
  136. func (c *ByClassifier) readInitList(filePath, class string) {
  137. inFile, err := os.Open(filePath)
  138. if err != nil {
  139. log.Println(err.Error() + `: ` + filePath)
  140. return
  141. }
  142. defer inFile.Close()
  143. scanner := bufio.NewScanner(inFile)
  144. for scanner.Scan() {
  145. if len(scanner.Text()) > 3 {
  146. switch class {
  147. case "BAD":
  148. log.Println("Loading into Blacklist: ", scanner.Text()) // the line
  149. c.IsBAD(scanner.Text())
  150. case "GOOD":
  151. log.Println("Loading into Whitelist: ", scanner.Text()) // the line
  152. c.IsGOOD(scanner.Text())
  153. }
  154. }
  155. }
  156. }