matrix.go 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. package main
  2. import (
  3. "bufio"
  4. "log"
  5. "math"
  6. "os"
  7. "strings"
  8. "sync"
  9. )
  10. //ByControlPlane contains all the channels we need.
  11. type ByControlPlane struct {
  12. BadTokens chan string
  13. GoodTokens chan string
  14. StatsTokens chan string
  15. }
  16. type bScore struct {
  17. BadScore float64
  18. GoodScore float64
  19. }
  20. type bMap struct {
  21. bScores map[string]bScore
  22. busy sync.Mutex
  23. }
  24. //ControPlane is the variabile
  25. var ControPlane ByControlPlane
  26. //ByClassifier is the structure containing our Pseudo-Bayes classifier.
  27. type ByClassifier struct {
  28. STATS sync.Map
  29. Matrix bMap
  30. }
  31. //AddStats adds the statistics after proper blocking.
  32. func (c *ByClassifier) AddStats(action string) {
  33. var one int64 = 1
  34. if v, ok := c.STATS.Load(action); ok {
  35. c.STATS.Store(action, v.(int64)+1)
  36. } else {
  37. c.STATS.Store(action, one)
  38. }
  39. }
  40. //IsBAD inserts a bad key in the right place.
  41. func (c *ByClassifier) IsBAD(key string) {
  42. c.Matrix.busy.Lock()
  43. defer c.Matrix.busy.Unlock()
  44. var t bScore
  45. if val, ok := c.Matrix.bScores[key]; ok {
  46. t.BadScore = val.BadScore + 1
  47. t.GoodScore = val.GoodScore
  48. } else {
  49. t.BadScore = 1
  50. t.GoodScore = 0
  51. }
  52. c.Matrix.bScores[key] = t
  53. }
  54. //IsGOOD inserts the key in the right place.
  55. func (c *ByClassifier) IsGOOD(key string) {
  56. c.Matrix.busy.Lock()
  57. defer c.Matrix.busy.Unlock()
  58. var t bScore
  59. if val, ok := c.Matrix.bScores[key]; ok {
  60. t.GoodScore = val.GoodScore + 1
  61. t.BadScore = val.BadScore
  62. } else {
  63. t.BadScore = 0
  64. t.GoodScore = 1
  65. }
  66. c.Matrix.bScores[key] = t
  67. }
  68. //Posterior calculates Shannon based entropy using bad and good as different distributions
  69. func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
  70. c.Matrix.busy.Lock()
  71. defer c.Matrix.busy.Unlock()
  72. tokens := strings.Fields(sanitizeHeaders(hdr))
  73. lenTokens := float64(len(tokens))
  74. ff := make(map[string]float64)
  75. if lenTokens == 0 {
  76. ff["BAD"] = 0.5
  77. ff["GOOD"] = 0.5
  78. return ff
  79. }
  80. log.Println("Start classification of: ", tokens)
  81. var hBadM, hGoodM float64
  82. for _, tk := range tokens {
  83. if val, ok := c.Matrix.bScores[tk]; ok {
  84. log.Println("Classifier found: ", tk)
  85. if val.BadScore > 0 {
  86. hBadM += val.BadScore * math.Log2(val.BadScore)
  87. }
  88. if val.GoodScore > 0 {
  89. hGoodM += val.GoodScore * math.Log2(val.GoodScore)
  90. }
  91. }
  92. }
  93. hBadM = math.Log2(lenTokens) - (hBadM / lenTokens)
  94. hGoodM = math.Log2(lenTokens) - (hGoodM / lenTokens)
  95. if math.Abs(hGoodM) >= math.Abs(hBadM) {
  96. ff["GOOD"] = 1
  97. ff["BAD"] = 0
  98. } else {
  99. ff["GOOD"] = 0
  100. ff["BAD"] = 1
  101. }
  102. log.Println("Entropies: ", ff)
  103. return ff
  104. }
  105. func (c *ByClassifier) enroll() {
  106. ControPlane.BadTokens = make(chan string, 2048)
  107. ControPlane.GoodTokens = make(chan string, 2048)
  108. ControPlane.StatsTokens = make(chan string, 2048)
  109. c.Matrix.busy.Lock()
  110. c.Matrix.bScores = make(map[string]bScore)
  111. c.Matrix.busy.Unlock()
  112. c.readInitList("blacklist.txt", "BAD")
  113. c.readInitList("whitelist.txt", "GOOD")
  114. go c.readBadTokens()
  115. go c.readGoodTokens()
  116. go c.readStatsTokens()
  117. log.Println("Classifier populated...")
  118. }
  119. func (c *ByClassifier) readBadTokens() {
  120. log.Println("Start reading BAD tokens")
  121. for token := range ControPlane.BadTokens {
  122. log.Println("Received BAD Token: ", token)
  123. c.IsBAD(token)
  124. }
  125. }
  126. func (c *ByClassifier) readGoodTokens() {
  127. log.Println("Start reading GOOD tokens")
  128. for token := range ControPlane.GoodTokens {
  129. log.Println("Received GOOD Token: ", token)
  130. c.IsGOOD(token)
  131. }
  132. }
  133. func (c *ByClassifier) readStatsTokens() {
  134. log.Println("Start reading STATS tokens")
  135. for token := range ControPlane.StatsTokens {
  136. c.AddStats(token)
  137. }
  138. }
  139. func (c *ByClassifier) readInitList(filePath, class string) {
  140. inFile, err := os.Open(filePath)
  141. if err != nil {
  142. log.Println(err.Error() + `: ` + filePath)
  143. return
  144. }
  145. defer inFile.Close()
  146. scanner := bufio.NewScanner(inFile)
  147. for scanner.Scan() {
  148. if len(scanner.Text()) > 3 {
  149. switch class {
  150. case "BAD":
  151. log.Println("Loading into Blacklist: ", scanner.Text()) // the line
  152. c.IsBAD(scanner.Text())
  153. case "GOOD":
  154. log.Println("Loading into Whitelist: ", scanner.Text()) // the line
  155. c.IsGOOD(scanner.Text())
  156. }
  157. }
  158. }
  159. }