matrix.old 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. package main
  2. import (
  3. "log"
  4. "sort"
  5. "strings"
  6. "time"
  7. "github.com/n3integration/classifier/naive"
  8. )
  // ByControlPlane groups the string channels used to hand tokens to the
  // classifier:
  //
  //   - BadTokens carries tokens to be recorded as bad,
  //   - GoodTokens carries tokens to be recorded as good,
  //   - StatsTokens carries action names to be counted.
  type ByControlPlane struct {
  	BadTokens, GoodTokens, StatsTokens chan string
  }
  15. //Controlplane is the variabile
  16. var ControPlane ByControlPlane
  // ByClassifier holds the score tables of the pseudo-Bayes classifier.
  //
  // GOOD and BAD map a token to the score it has collected in that class. MEH
  // holds tokens that have been reported both as good and as bad. STATS counts
  // how often each action name was reported.
  //
  // The maps are plain Go maps: they must be allocated before use (enroll does
  // that) and nothing in this type synchronises access to them.
  type ByClassifier struct {
  	GOOD, BAD, MEH map[string]float64
  	STATS          map[string]int64
  }
  24. //AddStats adds the statistics after proper blocking.
  25. func (c *ByClassifier) AddStats(action string) {
  26. if _, ok := c.STATS[action]; ok {
  27. c.STATS[action]++
  28. } else {
  29. c.STATS[action] = 1
  30. }
  31. }
  32. //IsBAD inserts a bad key in the right place.
  33. func (c *ByClassifier) IsBAD(key string) {
  34. if _, ok := c.MEH[key]; ok {
  35. c.MEH[key]++
  36. return
  37. }
  38. if score, ok := c.GOOD[key]; ok {
  39. c.MEH[key] = score + 1
  40. delete(c.GOOD, key)
  41. return
  42. }
  43. if _, ok := c.BAD[key]; ok {
  44. c.BAD[key]++
  45. return
  46. }
  47. c.BAD[key] = 1
  48. }
  49. //IsGOOD inserts the key in the right place.
  50. func (c *ByClassifier) IsGOOD(key string) {
  51. if _, ok := c.MEH[key]; ok {
  52. c.MEH[key]++
  53. return
  54. }
  55. if score, ok := c.BAD[key]; ok {
  56. c.MEH[key] = score + 1
  57. delete(c.BAD, key)
  58. return
  59. }
  60. if _, ok := c.GOOD[key]; ok {
  61. c.GOOD[key]++
  62. return
  63. }
  64. c.GOOD[key] = 1
  65. }
  66. //Posterior calculates the posterior probabilities in pseudo-bayes.
  67. func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
  68. headers := strings.Fields(sanitizeHeaders(hdr))
  69. var result = make(map[string]float64)
  70. result["BAD"] = 0
  71. result["GOOD"] = 0
  72. var tmpResGood, tmpResBad float64
  73. for _, token := range headers {
  74. if _, ok := c.BAD[token]; ok {
  75. tmpResBad += c.BAD[token]
  76. }
  77. if _, ok := c.GOOD[token]; ok {
  78. tmpResGood += c.GOOD[token]
  79. }
  80. }
  81. tmpTotal := tmpResBad + tmpResGood
  82. if tmpTotal == 0 {
  83. tmpTotal = 1
  84. }
  85. result["BAD"] = tmpResBad / tmpTotal
  86. result["GOOD"] = tmpResGood / tmpTotal
  87. log.Println(c.Bayes(hdr))
  88. return result
  89. }
  90. //Janitor keeps the maps under a certain size, keeping the biggest values.
  91. func (c *ByClassifier) Janitor(size int) {
  92. log.Println("Janitor Running")
  93. c.BAD = sortMap(c.BAD, size)
  94. c.GOOD = sortMap(c.GOOD, size)
  95. c.MEH = sortMap(c.MEH, size)
  96. log.Println("Janitor Finished.")
  97. }
  98. //CleanThread is the Janitor thread
  99. func (c *ByClassifier) CleanThread() {
  100. for {
  101. time.Sleep(10 * time.Minute)
  102. c.Janitor(1024)
  103. }
  104. }
  105. func (c *ByClassifier) enroll() {
  106. c.BAD = make(map[string]float64)
  107. c.GOOD = make(map[string]float64)
  108. c.MEH = make(map[string]float64)
  109. c.STATS = make(map[string]int64)
  110. ControPlane.BadTokens = make(chan string, 2048)
  111. ControPlane.GoodTokens = make(chan string, 2048)
  112. ControPlane.StatsTokens = make(chan string, 2048)
  113. c.IsBAD("Penis")
  114. c.IsGOOD("Gun")
  115. c.MEH["meh"] = 0
  116. go c.readBadTokens()
  117. go c.readGoodTokens()
  118. go c.readStatsTokens()
  119. log.Println("Classifier populated...")
  120. go c.CleanThread()
  121. log.Println("Janitor Started")
  122. }
  // sortMap returns a new map with at most size entries of unsorted, namely
  // those with the largest values. The input map is never modified and the
  // result is never nil.
  //
  // Entries with equal values are ranked by key in ascending order, so the
  // surviving set is the same on every call for the same input. A size of zero
  // or less yields an empty map.
  func sortMap(unsorted map[string]float64, size int) map[string]float64 {
  	if size < 0 {
  		// A negative limit would make the slice expression below panic.
  		size = 0
  	}
  	if len(unsorted) <= size {
  		// Nothing has to be dropped: a plain copy is enough, no sorting.
  		kept := make(map[string]float64, len(unsorted))
  		for name, score := range unsorted {
  			kept[name] = score
  		}
  		return kept
  	}
  	type entry struct {
  		name  string
  		score float64
  	}
  	ranked := make([]entry, 0, len(unsorted))
  	for name, score := range unsorted {
  		ranked = append(ranked, entry{name: name, score: score})
  	}
  	sort.Slice(ranked, func(i, j int) bool {
  		if ranked[i].score != ranked[j].score {
  			return ranked[i].score > ranked[j].score
  		}
  		// Map iteration order is random and sort.Slice is not stable, so
  		// break ties on the key to keep the cut-off deterministic.
  		return ranked[i].name < ranked[j].name
  	})
  	kept := make(map[string]float64, size)
  	for _, e := range ranked[:size] {
  		kept[e.name] = e.score
  	}
  	return kept
  }
  145. func (c *ByClassifier) readBadTokens() {
  146. log.Println("Start reading BAD tokens")
  147. for token := range ControPlane.BadTokens {
  148. c.IsBAD(token)
  149. }
  150. }
  151. func (c *ByClassifier) readGoodTokens() {
  152. log.Println("Start reading GOOD tokens")
  153. for token := range ControPlane.GoodTokens {
  154. c.IsGOOD(token)
  155. }
  156. }
  157. func (c *ByClassifier) readStatsTokens() {
  158. log.Println("Start reading STATS tokens")
  159. for token := range ControPlane.StatsTokens {
  160. c.AddStats(token)
  161. }
  162. }
  163. func (c *ByClassifier) Bayes(hdr string) string {
  164. classifier := naive.New()
  165. headers := sanitizeHeaders(hdr)
  166. for k, _ := range c.BAD {
  167. classifier.TrainString(k, "BAD")
  168. }
  169. for k, _ := range c.GOOD {
  170. classifier.TrainString(k, "GOOD")
  171. }
  172. if classification, err := classifier.ClassifyString(headers); err == nil {
  173. return classification // ham
  174. } else {
  175. log.Println("error: ", err)
  176. }
  177. return ""
  178. }