matrix.go 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. package main
  2. import (
  3. "bufio"
  4. "fmt"
  5. "log"
  6. "os"
  7. "sort"
  8. "strconv"
  9. "sync"
  10. "time"
  11. "github.com/lytics/multibayes"
  12. )
  // ByControlPlane bundles the three string channels used to hand tokens to
  // the classifier: one for BAD tokens, one for GOOD tokens and one for
  // statistics actions.
  type ByControlPlane struct {
  	BadTokens, GoodTokens, StatsTokens chan string
  }

  // ControPlane is the package-level ByControlPlane instance. Its channels are
  // nil until something allocates them; in this file that is done by enroll.
  var ControPlane ByControlPlane
  21. //ByClassifier is the structure containing our Pseudo-Bayes classifier.
  22. type ByClassifier struct {
  23. GOOD sync.Map
  24. BAD sync.Map
  25. MEH sync.Map
  26. STATS sync.Map
  27. bayez *multibayes.Classifier
  28. }
  29. //AddStats adds the statistics after proper blocking.
  30. func (c *ByClassifier) AddStats(action string) {
  31. var one int64 = 1
  32. if v, ok := c.STATS.Load(action); ok {
  33. c.STATS.Store(action, v.(int64)+1)
  34. } else {
  35. c.STATS.Store(action, one)
  36. }
  37. }
  38. //IsBAD inserts a bad key in the right place.
  39. func (c *ByClassifier) IsBAD(key string) {
  40. if _, ok := c.MEH.Load(key); ok {
  41. c.MEH.Store(key, time.Now().UnixNano())
  42. log.Println("Updated BAD into MEH: ", key)
  43. return
  44. }
  45. if _, ok := c.GOOD.Load(key); ok {
  46. c.MEH.Store(key, time.Now().UnixNano())
  47. c.GOOD.Delete(key)
  48. log.Println("Moved to MEH from GOOD: ", key)
  49. return
  50. }
  51. c.BAD.Store(key, time.Now().UnixNano())
  52. log.Println("Stored into BAD: ", key)
  53. }
  54. //IsGOOD inserts the key in the right place.
  55. func (c *ByClassifier) IsGOOD(key string) {
  56. if _, ok := c.MEH.Load(key); ok {
  57. c.MEH.Store(key, time.Now().UnixNano())
  58. log.Println("Updated GOOD into MEH: ", key)
  59. return
  60. }
  61. if _, ok := c.BAD.Load(key); ok {
  62. c.MEH.Store(key, time.Now().UnixNano())
  63. c.BAD.Delete(key)
  64. log.Println("Moved to MEH from BAD: ", key)
  65. return
  66. }
  67. c.GOOD.Store(key, time.Now().UnixNano())
  68. log.Println("Stored into GOOD: ", key)
  69. }
  70. //Posterior calculates the posterior probabilities in pseudo-bayes.
  71. func (c *ByClassifier) Posterior(hdr string) (ff map[string]float64) {
  72. defer func() {
  73. if a := recover(); a != nil {
  74. fmt.Println("OPS!: Recovering from:", a)
  75. ff = make(map[string]float64)
  76. ff["BAD"] = 0.5
  77. ff["GOOD"] = 0.5
  78. }
  79. }()
  80. return c.bayez.Posterior(hdr)
  81. }
  82. //Janitor keeps the maps under a certain size, keeping the biggest values.
  83. func (c *ByClassifier) Janitor(size int) {
  84. log.Println("Janitor Running")
  85. sortMap(&c.BAD, size)
  86. sortMap(&c.GOOD, size)
  87. sortMap(&c.MEH, size)
  88. log.Println("Janitor Finished.")
  89. }
  90. //RefreshBayes refresh the bayesian using values we stored
  91. func (c *ByClassifier) RefreshBayes() {
  92. defer func() {
  93. if a := recover(); a != nil {
  94. fmt.Println("OPS!: Recovering from:", a)
  95. }
  96. }()
  97. log.Println("RefreshBayes Thread started")
  98. ticker := time.NewTicker(5 * time.Minute)
  99. for ; true; <-ticker.C {
  100. c.bayez = multibayes.NewClassifier()
  101. c.bayez.MinClassSize = 0
  102. c.BAD.Range(func(key interface{}, value interface{}) bool {
  103. c.bayez.Add(key.(string), []string{"BAD"})
  104. return true
  105. })
  106. c.GOOD.Range(func(key interface{}, value interface{}) bool {
  107. c.bayez.Add(key.(string), []string{"GOOD"})
  108. return true
  109. })
  110. c.MEH.Range(func(key interface{}, value interface{}) bool {
  111. c.bayez.Add(key.(string), []string{"GOOD", "BAD"})
  112. return true
  113. })
  114. }
  115. }
  116. //CleanThread is the Janitor thread
  117. func (c *ByClassifier) CleanThread() {
  118. for {
  119. MaxSize, err := strconv.Atoi(fmt.Sprintf("%d", Maturity))
  120. if err != nil {
  121. MaxSize = 1000
  122. log.Println("Maxsize converted to: ", MaxSize)
  123. }
  124. log.Println("Janitor Maxsize is now:", MaxSize)
  125. time.Sleep(10 * time.Second)
  126. c.Janitor(MaxSize)
  127. }
  128. }
  129. func (c *ByClassifier) enroll() {
  130. ControPlane.BadTokens = make(chan string, 2048)
  131. ControPlane.GoodTokens = make(chan string, 2048)
  132. ControPlane.StatsTokens = make(chan string, 2048)
  133. c.bayez = multibayes.NewClassifier()
  134. c.bayez.MinClassSize = 0
  135. c.readInitList("blacklist.txt", "BAD")
  136. c.readInitList("whitelist.txt", "GOOD")
  137. c.MEH.Store("Dildo", time.Now().UnixNano())
  138. go c.readBadTokens()
  139. go c.readGoodTokens()
  140. go c.readStatsTokens()
  141. log.Println("Classifier populated...")
  142. go c.CleanThread()
  143. go c.RefreshBayes()
  144. log.Println("Janitor Started")
  145. }
  // sortMap trims unsorted so that it keeps at most size entries, retaining
  // the ones with the largest int64 values (the most recent timestamps for
  // the token maps in this file).
  //
  // A size of zero or less means "no limit": the map is left untouched.
  // Keys must be strings and values int64; anything else makes the type
  // assertions below panic. Which entry survives among equal values at the
  // cut-off is unspecified.
  //
  // Only the surplus entries are deleted. The map is never emptied and
  // refilled, so concurrent readers never observe retained keys as missing,
  // and entries stored or refreshed while the trim runs are not wiped or
  // reverted to a stale snapshot value.
  func sortMap(unsorted *sync.Map, size int) {
  	if size <= 0 {
  		return
  	}
  	type entry struct {
  		key   string
  		stamp int64
  	}
  	var entries []entry
  	unsorted.Range(func(key interface{}, value interface{}) bool {
  		entries = append(entries, entry{key: key.(string), stamp: value.(int64)})
  		return true
  	})
  	if len(entries) <= size {
  		return // already within the limit, nothing to sort or delete
  	}
  	// Largest values first, so everything from index size on is surplus.
  	sort.Slice(entries, func(i, j int) bool { return entries[i].stamp > entries[j].stamp })
  	for _, surplus := range entries[size:] {
  		unsorted.Delete(surplus.key)
  	}
  }
  171. func (c *ByClassifier) readBadTokens() {
  172. log.Println("Start reading BAD tokens")
  173. for token := range ControPlane.BadTokens {
  174. log.Println("Received BAD Token: ", token)
  175. c.IsBAD(token)
  176. }
  177. }
  178. func (c *ByClassifier) readGoodTokens() {
  179. log.Println("Start reading GOOD tokens")
  180. for token := range ControPlane.GoodTokens {
  181. log.Println("Received GOOD Token: ", token)
  182. c.IsGOOD(token)
  183. }
  184. }
  185. func (c *ByClassifier) readStatsTokens() {
  186. log.Println("Start reading STATS tokens")
  187. for token := range ControPlane.StatsTokens {
  188. c.AddStats(token)
  189. }
  190. }
  191. func (c *ByClassifier) readInitList(filePath, class string) {
  192. inFile, err := os.Open(filePath)
  193. if err != nil {
  194. log.Println(err.Error() + `: ` + filePath)
  195. return
  196. }
  197. defer inFile.Close()
  198. scanner := bufio.NewScanner(inFile)
  199. for scanner.Scan() {
  200. if len(scanner.Text()) > 3 {
  201. switch class {
  202. case "BAD":
  203. log.Println("Loading into Blacklist: ", scanner.Text()) // the line
  204. c.IsBAD(scanner.Text())
  205. case "GOOD":
  206. log.Println("Loading into Whitelist: ", scanner.Text()) // the line
  207. c.IsGOOD(scanner.Text())
  208. }
  209. }
  210. }
  211. }