matrix.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. package main
  2. import (
  3. "bufio"
  4. "fmt"
  5. "log"
  6. "os"
  7. "sync"
  8. "github.com/lytics/multibayes"
  9. )
  // ByControlPlane bundles the string channels used to feed the classifier:
  // one for tokens to be learned as bad, one for tokens to be learned as good,
  // and one for statistics events.
  type ByControlPlane struct {
  	BadTokens, GoodTokens, StatsTokens chan string
  }
  16. //ControPlane is the variabile
  17. var ControPlane ByControlPlane
  18. //ByClassifier is the structure containing our Pseudo-Bayes classifier.
  19. type ByClassifier struct {
  20. STATS sync.Map
  21. bayez *multibayes.Classifier
  22. busy sync.Mutex
  23. }
  24. //AddStats adds the statistics after proper blocking.
  25. func (c *ByClassifier) AddStats(action string) {
  26. var one int64 = 1
  27. if v, ok := c.STATS.Load(action); ok {
  28. c.STATS.Store(action, v.(int64)+1)
  29. } else {
  30. c.STATS.Store(action, one)
  31. }
  32. }
  33. //IsBAD inserts a bad key in the right place.
  34. func (c *ByClassifier) IsBAD(key string) {
  35. c.busy.Lock()
  36. defer c.busy.Unlock()
  37. c.bayez.Add(key, []string{"BAD"})
  38. }
  39. //IsGOOD inserts the key in the right place.
  40. func (c *ByClassifier) IsGOOD(key string) {
  41. c.busy.Lock()
  42. defer c.busy.Unlock()
  43. c.bayez.Add(key, []string{"GOOD"})
  44. }
  45. //Posterior calculates the posterior probabilities in pseudo-bayes.
  46. func (c *ByClassifier) Posterior(hdr string) (ff map[string]float64) {
  47. defer func() {
  48. if a := recover(); a != nil {
  49. fmt.Println("OPS!: Recovering from:", a)
  50. ff = make(map[string]float64)
  51. ff["BAD"] = 0.5
  52. ff["GOOD"] = 0.5
  53. }
  54. }()
  55. c.busy.Lock()
  56. ff = c.bayez.Posterior(hdr)
  57. defer c.busy.Unlock()
  58. return ff
  59. }
  60. func (c *ByClassifier) enroll() {
  61. ControPlane.BadTokens = make(chan string, 2048)
  62. ControPlane.GoodTokens = make(chan string, 2048)
  63. ControPlane.StatsTokens = make(chan string, 2048)
  64. c.busy.Lock()
  65. c.bayez = multibayes.NewClassifier()
  66. c.bayez.MinClassSize = 0
  67. c.busy.Unlock()
  68. c.readInitList("blacklist.txt", "BAD")
  69. c.readInitList("whitelist.txt", "GOOD")
  70. go c.readBadTokens()
  71. go c.readGoodTokens()
  72. go c.readStatsTokens()
  73. log.Println("Classifier populated...")
  74. }
  75. func (c *ByClassifier) readBadTokens() {
  76. log.Println("Start reading BAD tokens")
  77. for token := range ControPlane.BadTokens {
  78. log.Println("Received BAD Token: ", token)
  79. c.IsBAD(token)
  80. }
  81. }
  82. func (c *ByClassifier) readGoodTokens() {
  83. log.Println("Start reading GOOD tokens")
  84. for token := range ControPlane.GoodTokens {
  85. log.Println("Received GOOD Token: ", token)
  86. c.IsGOOD(token)
  87. }
  88. }
  89. func (c *ByClassifier) readStatsTokens() {
  90. log.Println("Start reading STATS tokens")
  91. for token := range ControPlane.StatsTokens {
  92. c.AddStats(token)
  93. }
  94. }
  95. func (c *ByClassifier) readInitList(filePath, class string) {
  96. inFile, err := os.Open(filePath)
  97. if err != nil {
  98. log.Println(err.Error() + `: ` + filePath)
  99. return
  100. }
  101. defer inFile.Close()
  102. scanner := bufio.NewScanner(inFile)
  103. for scanner.Scan() {
  104. if len(scanner.Text()) > 3 {
  105. switch class {
  106. case "BAD":
  107. log.Println("Loading into Blacklist: ", scanner.Text()) // the line
  108. c.IsBAD(scanner.Text())
  109. case "GOOD":
  110. log.Println("Loading into Whitelist: ", scanner.Text()) // the line
  111. c.IsGOOD(scanner.Text())
  112. }
  113. }
  114. }
  115. }