Switching to entropy
This commit is contained in:
parent
a48a60145f
commit
210380c11b
4 changed files with 109 additions and 81 deletions
26
bayes.json
26
bayes.json
|
@ -1,25 +1 @@
|
||||||
{
|
{}
|
||||||
"LastUpdate": "2019-12-04 14:32:16.403693322 +0100 CET m=+0.016948951",
|
|
||||||
"GOOD": [
|
|
||||||
{
|
|
||||||
"Token": "/Gun/good",
|
|
||||||
"LastSeen": "2019-12-04 14:32:16.390739816 +0100 CET",
|
|
||||||
"Age": "13.201146ms"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"BAD": [
|
|
||||||
{
|
|
||||||
"Token": "/Penis/bad",
|
|
||||||
"LastSeen": "2019-12-04 14:32:16.389706996 +0100 CET",
|
|
||||||
"Age": "14.183289ms"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"MEH": [
|
|
||||||
{
|
|
||||||
"Token": "Dildo",
|
|
||||||
"LastSeen": "2019-12-04 14:32:16.390823335 +0100 CET",
|
|
||||||
"Age": "13.128746ms"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"STATS": null
|
|
||||||
}
|
|
|
@ -84,11 +84,16 @@ func feedRequest(req *http.Request, class string) {
|
||||||
|
|
||||||
feed := formatRequest(req)
|
feed := formatRequest(req)
|
||||||
|
|
||||||
|
tokens := strings.Fields(sanitizeHeaders(feed))
|
||||||
|
|
||||||
if class == "BAD" {
|
if class == "BAD" {
|
||||||
|
|
||||||
log.Println("Feeding BAD token: ", feed)
|
log.Println("Feeding BAD token: ", feed)
|
||||||
|
|
||||||
ControPlane.BadTokens <- sanitizeHeaders(feed)
|
for _, tk := range tokens {
|
||||||
|
|
||||||
|
ControPlane.BadTokens <- tk
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -96,7 +101,10 @@ func feedRequest(req *http.Request, class string) {
|
||||||
|
|
||||||
log.Println("Feeding GOOD Token:", feed)
|
log.Println("Feeding GOOD Token:", feed)
|
||||||
|
|
||||||
ControPlane.GoodTokens <- sanitizeHeaders(feed)
|
for _, tk := range tokens {
|
||||||
|
|
||||||
|
ControPlane.GoodTokens <- tk
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
34
file.go
34
file.go
|
@ -1,7 +1,6 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
@ -37,33 +36,16 @@ func saveBayesToFile() {
|
||||||
|
|
||||||
log.Println("Trying to write json file")
|
log.Println("Trying to write json file")
|
||||||
defer handlepanic()
|
defer handlepanic()
|
||||||
|
|
||||||
|
|
||||||
var jsnBuf = new(bytes.Buffer)
|
var tmpJSON []byte
|
||||||
var tmpJSON string
|
|
||||||
|
|
||||||
Classifier.busy.Lock()
|
Classifier.Matrix.busy.Lock()
|
||||||
DumpJSON, err := Classifier.bayez.MarshalJSON()
|
defer Classifier.Matrix.busy.Unlock()
|
||||||
|
|
||||||
|
tmpJSON, err := json.MarshalIndent(Classifier.Matrix.bScores, "", " ")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
DumpJSON = []byte(err.Error())
|
tmpJSON = []byte(err.Error())
|
||||||
}
|
}
|
||||||
Classifier.busy.Unlock()
|
|
||||||
|
|
||||||
log.Println("Raw dump of Classifier: ", string(DumpJSON))
|
|
||||||
|
|
||||||
jerr := json.Indent(jsnBuf, DumpJSON, "", " ")
|
|
||||||
if jerr == nil {
|
|
||||||
tmpJSON = jsnBuf.String()
|
|
||||||
} else {
|
|
||||||
tmpJSON = jerr.Error()
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Println(time.Now().String())
|
|
||||||
|
|
||||||
Classifier.STATS.Range(func(key interface{}, value interface{}) bool {
|
|
||||||
fmt.Printf("%s : %d\n", key.(string), value.(int64))
|
|
||||||
return true
|
|
||||||
})
|
|
||||||
|
|
||||||
dumpfile := os.Getenv("DUMPFILE")
|
dumpfile := os.Getenv("DUMPFILE")
|
||||||
if dumpfile == "" {
|
if dumpfile == "" {
|
||||||
|
@ -71,9 +53,9 @@ func saveBayesToFile() {
|
||||||
}
|
}
|
||||||
|
|
||||||
if DebugLog {
|
if DebugLog {
|
||||||
log.Println(tmpJSON)
|
log.Println("DUMP: ", string(tmpJSON))
|
||||||
} else {
|
} else {
|
||||||
writeToFile(dumpfile, tmpJSON)
|
writeToFile(dumpfile, string(tmpJSON))
|
||||||
|
|
||||||
log.Println("File saved: ", dumpfile)
|
log.Println("File saved: ", dumpfile)
|
||||||
}
|
}
|
||||||
|
|
118
matrix.go
118
matrix.go
|
@ -2,13 +2,12 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"fmt"
|
|
||||||
"log"
|
"log"
|
||||||
|
"math"
|
||||||
"os"
|
"os"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/lytics/multibayes"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
//ByControlPlane contains all the channels we need.
|
//ByControlPlane contains all the channels we need.
|
||||||
|
@ -18,14 +17,23 @@ type ByControlPlane struct {
|
||||||
StatsTokens chan string
|
StatsTokens chan string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type bScore struct {
|
||||||
|
BadScore float64
|
||||||
|
GoodScore float64
|
||||||
|
}
|
||||||
|
|
||||||
|
type bMap struct {
|
||||||
|
bScores map[string]bScore
|
||||||
|
busy sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
//ControPlane is the variable holding the control-plane channels.
|
//ControPlane is the variable holding the control-plane channels.
|
||||||
var ControPlane ByControlPlane
|
var ControPlane ByControlPlane
|
||||||
|
|
||||||
//ByClassifier is the structure containing our Pseudo-Bayes classifier.
|
//ByClassifier is the structure containing our Pseudo-Bayes classifier.
|
||||||
type ByClassifier struct {
|
type ByClassifier struct {
|
||||||
STATS sync.Map
|
STATS sync.Map
|
||||||
bayez *multibayes.Classifier
|
Matrix bMap
|
||||||
busy sync.Mutex
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//AddStats adds the statistics after proper blocking.
|
//AddStats adds the statistics after proper blocking.
|
||||||
|
@ -44,39 +52,94 @@ func (c *ByClassifier) AddStats(action string) {
|
||||||
//IsBAD inserts a bad key in the right place.
|
//IsBAD inserts a bad key in the right place.
|
||||||
func (c *ByClassifier) IsBAD(key string) {
|
func (c *ByClassifier) IsBAD(key string) {
|
||||||
|
|
||||||
c.busy.Lock()
|
c.Matrix.busy.Lock()
|
||||||
defer c.busy.Unlock()
|
defer c.Matrix.busy.Unlock()
|
||||||
|
|
||||||
c.bayez.Add(key, []string{"BAD"})
|
var t bScore
|
||||||
|
|
||||||
|
if val, ok := c.Matrix.bScores[key]; ok {
|
||||||
|
t.BadScore = val.BadScore + 1
|
||||||
|
t.GoodScore = val.GoodScore
|
||||||
|
} else {
|
||||||
|
t.BadScore = 1
|
||||||
|
t.GoodScore = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Matrix.bScores[key] = t
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//IsGOOD inserts the key in the right place.
|
//IsGOOD inserts the key in the right place.
|
||||||
func (c *ByClassifier) IsGOOD(key string) {
|
func (c *ByClassifier) IsGOOD(key string) {
|
||||||
|
|
||||||
c.busy.Lock()
|
c.Matrix.busy.Lock()
|
||||||
defer c.busy.Unlock()
|
defer c.Matrix.busy.Unlock()
|
||||||
|
|
||||||
c.bayez.Add(key, []string{"GOOD"})
|
var t bScore
|
||||||
|
|
||||||
|
if val, ok := c.Matrix.bScores[key]; ok {
|
||||||
|
t.GoodScore = val.GoodScore + 1
|
||||||
|
t.BadScore = val.BadScore
|
||||||
|
} else {
|
||||||
|
t.BadScore = 0
|
||||||
|
t.GoodScore = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Matrix.bScores[key] = t
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//Posterior calculates the posterior probabilities in pseudo-bayes.
|
//Posterior calculates the posterior probabilities in pseudo-bayes.
|
||||||
func (c *ByClassifier) Posterior(hdr string) (ff map[string]float64) {
|
func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
|
||||||
|
|
||||||
defer func() {
|
c.Matrix.busy.Lock()
|
||||||
|
defer c.Matrix.busy.Unlock()
|
||||||
|
|
||||||
if a := recover(); a != nil {
|
tokens := strings.Fields(sanitizeHeaders(hdr))
|
||||||
fmt.Println("OPS!: Recovering from:", a)
|
lenTokens := float64(len(tokens))
|
||||||
ff = make(map[string]float64)
|
|
||||||
ff["BAD"] = 0.5
|
ff := make(map[string]float64)
|
||||||
ff["GOOD"] = 0.5
|
|
||||||
|
if lenTokens == 0 {
|
||||||
|
ff["BAD"] = 0.5
|
||||||
|
ff["GOOD"] = 0.5
|
||||||
|
return ff
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Println("Start classification of: ", tokens)
|
||||||
|
|
||||||
|
var hBadM, hGoodM float64
|
||||||
|
|
||||||
|
for _, tk := range tokens {
|
||||||
|
|
||||||
|
if val, ok := c.Matrix.bScores[tk]; ok {
|
||||||
|
log.Println("Classifier found: ", tk)
|
||||||
|
if val.BadScore > 0 {
|
||||||
|
hBadM += val.BadScore * math.Log2(val.BadScore)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if val.GoodScore > 0 {
|
||||||
|
hGoodM += val.GoodScore * math.Log2(val.GoodScore)
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}()
|
|
||||||
|
|
||||||
c.busy.Lock()
|
}
|
||||||
ff = c.bayez.Posterior(hdr)
|
|
||||||
defer c.busy.Unlock()
|
hBadM = math.Log2(lenTokens) - (hBadM / lenTokens)
|
||||||
|
hGoodM = math.Log2(lenTokens) - (hGoodM / lenTokens)
|
||||||
|
|
||||||
|
if math.Abs(hGoodM) >= math.Abs(hBadM) {
|
||||||
|
ff["GOOD"] = 1
|
||||||
|
ff["BAD"] = 0
|
||||||
|
} else {
|
||||||
|
ff["GOOD"] = 0
|
||||||
|
ff["BAD"] = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Println("Entropies: ", ff)
|
||||||
|
|
||||||
return ff
|
return ff
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -87,10 +150,9 @@ func (c *ByClassifier) enroll() {
|
||||||
ControPlane.GoodTokens = make(chan string, 2048)
|
ControPlane.GoodTokens = make(chan string, 2048)
|
||||||
ControPlane.StatsTokens = make(chan string, 2048)
|
ControPlane.StatsTokens = make(chan string, 2048)
|
||||||
|
|
||||||
c.busy.Lock()
|
c.Matrix.busy.Lock()
|
||||||
c.bayez = multibayes.NewClassifier()
|
c.Matrix.bScores = make(map[string]bScore)
|
||||||
c.bayez.MinClassSize = 0
|
c.Matrix.busy.Unlock()
|
||||||
c.busy.Unlock()
|
|
||||||
|
|
||||||
c.readInitList("blacklist.txt", "BAD")
|
c.readInitList("blacklist.txt", "BAD")
|
||||||
c.readInitList("whitelist.txt", "GOOD")
|
c.readInitList("whitelist.txt", "GOOD")
|
||||||
|
|
Loading…
Reference in a new issue