Switching to entropy
This commit is contained in:
parent
a48a60145f
commit
210380c11b
4 changed files with 109 additions and 81 deletions
26
bayes.json
26
bayes.json
|
@ -1,25 +1 @@
|
|||
{
|
||||
"LastUpdate": "2019-12-04 14:32:16.403693322 +0100 CET m=+0.016948951",
|
||||
"GOOD": [
|
||||
{
|
||||
"Token": "/Gun/good",
|
||||
"LastSeen": "2019-12-04 14:32:16.390739816 +0100 CET",
|
||||
"Age": "13.201146ms"
|
||||
}
|
||||
],
|
||||
"BAD": [
|
||||
{
|
||||
"Token": "/Penis/bad",
|
||||
"LastSeen": "2019-12-04 14:32:16.389706996 +0100 CET",
|
||||
"Age": "14.183289ms"
|
||||
}
|
||||
],
|
||||
"MEH": [
|
||||
{
|
||||
"Token": "Dildo",
|
||||
"LastSeen": "2019-12-04 14:32:16.390823335 +0100 CET",
|
||||
"Age": "13.128746ms"
|
||||
}
|
||||
],
|
||||
"STATS": null
|
||||
}
|
||||
{}
|
|
@ -84,11 +84,16 @@ func feedRequest(req *http.Request, class string) {
|
|||
|
||||
feed := formatRequest(req)
|
||||
|
||||
tokens := strings.Fields(sanitizeHeaders(feed))
|
||||
|
||||
if class == "BAD" {
|
||||
|
||||
log.Println("Feeding BAD token: ", feed)
|
||||
|
||||
ControPlane.BadTokens <- sanitizeHeaders(feed)
|
||||
for _, tk := range tokens {
|
||||
|
||||
ControPlane.BadTokens <- tk
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -96,7 +101,10 @@ func feedRequest(req *http.Request, class string) {
|
|||
|
||||
log.Println("Feeding GOOD Token:", feed)
|
||||
|
||||
ControPlane.GoodTokens <- sanitizeHeaders(feed)
|
||||
for _, tk := range tokens {
|
||||
|
||||
ControPlane.GoodTokens <- tk
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
34
file.go
34
file.go
|
@ -1,7 +1,6 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
|
@ -37,33 +36,16 @@ func saveBayesToFile() {
|
|||
|
||||
log.Println("Trying to write json file")
|
||||
defer handlepanic()
|
||||
|
||||
|
||||
var jsnBuf = new(bytes.Buffer)
|
||||
var tmpJSON string
|
||||
var tmpJSON []byte
|
||||
|
||||
Classifier.busy.Lock()
|
||||
DumpJSON, err := Classifier.bayez.MarshalJSON()
|
||||
Classifier.Matrix.busy.Lock()
|
||||
defer Classifier.Matrix.busy.Unlock()
|
||||
|
||||
tmpJSON, err := json.MarshalIndent(Classifier.Matrix.bScores, "", " ")
|
||||
if err != nil {
|
||||
DumpJSON = []byte(err.Error())
|
||||
tmpJSON = []byte(err.Error())
|
||||
}
|
||||
Classifier.busy.Unlock()
|
||||
|
||||
log.Println("Raw dump of Classifier: ", string(DumpJSON))
|
||||
|
||||
jerr := json.Indent(jsnBuf, DumpJSON, "", " ")
|
||||
if jerr == nil {
|
||||
tmpJSON = jsnBuf.String()
|
||||
} else {
|
||||
tmpJSON = jerr.Error()
|
||||
}
|
||||
|
||||
fmt.Println(time.Now().String())
|
||||
|
||||
Classifier.STATS.Range(func(key interface{}, value interface{}) bool {
|
||||
fmt.Printf("%s : %d\n", key.(string), value.(int64))
|
||||
return true
|
||||
})
|
||||
|
||||
dumpfile := os.Getenv("DUMPFILE")
|
||||
if dumpfile == "" {
|
||||
|
@ -71,9 +53,9 @@ func saveBayesToFile() {
|
|||
}
|
||||
|
||||
if DebugLog {
|
||||
log.Println(tmpJSON)
|
||||
log.Println("DUMP: ", string(tmpJSON))
|
||||
} else {
|
||||
writeToFile(dumpfile, tmpJSON)
|
||||
writeToFile(dumpfile, string(tmpJSON))
|
||||
|
||||
log.Println("File saved: ", dumpfile)
|
||||
}
|
||||
|
|
118
matrix.go
118
matrix.go
|
@ -2,13 +2,12 @@ package main
|
|||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/lytics/multibayes"
|
||||
)
|
||||
|
||||
//ByControlPlane contains all the channels we need.
|
||||
|
@ -18,14 +17,23 @@ type ByControlPlane struct {
|
|||
StatsTokens chan string
|
||||
}
|
||||
|
||||
type bScore struct {
|
||||
BadScore float64
|
||||
GoodScore float64
|
||||
}
|
||||
|
||||
type bMap struct {
|
||||
bScores map[string]bScore
|
||||
busy sync.Mutex
|
||||
}
|
||||
|
||||
//ControPlane is the package-level variable holding the control-plane channels.
|
||||
var ControPlane ByControlPlane
|
||||
|
||||
//ByClassifier is the structure containing our Pseudo-Bayes classifier.
|
||||
type ByClassifier struct {
|
||||
STATS sync.Map
|
||||
bayez *multibayes.Classifier
|
||||
busy sync.Mutex
|
||||
STATS sync.Map
|
||||
Matrix bMap
|
||||
}
|
||||
|
||||
//AddStats adds the statistics after proper blocking.
|
||||
|
@ -44,39 +52,94 @@ func (c *ByClassifier) AddStats(action string) {
|
|||
//IsBAD inserts a bad key in the right place.
|
||||
func (c *ByClassifier) IsBAD(key string) {
|
||||
|
||||
c.busy.Lock()
|
||||
defer c.busy.Unlock()
|
||||
c.Matrix.busy.Lock()
|
||||
defer c.Matrix.busy.Unlock()
|
||||
|
||||
c.bayez.Add(key, []string{"BAD"})
|
||||
var t bScore
|
||||
|
||||
if val, ok := c.Matrix.bScores[key]; ok {
|
||||
t.BadScore = val.BadScore + 1
|
||||
t.GoodScore = val.GoodScore
|
||||
} else {
|
||||
t.BadScore = 1
|
||||
t.GoodScore = 0
|
||||
}
|
||||
|
||||
c.Matrix.bScores[key] = t
|
||||
|
||||
}
|
||||
|
||||
//IsGOOD inserts the key in the right place.
|
||||
func (c *ByClassifier) IsGOOD(key string) {
|
||||
|
||||
c.busy.Lock()
|
||||
defer c.busy.Unlock()
|
||||
c.Matrix.busy.Lock()
|
||||
defer c.Matrix.busy.Unlock()
|
||||
|
||||
c.bayez.Add(key, []string{"GOOD"})
|
||||
var t bScore
|
||||
|
||||
if val, ok := c.Matrix.bScores[key]; ok {
|
||||
t.GoodScore = val.GoodScore + 1
|
||||
t.BadScore = val.BadScore
|
||||
} else {
|
||||
t.BadScore = 0
|
||||
t.GoodScore = 1
|
||||
}
|
||||
|
||||
c.Matrix.bScores[key] = t
|
||||
|
||||
}
|
||||
|
||||
//Posterior calculates the posterior probabilities in pseudo-bayes.
|
||||
func (c *ByClassifier) Posterior(hdr string) (ff map[string]float64) {
|
||||
func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
|
||||
|
||||
defer func() {
|
||||
c.Matrix.busy.Lock()
|
||||
defer c.Matrix.busy.Unlock()
|
||||
|
||||
if a := recover(); a != nil {
|
||||
fmt.Println("OPS!: Recovering from:", a)
|
||||
ff = make(map[string]float64)
|
||||
ff["BAD"] = 0.5
|
||||
ff["GOOD"] = 0.5
|
||||
tokens := strings.Fields(sanitizeHeaders(hdr))
|
||||
lenTokens := float64(len(tokens))
|
||||
|
||||
ff := make(map[string]float64)
|
||||
|
||||
if lenTokens == 0 {
|
||||
ff["BAD"] = 0.5
|
||||
ff["GOOD"] = 0.5
|
||||
return ff
|
||||
}
|
||||
|
||||
log.Println("Start classification of: ", tokens)
|
||||
|
||||
var hBadM, hGoodM float64
|
||||
|
||||
for _, tk := range tokens {
|
||||
|
||||
if val, ok := c.Matrix.bScores[tk]; ok {
|
||||
log.Println("Classifier found: ", tk)
|
||||
if val.BadScore > 0 {
|
||||
hBadM += val.BadScore * math.Log2(val.BadScore)
|
||||
|
||||
}
|
||||
|
||||
if val.GoodScore > 0 {
|
||||
hGoodM += val.GoodScore * math.Log2(val.GoodScore)
|
||||
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
c.busy.Lock()
|
||||
ff = c.bayez.Posterior(hdr)
|
||||
defer c.busy.Unlock()
|
||||
}
|
||||
|
||||
hBadM = math.Log2(lenTokens) - (hBadM / lenTokens)
|
||||
hGoodM = math.Log2(lenTokens) - (hGoodM / lenTokens)
|
||||
|
||||
if math.Abs(hGoodM) >= math.Abs(hBadM) {
|
||||
ff["GOOD"] = 1
|
||||
ff["BAD"] = 0
|
||||
} else {
|
||||
ff["GOOD"] = 0
|
||||
ff["BAD"] = 1
|
||||
}
|
||||
|
||||
log.Println("Entropies: ", ff)
|
||||
|
||||
return ff
|
||||
|
||||
}
|
||||
|
@ -87,10 +150,9 @@ func (c *ByClassifier) enroll() {
|
|||
ControPlane.GoodTokens = make(chan string, 2048)
|
||||
ControPlane.StatsTokens = make(chan string, 2048)
|
||||
|
||||
c.busy.Lock()
|
||||
c.bayez = multibayes.NewClassifier()
|
||||
c.bayez.MinClassSize = 0
|
||||
c.busy.Unlock()
|
||||
c.Matrix.busy.Lock()
|
||||
c.Matrix.bScores = make(map[string]bScore)
|
||||
c.Matrix.busy.Unlock()
|
||||
|
||||
c.readInitList("blacklist.txt", "BAD")
|
||||
c.readInitList("whitelist.txt", "GOOD")
|
||||
|
|
Loading…
Reference in a new issue