Switching to entropy

This commit is contained in:
Loweel 2019-12-10 13:27:49 +01:00
parent a48a60145f
commit 210380c11b
4 changed files with 109 additions and 81 deletions

View file

@ -1,25 +1 @@
{
"LastUpdate": "2019-12-04 14:32:16.403693322 +0100 CET m=+0.016948951",
"GOOD": [
{
"Token": "/Gun/good",
"LastSeen": "2019-12-04 14:32:16.390739816 +0100 CET",
"Age": "13.201146ms"
}
],
"BAD": [
{
"Token": "/Penis/bad",
"LastSeen": "2019-12-04 14:32:16.389706996 +0100 CET",
"Age": "14.183289ms"
}
],
"MEH": [
{
"Token": "Dildo",
"LastSeen": "2019-12-04 14:32:16.390823335 +0100 CET",
"Age": "13.128746ms"
}
],
"STATS": null
}
{}

View file

@ -84,11 +84,16 @@ func feedRequest(req *http.Request, class string) {
feed := formatRequest(req)
tokens := strings.Fields(sanitizeHeaders(feed))
if class == "BAD" {
log.Println("Feeding BAD token: ", feed)
ControPlane.BadTokens <- sanitizeHeaders(feed)
for _, tk := range tokens {
ControPlane.BadTokens <- tk
}
}
@ -96,7 +101,10 @@ func feedRequest(req *http.Request, class string) {
log.Println("Feeding GOOD Token:", feed)
ControPlane.GoodTokens <- sanitizeHeaders(feed)
for _, tk := range tokens {
ControPlane.GoodTokens <- tk
}
}

34
file.go
View file

@ -1,7 +1,6 @@
package main
import (
"bytes"
"encoding/json"
"fmt"
"io"
@ -37,33 +36,16 @@ func saveBayesToFile() {
log.Println("Trying to write json file")
defer handlepanic()
var jsnBuf = new(bytes.Buffer)
var tmpJSON string
var tmpJSON []byte
Classifier.busy.Lock()
DumpJSON, err := Classifier.bayez.MarshalJSON()
Classifier.Matrix.busy.Lock()
defer Classifier.Matrix.busy.Unlock()
tmpJSON, err := json.MarshalIndent(Classifier.Matrix.bScores, "", " ")
if err != nil {
DumpJSON = []byte(err.Error())
tmpJSON = []byte(err.Error())
}
Classifier.busy.Unlock()
log.Println("Raw dump of Classifier: ", string(DumpJSON))
jerr := json.Indent(jsnBuf, DumpJSON, "", " ")
if jerr == nil {
tmpJSON = jsnBuf.String()
} else {
tmpJSON = jerr.Error()
}
fmt.Println(time.Now().String())
Classifier.STATS.Range(func(key interface{}, value interface{}) bool {
fmt.Printf("%s : %d\n", key.(string), value.(int64))
return true
})
dumpfile := os.Getenv("DUMPFILE")
if dumpfile == "" {
@ -71,9 +53,9 @@ func saveBayesToFile() {
}
if DebugLog {
log.Println(tmpJSON)
log.Println("DUMP: ", string(tmpJSON))
} else {
writeToFile(dumpfile, tmpJSON)
writeToFile(dumpfile, string(tmpJSON))
log.Println("File saved: ", dumpfile)
}

118
matrix.go
View file

@ -2,13 +2,12 @@ package main
import (
"bufio"
"fmt"
"log"
"math"
"os"
"strings"
"sync"
"github.com/lytics/multibayes"
)
//ByControlPlane contains all the channels we need.
@ -18,14 +17,23 @@ type ByControlPlane struct {
StatsTokens chan string
}
// bScore holds the two counters kept for a single token: how many times the
// token has been recorded as BAD and how many times as GOOD.
// The fields are exported so that encoding/json includes them when the score
// table is marshalled for the dump file.
type bScore struct {
// BadScore grows by one each time the token is recorded as BAD.
BadScore float64
// GoodScore grows by one each time the token is recorded as GOOD.
GoodScore float64
}
// bMap is the token score table together with the mutex that guards it.
// Because it contains a sync.Mutex it must not be copied after first use.
type bMap struct {
// bScores maps a token to its BAD/GOOD counters. It must be created with
// make before any write, since writing to a nil map panics.
bScores map[string]bScore
// busy is locked by callers around every read or write of bScores.
busy sync.Mutex
}
//ControPlane is the package-level ByControlPlane instance holding the channels.
var ControPlane ByControlPlane
//ByClassifier is the structure containing our Pseudo-Bayes classifier.
type ByClassifier struct {
STATS sync.Map
bayez *multibayes.Classifier
busy sync.Mutex
STATS sync.Map
Matrix bMap
}
//AddStats adds the statistics after proper blocking.
@ -44,39 +52,94 @@ func (c *ByClassifier) AddStats(action string) {
//IsBAD inserts a bad key in the right place.
func (c *ByClassifier) IsBAD(key string) {
c.busy.Lock()
defer c.busy.Unlock()
c.Matrix.busy.Lock()
defer c.Matrix.busy.Unlock()
c.bayez.Add(key, []string{"BAD"})
var t bScore
if val, ok := c.Matrix.bScores[key]; ok {
t.BadScore = val.BadScore + 1
t.GoodScore = val.GoodScore
} else {
t.BadScore = 1
t.GoodScore = 0
}
c.Matrix.bScores[key] = t
}
//IsGOOD inserts the key in the right place.
func (c *ByClassifier) IsGOOD(key string) {
c.busy.Lock()
defer c.busy.Unlock()
c.Matrix.busy.Lock()
defer c.Matrix.busy.Unlock()
c.bayez.Add(key, []string{"GOOD"})
var t bScore
if val, ok := c.Matrix.bScores[key]; ok {
t.GoodScore = val.GoodScore + 1
t.BadScore = val.BadScore
} else {
t.BadScore = 0
t.GoodScore = 1
}
c.Matrix.bScores[key] = t
}
//Posterior calculates the posterior probabilities in pseudo-bayes.
func (c *ByClassifier) Posterior(hdr string) (ff map[string]float64) {
func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
defer func() {
c.Matrix.busy.Lock()
defer c.Matrix.busy.Unlock()
if a := recover(); a != nil {
fmt.Println("OPS!: Recovering from:", a)
ff = make(map[string]float64)
ff["BAD"] = 0.5
ff["GOOD"] = 0.5
tokens := strings.Fields(sanitizeHeaders(hdr))
lenTokens := float64(len(tokens))
ff := make(map[string]float64)
if lenTokens == 0 {
ff["BAD"] = 0.5
ff["GOOD"] = 0.5
return ff
}
log.Println("Start classification of: ", tokens)
var hBadM, hGoodM float64
for _, tk := range tokens {
if val, ok := c.Matrix.bScores[tk]; ok {
log.Println("Classifier found: ", tk)
if val.BadScore > 0 {
hBadM += val.BadScore * math.Log2(val.BadScore)
}
if val.GoodScore > 0 {
hGoodM += val.GoodScore * math.Log2(val.GoodScore)
}
}
}()
c.busy.Lock()
ff = c.bayez.Posterior(hdr)
defer c.busy.Unlock()
}
hBadM = math.Log2(lenTokens) - (hBadM / lenTokens)
hGoodM = math.Log2(lenTokens) - (hGoodM / lenTokens)
if math.Abs(hGoodM) >= math.Abs(hBadM) {
ff["GOOD"] = 1
ff["BAD"] = 0
} else {
ff["GOOD"] = 0
ff["BAD"] = 1
}
log.Println("Entropies: ", ff)
return ff
}
@ -87,10 +150,9 @@ func (c *ByClassifier) enroll() {
ControPlane.GoodTokens = make(chan string, 2048)
ControPlane.StatsTokens = make(chan string, 2048)
c.busy.Lock()
c.bayez = multibayes.NewClassifier()
c.bayez.MinClassSize = 0
c.busy.Unlock()
c.Matrix.busy.Lock()
c.Matrix.bScores = make(map[string]bScore)
c.Matrix.busy.Unlock()
c.readInitList("blacklist.txt", "BAD")
c.readInitList("whitelist.txt", "GOOD")