
Merge branch 'master' of https://git.keinpfusch.net/loweel/zardoz

Loweel, 4 years ago
parent commit fe6127e441
11 changed files with 227 additions and 327 deletions
  1. README.md (+11 -3)
  2. alloc.go (+2 -0)
  3. bayes.json (+25 -0)
  4. blacklist.txt (+4 -0)
  5. classifier.go (+17 -5)
  6. file.go (+70 -15)
  7. handler.go (+4 -3)
  8. matrix.go (+87 -44)
  9. matrix.old (+0 -254)
  10. run.sh (+3 -3)
  11. whitelist.txt (+4 -0)

+ 11 - 3
README.md

@@ -10,7 +10,7 @@ It is designed to don't consume much memory neither CPU, so that you don't need
 
 ## STATUS:
 
-This is just an experiment I'm doing with Pseudo-Bayes classifiers. Run in production at your own risk.
+This is just an experiment I'm doing with Pseudo-Bayes classifiers. It works pretty well with my blog. Run in production at your own risk.
 
 
 ## Compiling:
@@ -40,6 +40,7 @@ ENV TRIGGER	0.6
 ENV SENIORITY	1025
 ENV DEBUG false
 ENV DUMPFILE /somewhere/bayes.txt
+ENV REFRESHTIME 24h
 ```
 
 Using a bash script, this means something like:
@@ -50,7 +51,8 @@ export PROXYPORT=":17000"
 export TRIGGER="0.6"
 export SENIORITY="1025"
 export DEBUG="true"
-expost DUMPFILE="/somewhere/bayes.txt"
+export DUMPFILE="/somewhere/bayes.txt"
+export REFRESHTIME="24h"
 ./zardoz 
 ```
 
@@ -81,12 +83,18 @@ Personally I've got good results putting the trigger at 0.6, meaning this is not
 
 **SENIORITY**:  since Zardoz will learn what is good for your web server, it takes time to gain seniority. To start Zardoz as empty and leave it to decide will generate some terrible behavior, because of false positives and false negatives. Plus, at the beginning Zardoz is supposed to ALWAYS learn.
 
-The parameter "SENIORITY" is then the amount of requests it will set in "PASS+LEARN" before of activating the filtering. During this time, it will learn from real traffic. If you set it to 1025, it will learn from 1025 requests and then it will start to actually filter the requests. The number depends by many factors: if you have a lot of page served and a lot of contents, I suggest to increase the number.
+The parameter "SENIORITY" is then the amount of requests it will spend in "PASS+LEARN" before the filtering starts. During this time, it will learn from real traffic, and it will block no traffic until "seniority" is reached. If you set it to 1025, it will learn from 1025 requests and then start actually filtering requests. The number depends on many factors: if you serve a lot of pages and a lot of content, I suggest increasing it.
 
 **DUMPFILE** 
 
 This is where you want the dumpfile to be saved. Useful with Docker volumes.
 
+**REFRESHTIME**
+
+Interval at which spurious records are refreshed. Some strings get classified as both good and bad ('Meh').
+To optimize and keep the daemon small, we clean them from time to time. REFRESHTIME is the interval
+between cleanups, in Golang time.Duration syntax, like "24h" or "10h31m" (note: "d" for days is not a valid Go duration unit).
+
 **TROUBLESHOOTING:**
 
 If DEBUG is set to "false" or not set, every minute Zardoz will dump the sparse matrix describing the whole bayesian learning into a file named bayes.json. This contains the weighted matrix of calls and classes. If Zardoz is not behaving like you expected, you may take a look at this file. The format is a classic sparse matrix. WARNING: this file **may** contain cookies or other sensitive headers.
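
Since the new REFRESHTIME knob uses Go duration syntax, here is a minimal standalone sketch (not code from this commit) showing how such a value can be validated with the standard library; the "24h" fallback is an assumption for illustration only:

```go
package main

import (
	"fmt"
	"log"
	"os"
	"time"
)

func main() {
	// Valid time.Duration units are "ns", "us", "ms", "s", "m", "h";
	// there is no "d", so one day must be written as "24h".
	raw := os.Getenv("REFRESHTIME")
	if raw == "" {
		raw = "24h" // assumed default, for illustration only
	}
	d, err := time.ParseDuration(raw)
	if err != nil {
		log.Fatalf("invalid REFRESHTIME %q: %v", raw, err)
	}
	fmt.Println("refresh interval:", d)
}
```

With this, `REFRESHTIME="10h31m"` parses fine, while `REFRESHTIME="1d10h31m"` fails with an "unknown unit" error.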

+ 2 - 0
alloc.go

@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"log"
 	"net/http"
+	"time"
 )
 
 //HTTPFlow is a type containg all the data we need.
@@ -12,6 +13,7 @@ type HTTPFlow struct {
 	response    *http.Response
 	sensitivity float64 // value who triggers decisions
 	seniority   int64
+	refreshtime time.Duration
 }
 
 //DebugLog tells if logs are in debug mode or not

+ 25 - 0
bayes.json

@@ -0,0 +1,25 @@
+{
+ "LastUpdate": "2019-12-04 14:32:16.403693322 +0100 CET m=+0.016948951",
+ "GOOD": [
+  {
+   "Token": "/Gun/good",
+   "LastSeen": "2019-12-04 14:32:16.390739816 +0100 CET",
+   "Age": "13.201146ms"
+  }
+ ],
+ "BAD": [
+  {
+   "Token": "/Penis/bad",
+   "LastSeen": "2019-12-04 14:32:16.389706996 +0100 CET",
+   "Age": "14.183289ms"
+  }
+ ],
+ "MEH": [
+  {
+   "Token": "Dildo",
+   "LastSeen": "2019-12-04 14:32:16.390823335 +0100 CET",
+   "Age": "13.128746ms"
+  }
+ ],
+ "STATS": null
+}

+ 4 - 0
blacklist.txt

@@ -0,0 +1,4 @@
+penis
+wallet
+
+

+ 17 - 5
classifier.go

@@ -11,7 +11,7 @@ import (
 	"strings"
 )
 
-func classifierDecide(resp *http.Response) error {
+func passAndLearn(resp *http.Response) error {
 
 	ProxyFlow.response = resp
 	ProxyFlow.seniority++
@@ -38,7 +38,7 @@ func classifierDecide(resp *http.Response) error {
 	return nil
 }
 
-func blockWithoutLock(resp *http.Response) error {
+func blockAndlearn(resp *http.Response) error {
 
 	ProxyFlow.response = resp
 	ProxyFlow.seniority++
@@ -69,9 +69,16 @@ func blockWithoutLock(resp *http.Response) error {
 
 func sanitizeHeaders(s string) string {
 
-	re := regexp.MustCompile(`[a-zA-Z]{3,32}|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})`)
+	re := regexp.MustCompile(`[a-zA-Z]{4,32}|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})|([{][/].*[}])|([0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12})`)
 	matched := re.FindAllString(s, -1)
-	return strings.ToLower(strings.Join(matched, " "))
+
+	tmpSt := strings.ToLower(strings.Join(matched, " "))
+	tmpSt = strings.ReplaceAll(tmpSt, "{", "")
+	tmpSt = strings.ReplaceAll(tmpSt, "}", "")
+
+	log.Println("Matched: " + tmpSt)
+
+	return tmpSt
 
 }
 
@@ -86,6 +93,8 @@ func feedRequest(req *http.Request, class string) {
 	if class == "BAD" {
 		for _, token := range feedarray {
 
+			log.Println("Feeding BAD token: ", token)
+
 			ControPlane.BadTokens <- token
 
 		}
@@ -94,6 +103,8 @@ func feedRequest(req *http.Request, class string) {
 	if class == "GOOD" {
 		for _, token := range feedarray {
 
+			log.Println("Feeding GOOD Token:", token)
+
 			ControPlane.GoodTokens <- token
 
 		}
@@ -112,5 +123,6 @@ func formatRequest(req *http.Request) string {
 		fmt.Println(err)
 	}
 
-	return fmt.Sprintf("%s\n", requestDump)
+	return fmt.Sprintf("{%s} %s\n", req.URL.Path, requestDump)
+
 }
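
To see what the widened sanitizeHeaders pattern keeps, here is a self-contained reproduction of the matching logic: the regex is copied verbatim from the diff, while the sample input is invented. The `{/...}` alternative catches the path marker that the new formatRequest prepends as `{req.URL.Path}`:

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	// Same pattern as sanitizeHeaders: words of 4-32 letters, dotted-quad
	// IPs, {/...} path markers (added by formatRequest), and UUIDs.
	re := regexp.MustCompile(`[a-zA-Z]{4,32}|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})|([{][/].*[}])|([0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12})`)

	s := "{/blog/post} Mozilla/5.0 192.168.1.10 ab"
	matched := re.FindAllString(s, -1)

	// Lowercase, join, and strip the braces, as the new code does.
	out := strings.ToLower(strings.Join(matched, " "))
	out = strings.ReplaceAll(out, "{", "")
	out = strings.ReplaceAll(out, "}", "")

	fmt.Println(out) // /blog/post mozilla 192.168.1.10
}
```

Note that short fragments like "ab" and the "5.0" version number drop out, since they match neither the 4-letter minimum nor the full dotted-quad alternative.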

+ 70 - 15
file.go

@@ -1,14 +1,41 @@
 package main
 
 import (
+	"encoding/json"
 	"fmt"
 	"io"
-
 	"log"
 	"os"
 	"time"
 )
 
+type dumpStruct struct {
+	Update string  `json:"LastUpdate"`
+	GOOD   []GOOD  `json:"GOOD"`
+	BAD    []BAD   `json:"BAD"`
+	MEH    []MEH   `json:"MEH"`
+	STATS  []STATS `json:"STATS"`
+}
+type GOOD struct {
+	Token    string `json:"Token"`
+	LastSeen string `json:"LastSeen"`
+	Delta    string `json:"Age"`
+}
+type BAD struct {
+	Token    string `json:"Token"`
+	LastSeen string `json:"LastSeen"`
+	Delta    string `json:"Age"`
+}
+type MEH struct {
+	Token    string `json:"Token"`
+	LastSeen string `json:"LastSeen"`
+	Delta    string `json:"Age"`
+}
+type STATS struct {
+	Decision string `json:"Decision"`
+	Amount   int64  `json:"Amount"`
+}
+
 // WriteToFile will print any string of text to a file safely by
 // checking for errors and syncing at the end.
 func writeToFile(filename string, data string) error {
@@ -34,41 +61,69 @@ func handlepanic() {
 
 func saveBayesToFile() {
 
-	var tmpJSON string
+	var tmpJSON, tmpBayes string
+	var DumpJson = new(dumpStruct)
 
-	log.Println("Trying to write json file")
+	DumpJson.Update = time.Now().String()
 
-	tmpJSON = fmt.Sprintln("\nBAD: ")
+	log.Println("Trying to write json file")
 
 	Classifier.BAD.Range(func(key interface{}, value interface{}) bool {
-		tmpJSON = fmt.Sprintf("\t%s %-32s: %d\n", tmpJSON, key.(string), value.(int64))
+		var t BAD
+		v := value.(int64)
+		t.Token = key.(string)
+		t.LastSeen = time.Unix(0, v).String()
+		t.Delta = time.Since(time.Unix(0, v)).String()
+		DumpJson.BAD = append(DumpJson.BAD, t)
 		return true
 	})
 
-	tmpJSON = fmt.Sprintf("%s \rGOOD: \n", tmpJSON)
-
 	Classifier.GOOD.Range(func(key interface{}, value interface{}) bool {
-		tmpJSON = fmt.Sprintf("\t%s %-32s: %d\n", tmpJSON, key.(string), value.(int64))
+		var t GOOD
+		v := value.(int64)
+		t.Token = key.(string)
+		t.LastSeen = time.Unix(0, v).String()
+		t.Delta = time.Since(time.Unix(0, v)).String()
+		DumpJson.GOOD = append(DumpJson.GOOD, t)
 		return true
 	})
 
-	tmpJSON = fmt.Sprintf("%s \rMEH: \n", tmpJSON)
-
 	Classifier.MEH.Range(func(key interface{}, value interface{}) bool {
-		tmpJSON = fmt.Sprintf("\t%s %-32s: %d\n", tmpJSON, key.(string), value.(int64))
+		var t MEH
+		v := value.(int64)
+		t.Token = key.(string)
+		t.LastSeen = time.Unix(0, v).String()
+		t.Delta = time.Since(time.Unix(0, v)).String()
+		DumpJson.MEH = append(DumpJson.MEH, t)
 		return true
 	})
 
-	tmpJSON = fmt.Sprintf("%s \rSTATS: \n", tmpJSON)
-
 	Classifier.STATS.Range(func(key interface{}, value interface{}) bool {
-		tmpJSON = fmt.Sprintf("\t%s %-32s: %d\n", tmpJSON, key.(string), value.(int64))
+		var t STATS
+		t.Decision = key.(string)
+		t.Amount = value.(int64)
+		DumpJson.STATS = append(DumpJson.STATS, t)
+
 		return true
 	})
 
+	if tmpJ, e := json.MarshalIndent(DumpJson, "", " "); e == nil {
+		tmpJSON = fmt.Sprintf("%s", tmpJ)
+	} else {
+		tmpJSON = e.Error()
+	}
+
+	if tmpB, er := json.MarshalIndent(Classifier.bayez.Matrix, "", " "); er == nil {
+		tmpBayes = fmt.Sprintf("%s", tmpB)
+	} else {
+		tmpBayes = er.Error()
+	}
+
+	tmpJSON = fmt.Sprintf("%s\n%s", tmpJSON, tmpBayes)
+
 	dumpfile := os.Getenv("DUMPFILE")
 	if dumpfile == "" {
-		dumpfile = "bayes.txt"
+		dumpfile = "bayes.json"
 	}
 
 	if DebugLog {
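
The dump rebuilds LastSeen and Age from the UnixNano values the classifier stores in its sync.Maps. A small self-contained illustration of that round trip, using the same conversions as saveBayesToFile:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// The classifier stores time.Now().UnixNano() as the map value...
	v := time.Now().UnixNano()

	// ...and the dump turns it back into the LastSeen and Age strings
	// seen in bayes.json, via time.Unix(0, v) and time.Since.
	lastSeen := time.Unix(0, v).String()
	age := time.Since(time.Unix(0, v)).String()

	fmt.Println("LastSeen:", lastSeen)
	fmt.Println("Age:     ", age)
}
```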

+ 4 - 3
handler.go

@@ -19,19 +19,20 @@ func handler(p *httputil.ReverseProxy) func(http.ResponseWriter, *http.Request)
 
 		switch action {
 		case "BLOCK", "BLOCKLEARN":
-			p.ModifyResponse = blockWithoutLock
+			p.ModifyResponse = blockAndlearn
+			w.Header().Set("Probabilities", fmt.Sprintf("%v ", probs))
 			log.Println("Request Blocked")
 			p.ServeHTTP(w, r)
 
 		case "PASS", "PASSLEARN":
-			p.ModifyResponse = classifierDecide
+			p.ModifyResponse = passAndLearn
 			w.Header().Set("Probabilities", fmt.Sprintf("%v ", probs))
 			p.ServeHTTP(w, r)
 			log.Println("Passing Request")
 
 		default:
 			log.Println("No Decision: PASS and LEARN")
-			p.ModifyResponse = classifierDecide
+			p.ModifyResponse = passAndLearn
 			w.Header().Set("Probabilities", fmt.Sprintf("%v ", probs))
 			p.ServeHTTP(w, r)
 

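The handler swaps the proxy's ModifyResponse hook per request before proxying. A minimal sketch of the same pattern with net/http/httputil; the upstream URL, port, and hook body are made up for illustration:

```go
package main

import (
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
)

func main() {
	target, _ := url.Parse("http://127.0.0.1:8080") // hypothetical upstream
	p := httputil.NewSingleHostReverseProxy(target)

	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		// Swap the response hook before proxying, as handler.go does
		// with passAndLearn / blockAndlearn.
		p.ModifyResponse = func(resp *http.Response) error {
			log.Println("upstream status:", resp.StatusCode)
			return nil
		}
		p.ServeHTTP(w, r)
	})

	log.Fatal(http.ListenAndServe(":17000", nil))
}
```
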
+ 87 - 44
matrix.go

@@ -1,13 +1,17 @@
 package main
 
 import (
+	"bufio"
 	"fmt"
 	"log"
+	"os"
 	"sort"
 	"strconv"
-	"strings"
+
 	"sync"
 	"time"
+
+	"github.com/lytics/multibayes"
 )
 
 //ByControlPlane contains all the channels we need.
@@ -26,6 +30,7 @@ type ByClassifier struct {
 	BAD   sync.Map
 	MEH   sync.Map
 	STATS sync.Map
+	bayez *multibayes.Classifier
 }
 
 //AddStats adds the statistics after proper blocking.
@@ -46,21 +51,19 @@ func (c *ByClassifier) IsBAD(key string) {
 
 	if _, ok := c.MEH.Load(key); ok {
 		c.MEH.Store(key, time.Now().UnixNano())
+		log.Println("Updated BAD into MEH: ", key)
 		return
 	}
 
 	if _, ok := c.GOOD.Load(key); ok {
 		c.MEH.Store(key, time.Now().UnixNano())
 		c.GOOD.Delete(key)
-		return
-	}
-
-	if _, ok := c.BAD.Load(key); ok {
-		c.BAD.Store(key, time.Now().UnixNano())
+		log.Println("Moved to MEH from GOOD: ", key)
 		return
 	}
 
 	c.BAD.Store(key, time.Now().UnixNano())
+	log.Println("Stored into BAD: ", key)
 
 }
 
@@ -69,59 +72,36 @@ func (c *ByClassifier) IsGOOD(key string) {
 
 	if _, ok := c.MEH.Load(key); ok {
 		c.MEH.Store(key, time.Now().UnixNano())
+		log.Println("Updated GOOD into MEH: ", key)
 		return
 	}
 
 	if _, ok := c.BAD.Load(key); ok {
 		c.MEH.Store(key, time.Now().UnixNano())
 		c.BAD.Delete(key)
-		return
-	}
-
-	if _, ok := c.GOOD.Load(key); ok {
-		c.GOOD.Store(key, time.Now().UnixNano())
+		log.Println("Moved to MEH from BAD: ", key)
 		return
 	}
 
 	c.GOOD.Store(key, time.Now().UnixNano())
+	log.Println("Stored into GOOD: ", key)
 
 }
 
 //Posterior calculates the posterior probabilities in pseudo-bayes.
-func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
-
-	headers := strings.Fields(sanitizeHeaders(hdr))
+func (c *ByClassifier) Posterior(hdr string) (ff map[string]float64) {
 
-	var result = make(map[string]float64)
-	result["BAD"] = 0
-	result["GOOD"] = 0
+	defer func() {
 
-	var tmpResGood, tmpResBad, tmpTotal float64
-
-	for _, token := range headers {
-
-		if _, ok := c.BAD.Load(token); ok {
-			tmpResBad++
-			tmpTotal++
+		if a := recover(); a != nil {
+			fmt.Println("OPS!: Recovering from:", a)
+			ff = make(map[string]float64)
+			ff["BAD"] = 0.5
+			ff["GOOD"] = 0.5
 		}
+	}()
 
-		if _, ok := c.GOOD.Load(token); ok {
-			tmpResGood++
-			tmpTotal++
-		}
-
-	}
-
-	if tmpTotal == 0 {
-		tmpTotal = 1
-	}
-
-	log.Printf("Bad Tokens: %f, Good Tokens %f , Total %f\n", tmpResBad, tmpResGood, tmpTotal)
-
-	result["BAD"] = tmpResBad / tmpTotal
-	result["GOOD"] = tmpResGood / tmpTotal
-
-	return result
+	return c.bayez.Posterior(hdr)
 
 }
 
@@ -140,6 +120,37 @@ func (c *ByClassifier) Janitor(size int) {
 
 }
 
+//RefreshBayes refreshes the bayesian classifier using the values we stored
+func (c *ByClassifier) RefreshBayes() {
+
+	log.Println("RefreshBayes Thread started")
+
+	ticker := time.NewTicker(5 * time.Minute)
+
+	for ; true; <-ticker.C {
+
+		c.bayez = multibayes.NewClassifier()
+		c.bayez.MinClassSize = 0
+
+		c.BAD.Range(func(key interface{}, value interface{}) bool {
+			c.bayez.Add(key.(string), []string{"BAD"})
+			return true
+		})
+
+		c.GOOD.Range(func(key interface{}, value interface{}) bool {
+			c.bayez.Add(key.(string), []string{"GOOD"})
+			return true
+		})
+
+		c.MEH.Range(func(key interface{}, value interface{}) bool {
+			c.bayez.Add(key.(string), []string{"GOOD", "BAD"})
+			return true
+		})
+
+	}
+
+}
+
 //CleanThread is the Janitor thread
 func (c *ByClassifier) CleanThread() {
 
@@ -164,8 +175,11 @@ func (c *ByClassifier) enroll() {
 	ControPlane.GoodTokens = make(chan string, 2048)
 	ControPlane.StatsTokens = make(chan string, 2048)
 
-	c.IsBAD("Penis")
-	c.IsGOOD("Gun")
+	c.bayez = multibayes.NewClassifier()
+	c.bayez.MinClassSize = 0
+
+	c.readInitList("blacklist.txt", "BAD")
+	c.readInitList("whitelist.txt", "GOOD")
 	c.MEH.Store("Dildo", time.Now().UnixNano())
 
 	go c.readBadTokens()
@@ -174,6 +188,7 @@ func (c *ByClassifier) enroll() {
 
 	log.Println("Classifier populated...")
 	go c.CleanThread()
+	go c.RefreshBayes()
 	log.Println("Janitor Started")
 
 }
@@ -197,7 +212,7 @@ func sortMap(unsorted *sync.Map, size int) {
 
 	sort.Slice(tempCont, func(i, j int) bool { return tempCont[i].Num > tempCont[j].Num })
 
-	if len(tempCont) > size {
+	if size > 0 && len(tempCont) > size {
 		tempCont = tempCont[:size]
 	}
 
@@ -217,6 +232,7 @@ func (c *ByClassifier) readBadTokens() {
 	log.Println("Start reading BAD tokens")
 
 	for token := range ControPlane.BadTokens {
+		log.Println("Received BAD Token: ", token)
 		c.IsBAD(token)
 	}
 
@@ -227,6 +243,7 @@ func (c *ByClassifier) readGoodTokens() {
 	log.Println("Start reading GOOD tokens")
 
 	for token := range ControPlane.GoodTokens {
+		log.Println("Received GOOD Token: ", token)
 		c.IsGOOD(token)
 	}
 
@@ -241,3 +258,29 @@ func (c *ByClassifier) readStatsTokens() {
 	}
 
 }
+
+func (c *ByClassifier) readInitList(filePath, class string) {
+
+	inFile, err := os.Open(filePath)
+	if err != nil {
+		log.Println(err.Error() + `: ` + filePath)
+		return
+	}
+	defer inFile.Close()
+
+	scanner := bufio.NewScanner(inFile)
+	for scanner.Scan() {
+
+		if len(scanner.Text()) > 3 {
+			switch class {
+			case "BAD":
+				log.Println("Loading into Blacklist: ", scanner.Text()) // the line
+				c.IsBAD(scanner.Text())
+			case "GOOD":
+				log.Println("Loading into Whitelist: ", scanner.Text()) // the line
+				c.IsGOOD(scanner.Text())
+			}
+		}
+	}
+
+}
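
The rewritten Posterior now delegates to github.com/lytics/multibayes. A minimal sketch using only the calls visible in this diff (NewClassifier, MinClassSize, Add, Posterior); the training tokens echo the new blacklist.txt/whitelist.txt entries:

```go
package main

import (
	"fmt"

	"github.com/lytics/multibayes"
)

func main() {
	c := multibayes.NewClassifier()
	c.MinClassSize = 0 // as in enroll(): classify even with tiny training sets

	// Train the way RefreshBayes does: ambiguous ('Meh') tokens
	// are added to both classes.
	c.Add("penis", []string{"BAD"})
	c.Add("guns", []string{"GOOD"})
	c.Add("dildo", []string{"GOOD", "BAD"})

	// Posterior returns a class -> probability map, which the handler
	// compares against TRIGGER to decide PASS or BLOCK.
	fmt.Println(c.Posterior("some header with guns inside"))
}
```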

+ 0 - 254
matrix.old

@@ -1,254 +0,0 @@
-package main
-
-import (
-	"log"
-	"sort"
-	"strings"
-	"time"
-
-	"github.com/n3integration/classifier/naive"
-)
-
-//ByControlplane contains all the channels we need.
-type ByControlPlane struct {
-	BadTokens   chan string
-	GoodTokens  chan string
-	StatsTokens chan string
-}
-
-//Controlplane is the variabile
-var ControPlane ByControlPlane
-
-//ByClassifier is the structure containing our Pseudo-Bayes classifier.
-type ByClassifier struct {
-	GOOD  map[string]float64
-	BAD   map[string]float64
-	MEH   map[string]float64
-	STATS map[string]int64
-}
-
-//AddStats adds the statistics after proper blocking.
-func (c *ByClassifier) AddStats(action string) {
-
-	if _, ok := c.STATS[action]; ok {
-		c.STATS[action]++
-	} else {
-		c.STATS[action] = 1
-	}
-
-}
-
-//IsBAD inserts a bad key in the right place.
-func (c *ByClassifier) IsBAD(key string) {
-
-	if _, ok := c.MEH[key]; ok {
-		c.MEH[key]++
-		return
-	}
-
-	if score, ok := c.GOOD[key]; ok {
-		c.MEH[key] = score + 1
-		delete(c.GOOD, key)
-		return
-	}
-
-	if _, ok := c.BAD[key]; ok {
-		c.BAD[key]++
-		return
-	}
-
-	c.BAD[key] = 1
-
-}
-
-//IsGOOD inserts the key in the right place.
-func (c *ByClassifier) IsGOOD(key string) {
-
-	if _, ok := c.MEH[key]; ok {
-		c.MEH[key]++
-		return
-	}
-
-	if score, ok := c.BAD[key]; ok {
-		c.MEH[key] = score + 1
-		delete(c.BAD, key)
-		return
-	}
-
-	if _, ok := c.GOOD[key]; ok {
-		c.GOOD[key]++
-		return
-	}
-
-	c.GOOD[key] = 1
-
-}
-
-//Posterior calculates the posterior probabilities in pseudo-bayes.
-func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
-
-	headers := strings.Fields(sanitizeHeaders(hdr))
-
-	var result = make(map[string]float64)
-	result["BAD"] = 0
-	result["GOOD"] = 0
-
-	var tmpResGood, tmpResBad float64
-
-	for _, token := range headers {
-
-		if _, ok := c.BAD[token]; ok {
-			tmpResBad += c.BAD[token]
-		}
-
-		if _, ok := c.GOOD[token]; ok {
-			tmpResGood += c.GOOD[token]
-		}
-
-	}
-
-	tmpTotal := tmpResBad + tmpResGood
-
-	if tmpTotal == 0 {
-		tmpTotal = 1
-	}
-
-	result["BAD"] = tmpResBad / tmpTotal
-	result["GOOD"] = tmpResGood / tmpTotal
-
-	log.Println(c.Bayes(hdr))
-
-	return result
-
-}
-
-//Janitor keeps the maps under a certain size, keeping the biggest values.
-func (c *ByClassifier) Janitor(size int) {
-
-	log.Println("Janitor Running")
-
-	c.BAD = sortMap(c.BAD, size)
-	c.GOOD = sortMap(c.GOOD, size)
-	c.MEH = sortMap(c.MEH, size)
-
-	log.Println("Janitor Finished.")
-
-}
-
-//CleanThread is the Janitor thread
-func (c *ByClassifier) CleanThread() {
-
-	for {
-		time.Sleep(10 * time.Minute)
-		c.Janitor(1024)
-	}
-}
-
-func (c *ByClassifier) enroll() {
-
-	c.BAD = make(map[string]float64)
-	c.GOOD = make(map[string]float64)
-	c.MEH = make(map[string]float64)
-	c.STATS = make(map[string]int64)
-	ControPlane.BadTokens = make(chan string, 2048)
-	ControPlane.GoodTokens = make(chan string, 2048)
-	ControPlane.StatsTokens = make(chan string, 2048)
-
-	c.IsBAD("Penis")
-	c.IsGOOD("Gun")
-	c.MEH["meh"] = 0
-
-	go c.readBadTokens()
-	go c.readGoodTokens()
-	go c.readStatsTokens()
-
-	log.Println("Classifier populated...")
-	go c.CleanThread()
-	log.Println("Janitor Started")
-
-}
-
-func sortMap(unsorted map[string]float64, size int) map[string]float64 {
-
-	retMap := make(map[string]float64)
-
-	type Myt struct {
-		Name string
-		Num  float64
-	}
-
-	var tempCont []Myt
-	var tc Myt
-
-	for k, v := range unsorted {
-		tc.Name = k
-		tc.Num = v
-		tempCont = append(tempCont, tc)
-	}
-
-	sort.Slice(tempCont, func(i, j int) bool { return tempCont[i].Num > tempCont[j].Num })
-
-	if len(tempCont) > size {
-		tempCont = tempCont[:size]
-	}
-
-	for _, a := range tempCont {
-		retMap[a.Name] = a.Num
-	}
-
-	return retMap
-
-}
-
-func (c *ByClassifier) readBadTokens() {
-
-	log.Println("Start reading BAD tokens")
-
-	for token := range ControPlane.BadTokens {
-		c.IsBAD(token)
-	}
-
-}
-
-func (c *ByClassifier) readGoodTokens() {
-
-	log.Println("Start reading GOOD tokens")
-
-	for token := range ControPlane.GoodTokens {
-		c.IsGOOD(token)
-	}
-
-}
-
-func (c *ByClassifier) readStatsTokens() {
-
-	log.Println("Start reading STATS tokens")
-
-	for token := range ControPlane.StatsTokens {
-		c.AddStats(token)
-	}
-
-}
-
-func (c *ByClassifier) Bayes(hdr string) string {
-
-	classifier := naive.New()
-	headers := sanitizeHeaders(hdr)
-
-	for k, _ := range c.BAD {
-		classifier.TrainString(k, "BAD")
-	}
-
-	for k, _ := range c.GOOD {
-		classifier.TrainString(k, "GOOD")
-	}
-
-	if classification, err := classifier.ClassifyString(headers); err == nil {
-		return classification // ham
-	} else {
-		log.Println("error: ", err)
-	}
-
-	return ""
-
-}

+ 3 - 3
run.sh

@@ -1,8 +1,8 @@
 export REVERSEURL=https://google.com 
-export PROXYPORT=":8080" 
+export PROXYPORT=":8089" 
 export TRIGGER="0.6"
 #export SENIORITY="1025"
-export SENIORITY="5"
+export SENIORITY="15"
 export DEBUG="true"
-export DUMPFILE="bayes.txt"
+export DUMPFILE="bayes.json"
 ./zardoz 

+ 4 - 0
whitelist.txt

@@ -0,0 +1,4 @@
+guns
+cors
+/favicon.ico
+favicon