Browse Source

Limit number of tokens collected per generation.

Loweel 4 years ago
parent
commit
4f831cd0a9
5 changed files with 20 additions and 12 deletions
  1. 5 6
      README.md
  2. 2 1
      alloc.go
  3. 4 4
      classifier.go
  4. 8 0
      main.go
  5. 1 1
      matrix.go

+ 5 - 6
README.md

@@ -40,7 +40,7 @@ ENV TRIGGER	0.6
 ENV SENIORITY	1025
 ENV DEBUG false
 ENV DUMPFILE /somewhere/bayes.txt
-ENV REFRESHTIME 24h
+ENV COLLECTION 2048
 ```
 
 Using a bash script, this means something like:
@@ -52,7 +52,7 @@ export TRIGGER="0.6"
 export SENIORITY="1025"
 export DEBUG="true"
 export DUMPFILE="/somewhere/bayes.txt"
-export REFRESHTIME 24h
+export COLLECTION="2048"
 ./zardoz 
 ```
 
@@ -89,11 +89,10 @@ The parameter "SENIORITY" is then the amount of requests it will set in "PASS+LE
 
 This is where you want the dumpfile to be saved. Useful with Docker volumes.
 
-**REFRESHTIME**
+**COLLECTION**
+
+The number of collected tokens considered enough to do a good job. This depends on your service. It is useful for limiting memory usage if, for example, your server has very complex content.
 
-Interval to refresh the spurious records. Some string is classified both as good and bad. ('Meh'). 
-To optimize and make the daemon smaller, we clean them from time to time. REFRESHTIME is the amount of
-hours between cleaning. Duration is in Golang time.Duration syntax, like "24h" or "1d10h31m". 
 
 **TROUBLESHOOTING:**
 

+ 2 - 1
alloc.go

@@ -13,6 +13,7 @@ type HTTPFlow struct {
 	response    *http.Response
 	sensitivity float64 // value that triggers decisions
 	seniority   int64
+	collection  float64
 	refreshtime time.Duration
 }
 
@@ -22,7 +23,7 @@ var DebugLog bool
 //ProxyFlow represents our flow
 var ProxyFlow HTTPFlow
 
-//Classifier is our bayesian classifier
+//ZClassifier is our bayesian classifier
 var ZClassifier *ByClassifier
 
 //BlockMessage is the message we return when blocking

+ 4 - 4
classifier.go

@@ -11,7 +11,7 @@ import (
 	"strings"
 )
 
-//Zexpression is the set of regexp being used by zardoz
+//Zexpressions is the set of regexp being used by zardoz
 var Zexpressions = []string{
 	`[[:alpha:]]{4,32}`,                              // alpha digit token
 	`[ ]([A-Za-z0-9-_]{4,}\.)+\w+`,                   // domain name
@@ -29,7 +29,7 @@ func passAndLearn(resp *http.Response) error {
 	switch {
 	case isAuth(resp):
 		log.Println("401: We don't want to store credentials")
-	case IsError(resp):
+	case isError(resp):
 		buf := bytes.NewBufferString(BlockMessage)
 		resp.Body = ioutil.NopCloser(buf)
 		resp.Status = "403 Forbidden"
@@ -65,7 +65,7 @@ func blockAndlearn(resp *http.Response) error {
 	switch {
 	case isAuth(resp):
 		log.Println("401: We don't want to store credentials")
-	case IsError(resp):
+	case isError(resp):
 		log.Println("Filing inside bad class")
 		feedRequest(req, "BAD")
 	case isSuccess(resp):
@@ -168,6 +168,6 @@ func isAuth(resp *http.Response) bool {
 	return resp.StatusCode == 401
 }
 
-func IsError(resp *http.Response) bool {
+func isError(resp *http.Response) bool {
 	return resp.StatusCode >= 400 && resp.StatusCode != 401
 }

+ 8 - 0
main.go

@@ -15,6 +15,7 @@ func main() {
 	pport := os.Getenv("PROXYPORT")
 	sensitivity := os.Getenv("TRIGGER")
 	maturity := os.Getenv("SENIORITY")
+	collect := os.Getenv("COLLECTION")
 
 	log.Println("Reverse path is: ", vip)
 	log.Println("Reverse port is: ", pport)
@@ -36,6 +37,13 @@ func main() {
 	}
 	log.Println("Minimum request to learn: ", Maturity)
 
+	ProxyFlow.collection, err = strconv.ParseFloat(collect, 64)
+	if err != nil {
+		// COLLECTION could not be parsed: fall back to Maturity, assuming every example adds at least one token
+		ProxyFlow.collection = float64(Maturity)
+	}
+	log.Println("Collection limit is: ", ProxyFlow.collection)
+
 	proxy := httputil.NewSingleHostReverseProxy(remote)
 	http.HandleFunc("/", handler(proxy))
 	err = http.ListenAndServe(pport, nil)

+ 1 - 1
matrix.go

@@ -258,7 +258,7 @@ func (c *ByClassifier) updateLearners() {
 		}
 		log.Println("Current Generation is: ", currentGen)
 		log.Println("Working Generation is: ", c.Generation)
-		if currentGen > c.Generation {
+		if currentGen > c.Generation || float64(len(c.Learning.sMap)) > ProxyFlow.collection {
 			c.Learning.busy.Lock()
 			c.Working.busy.Lock()
 			c.Working.sMap = c.Learning.sMap