Browse Source

Better dissection of strings

Loweel 4 years ago
parent
commit
cbb8ccbbdf
2 changed files with 52 additions and 41 deletions
  1. 25 16
      bayes.json
  2. 27 25
      classifier.go

+ 25 - 16
bayes.json

@@ -1,19 +1,25 @@
 STATS: {
- "BLOCKLEARN": 3,
- "DOWNGRADE": 9,
+ "BLOCKLEARN": 2,
+ "DOWNGRADE": 12,
  "GENERATION": 1,
- "PASSLEARN": 20
+ "PASSLEARN": 26
 }
- WORKING: {q
- "/bau/11006/qualcosa": "BAD",
- "/bau/14612/qualcosa": "BAD",
- "/bau/21379/qualcosa": "BAD",
- "/bau/25473/qualcosa": "BAD",
- "/bau/26686/qualcosa": "BAD",
- "/bau/29470/qualcosa": "BAD",
- "/bau/29497/qualcosa": "BAD",
- "/bau/30467/qualcosa": "BAD",
- "/bau/9482/qualcosa": "BAD",
+ WORKING: {
+ "/.well-known/host-meta": "BAD",
+ "/.well-known/host-meta/": "BAD",
+ "/.well-known/nodeinfo": "BAD",
+ "/bau/11113/qualcosa": "BAD",
+ "/bau/11374/qualcosa": "BAD",
+ "/bau/16737/qualcosa": "BAD",
+ "/bau/19339/qualcosa": "BAD",
+ "/bau/20319/qualcosa": "BAD",
+ "/bau/21308/qualcosa": "BAD",
+ "/bau/23512/qualcosa": "BAD",
+ "/bau/23735/qualcosa": "BAD",
+ "/bau/24067/qualcosa": "BAD",
+ "/bau/28684/qualcosa": "BAD",
+ "/bau/3046/qualcosa": "BAD",
+ "/bau/3822/qualcosa": "BAD",
  "/favicon.ico": "GOOD",
  "accept": "MEH",
  "agent": "MEH",
@@ -25,14 +31,15 @@ STATS: {
  "host": "MEH",
  "http": "MEH",
  "localhost": "BAD",
+ "null": "MEH",
  "penis": "BAD",
+ "qualcosa": "BAD",
  "user": "MEH",
  "wallet": "BAD"
 }
  LEARNING: {
- "/bau/10651/qualcosa": "BAD",
- "/bau/10676/qualcosa": "BAD",
- "/bau/17183/qualcosa": "BAD",
+ "/bau/14899/qualcosa": "BAD",
+ "/bau/26490/qualcosa": "BAD",
  "accept": "MEH",
  "agent": "MEH",
  "curl": "MEH",
@@ -40,5 +47,7 @@ STATS: {
  "host": "MEH",
  "http": "MEH",
  "localhost": "BAD",
+ "null": "MEH",
+ "qualcosa": "BAD",
  "user": "MEH"
 }

+ 27 - 25
classifier.go

@@ -11,21 +11,13 @@ import (
 	"strings"
 )
 
-//Zregexp is the regular expression used by ZARDOZ to process headers
-var Zregexp string
-
-func init() {
-
-	var expressions = []string{
-		`([ ]([A-Za-z0-9-_]{4,}\.)+\w+)`,                   // domain name
-		`([ ]/[A-Za-z0-9-_/.]{4,}[ ])`,                     // URI path (also partial)
-		`[[:alpha:]]{4,32}`,                                // alpha digit token
-		`([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})`, // IP address
-		`([0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12})`, // UUID
-	}
-
-	Zregexp = strings.Join(expressions, "|")
-
+// Zexpressions is the set of regular expressions used by zardoz.
+var Zexpressions = []string{
+	`[[:alpha:]]{4,32}`,                              // alpha digit token
+	`[ ]([A-Za-z0-9-_]{4,}\.)+\w+`,                   // domain name
+	`[ ]/[A-Za-z0-9-_/.]{4,}[ ]`,                     // URI path (also partial)
+	`[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}`, // IP address
+	`[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}`, // UUID
 }
 
 func passAndLearn(resp *http.Response) error {
@@ -86,20 +78,30 @@ func blockAndlearn(resp *http.Response) error {
 
 func sanitizeHeaders(s string) string {
 
-	re, rerr := regexp.Compile(Zregexp)
-	if rerr != nil {
-		log.Println("Error Compiling regular expression: ", Zregexp)
-	}
+	var collect []string
+	ss := strings.ToLower(s)
+
+	for _, zregexp := range Zexpressions {
+
+		re, rerr := regexp.Compile(zregexp)
+		if rerr != nil {
+			log.Println("Error Compiling regular expression: ", zregexp)
+		}
 
-	matched := re.FindAllString(s, -1)
+		matched := re.FindAllString(ss, -1)
+		if matched == nil {
+			matched = []string{"null"}
+		}
 
-	uMatched := Unique(matched)
+		collect = append(collect, matched...)
+
+	}
 
-	tmpSt := strings.ToLower(strings.Join(uMatched, " "))
+	uMatched := Unique(collect)
 
-	log.Println("Matched: " + tmpSt)
+	log.Println("Matched: ", uMatched)
 
-	return tmpSt
+	return strings.Join(uMatched, " ")
 
 }
 
@@ -165,5 +167,5 @@ func isAuth(resp *http.Response) bool {
 }
 
 func IsError(resp *http.Response) bool {
-	return resp.StatusCode >= 400
+	return resp.StatusCode >= 400 && resp.StatusCode != 401
 }