Browse Source

Removed some garbage from regexp

Loweel 4 years ago
parent
commit
d1eab1511d
2 changed files with 35 additions and 16 deletions
  1. 33 14
      bayes.json
  2. 2 2
      classifier.go

+ 33 - 14
bayes.json

@@ -1,25 +1,44 @@
 STATS: {
  "BLOCKLEARN": 3,
- "DOWNGRADE": 12,
- "GENERATION": 2,
- "PASSLEARN": 28
+ "DOWNGRADE": 9,
+ "GENERATION": 1,
+ "PASSLEARN": 20
 }
- WORKING: {
- "/": "GOOD",
- "/bau/21770/qualcosa": "BAD",
- "/bau/26179/qualcosa": "BAD",
- "/bau/4415/qualcosa": "BAD",
- "1.1": "MEH",
- "7.58.0": "MEH",
- "8089": "BAD",
+ WORKING: {
+ "/bau/11006/qualcosa": "BAD",
+ "/bau/14612/qualcosa": "BAD",
+ "/bau/21379/qualcosa": "BAD",
+ "/bau/25473/qualcosa": "BAD",
+ "/bau/26686/qualcosa": "BAD",
+ "/bau/29470/qualcosa": "BAD",
+ "/bau/29497/qualcosa": "BAD",
+ "/bau/30467/qualcosa": "BAD",
+ "/bau/9482/qualcosa": "BAD",
+ "/favicon.ico": "GOOD",
  "accept": "MEH",
  "agent": "MEH",
+ "cors": "GOOD",
  "curl": "MEH",
- "get": "MEH",
+ "favicon": "GOOD",
+ "google": "GOOD",
+ "guns": "GOOD",
  "host": "MEH",
  "http": "MEH",
  "localhost": "BAD",
+ "penis": "BAD",
  "user": "MEH",
- "www.google.com": "GOOD"
+ "wallet": "BAD"
+}
+ LEARNING: {
+ "/bau/10651/qualcosa": "BAD",
+ "/bau/10676/qualcosa": "BAD",
+ "/bau/17183/qualcosa": "BAD",
+ "accept": "MEH",
+ "agent": "MEH",
+ "curl": "MEH",
+ "google": "GOOD",
+ "host": "MEH",
+ "http": "MEH",
+ "localhost": "BAD",
+ "user": "MEH"
 }
- LEARNING: {}

+ 2 - 2
classifier.go

@@ -17,8 +17,8 @@ var Zregexp string
 func init() {
 
 	var expressions = []string{
-		`([ ]([A-Za-z0-9-_]+\.)*\w+)`,                      // domain name
-		`([ ]/[A-Za-z0-9-_/.]*[ ])`,                        // URI path (also partial)
+		`([ ]([A-Za-z0-9-_]{4,}\.)+\w+)`,                   // domain name
+		`([ ]/[A-Za-z0-9-_/.]{3,}[ ])`,                     // URI path (also partial)
 		`[[:alpha:]]{4,32}`,                                // alpha digit token
 		`([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})`, // IP address
 		`([0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12})`, // UUID