parser has its own function
This commit is contained in:
parent
67c6bf0a0f
commit
15ff07c4b3
1 changed files with 16 additions and 10 deletions
26
src/main.rs
26
src/main.rs
|
@ -1,14 +1,26 @@
|
||||||
use std::env;
|
use std::env;
|
||||||
use unidecode::unidecode;
|
use std::io::Error;
|
||||||
use regex::Regex;
|
|
||||||
use std::io::BufReader;
|
use std::io::BufReader;
|
||||||
use std::io::BufRead;
|
use std::io::BufRead;
|
||||||
//use std::io;
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
|
||||||
|
use unidecode::unidecode;
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
struct Dictionary {
|
struct Dictionary {
|
||||||
words: Vec<String>,
|
words: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn dictionary_from_iterable(lines: impl Iterator<Item = Result<String, Error>>) -> Dictionary {
|
||||||
|
let mut w = vec![];
|
||||||
|
for line in lines {
|
||||||
|
let line = line.unwrap();
|
||||||
|
// TODO: normalizza: lascia solo a-z, converti gli accenti, ecc.
|
||||||
|
w.push(line_to_word(line))
|
||||||
|
}
|
||||||
|
return Dictionary{words: w};
|
||||||
|
}
|
||||||
|
|
||||||
fn line_to_word(l: String) -> String {
|
fn line_to_word(l: String) -> String {
|
||||||
let l = unidecode(&l);
|
let l = unidecode(&l);
|
||||||
let l = l.to_lowercase();
|
let l = l.to_lowercase();
|
||||||
|
@ -20,16 +32,10 @@ fn main() {
|
||||||
let fname = &args[1];
|
let fname = &args[1];
|
||||||
let regexp = &args[2];
|
let regexp = &args[2];
|
||||||
|
|
||||||
let mut w = vec![];
|
|
||||||
let f = File::open(fname).unwrap();
|
let f = File::open(fname).unwrap();
|
||||||
let buf = BufReader::new(&f);
|
let buf = BufReader::new(&f);
|
||||||
for line in buf.lines() {
|
let d = dictionary_from_iterable(buf.lines());
|
||||||
let line = line.unwrap();
|
|
||||||
// TODO: normalizza: lascia solo a-z, converti gli accenti, ecc.
|
|
||||||
w.push(line_to_word(line))
|
|
||||||
}
|
|
||||||
|
|
||||||
let d = Dictionary{words: w};
|
|
||||||
let re = Regex::new(regexp).unwrap();
|
let re = Regex::new(regexp).unwrap();
|
||||||
for w in &d.words {
|
for w in &d.words {
|
||||||
if re.is_match(w) {
|
if re.is_match(w) {
|
||||||
|
|
Reference in a new issue