|
@@ -8,30 +8,33 @@ use std::iter::Iterator;
|
|
|
use regex::Regex;
|
|
|
use unidecode::unidecode;
|
|
|
|
|
|
+use std::collections::HashSet;
|
|
|
+
|
|
|
/// An in-memory word list.
///
/// `Default` yields an empty dictionary with the cursor at position 0.
#[derive(Debug, Default)]
struct Dictionary {
    /// The stored words, in insertion order.
    words: Vec<String>,
    /// Cursor consulted by the `Iterator` implementation for this type.
    iter_position: usize,
}
|
|
|
|
|
|
impl Dictionary {
|
|
|
- fn load_from_iterable(
|
|
|
- &mut self,
|
|
|
- lines: impl Iterator<Item = Result<String, Error>>,
|
|
|
- ) -> Result<(), std::io::Error> {
|
|
|
- lines
|
|
|
- .map(|line| line.and_then(|line_str| Ok(self.words.push(line_str))))
|
|
|
- .collect()
|
|
|
+ fn load_from_iterable(&mut self, lines: impl Iterator<Item = Result<String, Error>>) {
|
|
|
+ let mut it = lines;
|
|
|
+ // aux_set is used only to deduplicate
|
|
|
+ // so we're still using a normal Vec, and perform deduplication at load time
|
|
|
+ let mut aux_set = HashSet::new();
|
|
|
+ for line in it.by_ref() {
|
|
|
+ let line = line.unwrap();
|
|
|
+ let word = line_to_word(line);
|
|
|
+ if !aux_set.contains(&word) {
|
|
|
+ //println!("Inserisco: {}", word);
|
|
|
+ aux_set.insert(word.clone());
|
|
|
+ self.words.push(word);
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-/*impl Dictionary {
|
|
|
- fn default() -> Dictionary {
|
|
|
- Dictionary{words: vec![], iter_position: 0}
|
|
|
- }
|
|
|
-}*/
|
|
|
-
|
|
|
-impl Iterator for Dictionary {
|
|
|
+impl Iterator for Dictionary{
|
|
|
type Item = String;
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
if self.words.len() >= self.iter_position {
|
|
@@ -44,38 +47,55 @@ impl Iterator for Dictionary {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-fn dictionary_from_iterable(lines: impl Iterator<Item = Result<String, Error>>) -> Dictionary {
|
|
|
- let mut w = vec![];
|
|
|
- for line in lines {
|
|
|
- let line = line.unwrap();
|
|
|
- // TODO: normalizza: lascia solo a-z, converti gli accenti, ecc.
|
|
|
- w.push(line_to_word(line))
|
|
|
- }
|
|
|
- return Dictionary {
|
|
|
- words: w,
|
|
|
- ..Dictionary::default()
|
|
|
- };
|
|
|
-}
|
|
|
-
|
|
|
fn line_to_word(l: String) -> String {
|
|
|
let l = unidecode(&l);
|
|
|
let l = l.to_lowercase();
|
|
|
+ let l = l.replace("'", "");
|
|
|
l
|
|
|
}
|
|
|
|
|
|
+// filtri {{{
|
|
|
+
|
|
|
// Filters should eventually be built from a `Config` struct created by parsing the command-line arguments.
|
|
|
+
|
|
|
+fn matches_regexp(regexp: &str) -> (impl std::ops::FnMut(&&String) -> bool) {
|
|
|
+ // filtro
|
|
|
+ let re = Regex::new(regexp).unwrap();
|
|
|
+ move |w| re.is_match(w.as_str())
|
|
|
+}
|
|
|
+
|
|
|
/// Returns a new string holding the characters of `word` in sorted order.
///
/// Helper for `is_anagram` and similar filters: two words are anagrams exactly
/// when their sorted forms are equal. Example: `house` -> `ehosu`.
///
/// Sorting is done per `char`, not per byte, so multi-byte UTF-8 characters stay
/// intact. For ASCII input the result is the same as a byte-wise sort.
///
/// # Errors
///
/// This implementation never returns `Err`; the `Result` return type is kept so
/// existing callers continue to compile unchanged.
fn sort_word(word: &str) -> Result<String, impl std::error::Error> {
    let mut chars: Vec<char> = word.chars().collect();
    // Stability is irrelevant here: equal chars are indistinguishable.
    chars.sort_unstable();
    Ok::<String, std::convert::Infallible>(chars.into_iter().collect())
}
|
|
|
+
|
|
|
+fn is_anagram(word: &str) -> (impl std::ops::FnMut(&&String) -> bool) {
|
|
|
+ // filtro
|
|
|
+ let sorted_word = sort_word(line_to_word(word.to_string()).as_str()).unwrap();
|
|
|
+ move |w| {
|
|
|
+ sorted_word == sort_word(w.as_str()).unwrap()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// filtri }}}
|
|
|
+
|
|
|
fn main() {
|
|
|
let args: Vec<String> = env::args().collect();
|
|
|
let fname = &args[1];
|
|
|
- let regexp = &args[2];
|
|
|
+ let pattern = &args[2];
|
|
|
|
|
|
let f = File::open(fname).unwrap();
|
|
|
let buf = BufReader::new(&f);
|
|
|
- let d = dictionary_from_iterable(buf.lines());
|
|
|
+ let mut d = Dictionary::default();
|
|
|
+ d.load_from_iterable(buf.lines());
|
|
|
|
|
|
- let re = Regex::new(regexp).unwrap();
|
|
|
- for w in d {
|
|
|
- if re.is_match(w.as_str()) {
|
|
|
- println!("{}", w)
|
|
|
- }
|
|
|
+ //let filter = matches_regexp(pattern);
|
|
|
+ let filter = is_anagram(pattern);
|
|
|
+ for w in d.words.iter().filter(filter) {
|
|
|
+ println!("{}", w)
|
|
|
}
|
|
|
}
|