deduplicate Dictionary

This commit is contained in:
boyska 2020-05-18 23:56:44 +02:00
parent 244406a71f
commit 241114c895

View file

@ -8,6 +8,8 @@ use std::fs::File;
use unidecode::unidecode; use unidecode::unidecode;
use regex::Regex; use regex::Regex;
use std::collections::HashSet;
#[derive(Default)] #[derive(Default)]
struct Dictionary{ struct Dictionary{
words: Vec<String>, words: Vec<String>,
@ -17,13 +19,19 @@ struct Dictionary{
impl Dictionary { impl Dictionary {
fn load_from_iterable(&mut self, lines: impl Iterator<Item = Result<String, Error>>) { fn load_from_iterable(&mut self, lines: impl Iterator<Item = Result<String, Error>>) {
let mut it = lines; let mut it = lines;
// aux_set is used only to deduplicate
// so we're still using a normal Vec, and perform deduplication at load time
let mut aux_set = HashSet::new();
for line in it.by_ref() { for line in it.by_ref() {
let line = line.unwrap(); let line = line.unwrap();
let word = line_to_word(line); let word = line_to_word(line);
self.words.push(word); if !aux_set.contains(&word) {
//println!("Inserisco: {}", word);
aux_set.insert(word.clone());
self.words.push(word);
}
} }
} }
} }
impl Iterator for Dictionary{ impl Iterator for Dictionary{