deduplicate Dictionary

This commit is contained in:
boyska 2020-05-18 23:56:44 +02:00
parent 244406a71f
commit 241114c895

View file

@ -8,6 +8,8 @@ use std::fs::File;
use unidecode::unidecode;
use regex::Regex;
use std::collections::HashSet;
#[derive(Default)]
struct Dictionary{
words: Vec<String>,
@ -17,13 +19,19 @@ struct Dictionary{
impl Dictionary {
    /// Appends the words produced from `lines` to `self.words`, skipping repeats.
    ///
    /// Every line is converted with `line_to_word`. A word is stored only the
    /// first time it is seen during this call, so the order of first occurrence
    /// is preserved. Words that were already in `self.words` before the call are
    /// not consulted, because the lookup set starts out empty.
    ///
    /// # Panics
    ///
    /// Panics if any item yielded by `lines` is an `Err`.
    fn load_from_iterable(&mut self, lines: impl Iterator<Item = Result<String, Error>>) {
        // `seen` exists only to detect repeats at load time; the dictionary itself
        // keeps storing its words in a plain `Vec`.
        let mut seen = HashSet::new();
        for line in lines {
            let word = line_to_word(line.expect("failed to read a dictionary line"));
            // `insert` returns `true` only when the word was not yet in the set,
            // so a single lookup decides whether the word is new.
            if seen.insert(word.clone()) {
                self.words.push(word);
            }
        }
    }
}
impl Iterator for Dictionary{