deduplicate Dictionary
This commit is contained in:
parent
244406a71f
commit
241114c895
1 changed file with 10 additions and 2 deletions
12
src/main.rs
12
src/main.rs
|
@ -8,6 +8,8 @@ use std::fs::File;
|
||||||
use unidecode::unidecode;
|
use unidecode::unidecode;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
struct Dictionary{
|
struct Dictionary{
|
||||||
words: Vec<String>,
|
words: Vec<String>,
|
||||||
|
@ -17,13 +19,19 @@ struct Dictionary{
|
||||||
impl Dictionary {
|
impl Dictionary {
|
||||||
fn load_from_iterable(&mut self, lines: impl Iterator<Item = Result<String, Error>>) {
|
fn load_from_iterable(&mut self, lines: impl Iterator<Item = Result<String, Error>>) {
|
||||||
let mut it = lines;
|
let mut it = lines;
|
||||||
|
// aux_set is used only to deduplicate
|
||||||
|
// so we still store the words in a normal Vec and perform deduplication at load time
|
||||||
|
let mut aux_set = HashSet::new();
|
||||||
for line in it.by_ref() {
|
for line in it.by_ref() {
|
||||||
let line = line.unwrap();
|
let line = line.unwrap();
|
||||||
let word = line_to_word(line);
|
let word = line_to_word(line);
|
||||||
self.words.push(word);
|
if !aux_set.contains(&word) {
|
||||||
|
//println!("Inserisco: {}", word);
|
||||||
|
aux_set.insert(word.clone());
|
||||||
|
self.words.push(word);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for Dictionary{
|
impl Iterator for Dictionary{
|
||||||
|
|
Reference in a new issue