Browse Source

deduplicate Dictionary

boyska 4 years ago
parent
commit
241114c895
1 changed file with 10 additions and 2 deletions
  1. 10 2
      src/main.rs

+ 10 - 2
src/main.rs

@@ -8,6 +8,8 @@ use std::fs::File;
 use unidecode::unidecode;
 use regex::Regex;
 
+use std::collections::HashSet;
+
 #[derive(Default)]
 struct  Dictionary{
     words: Vec<String>,
@@ -17,13 +19,19 @@ struct  Dictionary{
 impl Dictionary {
     fn load_from_iterable(&mut self, lines: impl Iterator<Item = Result<String, Error>>) {
         let mut it = lines;
+        // aux_set is used only to detect duplicates while loading;
+        // words are still stored in a plain Vec, deduplicated at load time.
+        let mut aux_set = HashSet::new();
         for line in it.by_ref() {
             let line = line.unwrap();
             let word = line_to_word(line);
-            self.words.push(word);
+            if !aux_set.contains(&word) {
+                //println!("Inserisco: {}", word);
+                aux_set.insert(word.clone());
+                self.words.push(word);
+            }
         }
     }
-
 }
 
 impl Iterator for Dictionary{