Browse Source

decode accents in dict

boyska 3 years ago
parent
commit
67c6bf0a0f
3 changed files with 12 additions and 1 deletion
  1. 7 0
      Cargo.lock
  2. 1 0
      Cargo.toml
  3. 4 1
      src/main.rs

+ 7 - 0
Cargo.lock

@@ -39,6 +39,7 @@ name = "sciarada"
 version = "0.1.0"
 dependencies = [
  "regex 1.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unidecode 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -49,6 +50,11 @@ dependencies = [
  "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
+[[package]]
+name = "unidecode"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
 [metadata]
 "checksum aho-corasick 0.7.10 (registry+https://github.com/rust-lang/crates.io-index)" = "8716408b8bc624ed7f65d223ddb9ac2d044c0547b6fa4b0d554f3a9540496ada"
 "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
@@ -56,3 +62,4 @@ dependencies = [
 "checksum regex 1.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a6020f034922e3194c711b82a627453881bc4682166cabb07134a10c26ba7692"
 "checksum regex-syntax 0.6.17 (registry+https://github.com/rust-lang/crates.io-index)" = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae"
 "checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
+"checksum unidecode 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "402bb19d8e03f1d1a7450e2bd613980869438e0666331be3e073089124aa1adc"

+ 1 - 0
Cargo.toml

@@ -6,3 +6,4 @@ edition = "2018"
 
 [dependencies]
 regex = "1"
+unidecode = "0.3.0"

+ 4 - 1
src/main.rs

@@ -1,4 +1,5 @@
 use std::env;
+use unidecode::unidecode;
 use regex::Regex;
 use std::io::BufReader;
 use std::io::BufRead;
@@ -9,7 +10,9 @@ struct Dictionary {
 }
 
 fn line_to_word(l: String) -> String {
-    return l.to_lowercase()
+    let l = unidecode(&l);
+    let l = l.to_lowercase();
+    l
 }
 
 fn main() {