main.rs 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. use std::env;
  2. use std::fs::File;
  3. use std::io::BufRead;
  4. use std::io::BufReader;
  5. use std::io::Error;
  6. use std::iter::Iterator;
  7. use regex::Regex;
  8. use unidecode::unidecode;
  9. use std::collections::HashSet;
  10. #[derive(Default, Debug)]
  11. struct Dictionary {
  12. words: Vec<String>,
  13. iter_position: usize,
  14. }
  15. impl Dictionary {
  16. fn words_from_iterable(
  17. lines: impl Iterator<Item = Result<String, Error>>,
  18. ) -> Result<Vec<String>, Error> {
  19. // aux_set is used only to deduplicate
  20. // so we're still using a normal Vec, and perform deduplication at load time
  21. let mut aux_set = HashSet::new();
  22. lines
  23. .map(|res| match res {
  24. Ok(word) => Ok(line_to_word(word.as_str())),
  25. _ => res,
  26. })
  27. .filter(|res| match res {
  28. Ok(word) => aux_set.insert(word.clone()),
  29. _ => false,
  30. })
  31. .collect()
  32. }
  33. fn load(fname: String) -> Result<Self, Error> {
  34. let maybe_words = File::open(fname)
  35. .and_then(|open_f| Ok(BufReader::new(open_f)))
  36. .and_then(|buf| Self::words_from_iterable(buf.lines()));
  37. match maybe_words {
  38. Ok(words) => Ok(Dictionary {
  39. words,
  40. iter_position: 0,
  41. }),
  42. Err(err) => Err(err),
  43. }
  44. }
  45. }
  46. impl Iterator for Dictionary {
  47. type Item = String;
  48. fn next(&mut self) -> Option<Self::Item> {
  49. if self.words.len() >= self.iter_position {
  50. None
  51. } else {
  52. let v = self.words[self.iter_position].clone();
  53. self.iter_position += 1;
  54. Some(v)
  55. }
  56. }
  57. }
  58. fn line_to_word(line: &str) -> String {
  59. unidecode(&line.to_string()).to_lowercase().replace("'", "")
  60. }
  61. // filtri {{{
// Filters should be created by a Config{} struct built by parsing the command-line arguments.
  63. #[allow(dead_code)]
  64. fn matches_regexp(regexp: &str) -> impl std::ops::FnMut(&&String) -> bool {
  65. // filtro
  66. let re = Regex::new(regexp).unwrap();
  67. move |w| re.is_match(w.as_str())
  68. }
  69. fn sort_word(word: &str) -> Result<String, impl std::error::Error> {
  70. // funzione ausiliaria, utile per la is_anagram e cose simili
  71. // ritorna una COPIA
  72. // esempio: house -> ehosu
  73. let mut w_bytes = word.to_string().into_bytes();
  74. w_bytes.sort();
  75. String::from_utf8(w_bytes)
  76. }
  77. fn is_anagram(word: &str) -> impl std::ops::FnMut(&&String) -> bool {
  78. // filtro
  79. let sorted_word = sort_word(line_to_word(word).as_str()).unwrap();
  80. move |w| {
  81. if sorted_word.len() != w.len() {
  82. // this check doesn't add any correctness to the algorithm, but
  83. // is a small optimization: avoids sorting w if the length is different
  84. false
  85. } else {
  86. let sorted_other = sort_word(w.as_str()).unwrap();
  87. sorted_word == sorted_other
  88. }
  89. }
  90. }
  91. // filtri }}}
  92. fn main() {
  93. let args: Vec<String> = env::args().collect();
  94. let fname = &args[1];
  95. let pattern = &args[2];
  96. let d = Dictionary::load(fname.to_string());
  97. //let filter = matches_regexp(pattern);
  98. let filter = is_anagram(pattern);
  99. if let Ok(dict) = d {
  100. for w in dict.words.iter().filter(filter) {
  101. println!("{}", w);
  102. }
  103. };
  104. }