store.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. import hashlib
  2. import os
  3. import re
  4. from pathlib import Path
  5. import logging
  # Module-level logger named after this module (rather than the root logger)
  # so applications can filter or configure this module's messages separately.
  logger = logging.getLogger(__name__)
  def get_fname(url):
      """Return the file name under which *url* is stored.

      The name is the hexadecimal MD5 digest of the URL encoded as UTF-8.
      """
      encoded_url = url.encode("utf8")
      return hashlib.md5(encoded_url).hexdigest()
  class Store:
      """File-backed bookmark store: one file per bookmark under ``basedir``.

      Each bookmark is encoded by ``self.serializer`` and written to a file
      named by ``get_fname(url)``.
      """

      def __init__(self, basedir: Path = None):
          """Create a store rooted at *basedir*.

          Args:
              basedir: Root directory of the store (a ``Path`` or a string).
                  Defaults to ``~/.local/share/marxbook/bookmarks/`` with the
                  home directory expanded.
          """
          if basedir is None:
              basedir = Path("~/.local/share/marxbook/bookmarks/").expanduser()
          # Normalise so that plain strings are accepted as well as Path objects.
          self.basedir = Path(basedir)
          self.serializer = Serializer()

      def add(self, url: str, title=None, tag=None, description=""):
          """Write the bookmark for *url*, overwriting an existing one.

          Args:
              url: Bookmark URL; it also determines the file name.
              title: Optional title.
              tag: Iterable of tags, or a single tag string. ``None`` (the
                  default) means no tags.
              description: Free-form description.
          """
          # A fresh list per call: a mutable default would be shared by all calls.
          if tag is None:
              tags = []
          elif isinstance(tag, str):
              tags = [tag]
          else:
              tags = list(tag)

          dest = self.basedir
          dest.mkdir(parents=True, exist_ok=True)
          fpath = dest / get_fname(url)
          # The serializer special-cases the "tag" key and writes one "Tag:" line
          # per tag, which is the form its decode() collects back into a list.
          content = self.serializer.encode(
              dict(url=url, title=title, tag=tags, description=description)
          )
          with fpath.open("w") as buf:
              buf.write(content)

      def get(self, path: str):
          """Return the decoded bookmark stored at *path* (relative to basedir)."""
          fpath = self.basedir / path
          with fpath.open() as buf:
              return self.serializer.decode(buf.read())

      def find(self, prefix_path: str) -> Path:
          """Resolve a possibly abbreviated bookmark path.

          prefix_path is a special form of contraction. Let's say
          prefix_path=utils/time/5e: if there is a single file starting with 5e
          inside utils/time, then that's found!

          A *prefix_path* that already exists on the filesystem is returned
          as it is (wrapped in a ``Path``).

          Raises:
              FileNotFoundError: no file in the store starts with the prefix.
              ValueError: more than one file starts with the prefix.
          """
          if os.path.exists(prefix_path):
              # Return a Path here too, as the annotation promises.
              return Path(prefix_path)
          candidates = list(self.basedir.glob(prefix_path + "*"))
          if not candidates:
              raise FileNotFoundError("%s not found" % prefix_path)
          if len(candidates) > 1:
              raise ValueError("Ambiguous prefix %s" % prefix_path)
          return candidates[0]

      def __iter__(self):
          """Yield one dict per stored bookmark.

          Each dict holds the decoded bookmark fields plus a ``"Path"`` entry
          with the file path relative to ``basedir``.
          """
          for urlfile in self.basedir.glob("**/*"):
              if not urlfile.is_file():
                  continue
              # get() joins its argument onto basedir, so hand it the relative
              # path; passing urlfile would double the prefix when basedir is
              # itself a relative path.
              relpath = urlfile.relative_to(self.basedir)
              ret = {"Path": str(relpath)}
              ret.update(self.get(relpath))
              yield ret

      def folder(self, folder: str):
          """Return a new ``Store`` rooted at the sub-directory *folder*."""
          return Store(self.basedir / folder)

      def move(self, path: str, dest_dir: str):
          """Move the bookmark file *path* into *dest_dir*, creating it if needed.

          Both arguments are relative to ``basedir``.

          Raises:
              ValueError: *dest_dir* exists but is not a directory.
          """
          dest = self.basedir / dest_dir
          if dest.exists() and not dest.is_dir():
              raise ValueError(
                  "destination '%s' already exists and is not a directory" % dest_dir
              )
          dest.mkdir(parents=True, exist_ok=True)
          fpath = self.basedir / path
          fpath.rename(dest / fpath.name)
  # One "Key: value" header per line. The key is the text before the first
  # colon, which must be followed by a space; the value is the rest of the line
  # and may itself contain colons (e.g. URLs).
  HEADER_LINE = re.compile(r"^([^:]+): (.*)$")


  class Serializer:
      """Convert bookmark dicts to and from the ``Key: value`` text format."""

      # Title-cased key names whose collection value is written as repeated
      # "Tag:" lines: "tag" is the documented key, "Tag" is what decode()
      # produces, and "tags" is accepted as an alias.
      _TAG_KEYS = ("Tag", "Tags")

      def encode(self, data: dict) -> str:
          """Serialize *data* to text, one ``Key: value`` line per entry.

          Keys are title-cased and newlines in values are replaced by spaces.
          Tags are special: a list/tuple/set stored under ``tag`` (any case, or
          ``tags``) is written last, as one ``Tag: ...`` line per element.
          *data* itself is not modified.
          """
          headers = []
          tags = []
          for key, value in data.items():
              name = key.title()
              if name in self._TAG_KEYS:
                  if value is None:
                      # No tags at all: nothing to write.
                      continue
                  if isinstance(value, (list, tuple, set, frozenset)):
                      tags.extend(value)
                      continue
              headers.append("%s: %s\n" % (name, str(value).replace("\n", " ")))
          # A newline inside a tag would break the line-based format.
          headers.extend(
              "%s: %s\n" % ("Tag", str(tag).replace("\n", " ")) for tag in tags
          )
          return "".join(headers)

      def decode(self, content: str) -> dict:
          """Parse text produced by :meth:`encode` back into a dict.

          Keys are title-cased. Every ``Tag`` line is appended to the list
          under ``"Tag"`` (always present, possibly empty); for any other
          repeated key the last value wins. Blank lines are skipped and lines
          that do not match ``HEADER_LINE`` are logged as errors and ignored.
          """
          d: dict = {"Tag": []}
          for num, line in enumerate(content.split("\n"), 1):
              if not line.strip():
                  continue
              m = HEADER_LINE.match(line)
              if m is None:
                  logger.error("Invalid line %d", num)
                  continue
              key, value = m.groups()
              key = key.title()
              if key == "Tag":
                  d[key].append(value)
              else:
                  d[key] = value
          return d
  if __name__ == "__main__":
      import sys

      # Ad-hoc debugging entry point: print every bookmark in the store, or
      # only those under the folder given as the first command-line argument.
      # (Store has no list() method; iterating a folder store is the
      # closest existing equivalent.)
      s = Store()
      if len(sys.argv) > 1:
          s = s.folder(sys.argv[1])
      for line in s:
          print(line)