dirset.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. import hashlib
  2. import os.path
  3. class DirSet(object):
  4. '''
  5. Let you use a directory as a set of strings
  6. It is not exactly a set: you can't iter it, only add, check for existence,
  7. remove
  8. '''
  9. def __init__(self, dirpath):
  10. self.path = dirpath
  11. if not os.path.exists(self.path):
  12. raise ValueError('Path "%s" does not exist' % dirpath)
  13. if not os.path.isdir(self.path):
  14. raise ValueError('Path "%s" is not a directory' % dirpath)
  15. def get_hash(self, obj):
  16. if isinstance(obj, unicode):
  17. obj = obj.encode('utf-8')
  18. m = hashlib.sha256()
  19. m.update(obj)
  20. return m.hexdigest()
  21. def add(self, obj):
  22. fpath = os.path.join(self.path, self.get_hash(obj))
  23. if os.path.exists(fpath):
  24. return False
  25. else:
  26. with open(fpath, 'w') as buf:
  27. buf.write(obj)
  28. return True
  29. def __contains__(self, obj):
  30. fpath = os.path.join(self.path, self.get_hash(obj))
  31. return not os.path.exists(fpath)
  32. def __delitem__(self, obj):
  33. fpath = os.path.join(self.path, self.get_hash(obj))
  34. if not os.path.exists(fpath):
  35. raise Exception('object not found in DirSet')
  36. os.remove(fpath)