boyska преди 10 години
родител
ревизия
21a1bdcac8
променени са 5 файла, в които са добавени 100 реда и са изтрити 11 реда
  1. 1 0
      .gitignore
  2. 20 0
      README.txt
  3. 4 0
      defaultconf.json
  4. 43 0
      dirset.py
  5. 32 11
      leggi.py

+ 1 - 0
.gitignore

@@ -2,3 +2,4 @@ cred.txt
 ve
 *.log
 .*.swp
+*.pyc

+ 20 - 0
README.txt

@@ -0,0 +1,20 @@
+Just a scraper that stores the calls it finds in a DirSet (a directory used as
+a set of strings).
+
+To receive notifications, watch that directory in some way: every new call
+creates a new file in it.
+
+Configuration
+=============
+
+The default configuration is read from `defaultconf.json`. To tweak it, create
+another JSON file that contains a dictionary and pass its path as the first
+argument. The two files are merged, with the custom values taking precedence,
+so the custom file can be a subset of the default one.
+
+Credential file is very simple: first line contains the username, second line
+contains the password. Subsequent lines are ignored.
+
+Running
+=======
+
+`python leggi.py`
+or
+`python leggi.py /path/to/custom.json`

+ 4 - 0
defaultconf.json

@@ -0,0 +1,4 @@
+{
+	"datadir": "data/",
+	"credfile": "cred.txt"
+}

+ 43 - 0
dirset.py

@@ -0,0 +1,43 @@
+import hashlib
+import os.path
+
+
+class DirSet(object):
+    '''
+    Let you use a directory as a set of strings
+
+    It is not exactly a set: you can't iter it, only add, check for existence,
+    remove
+    '''
+    def __init__(self, dirpath):
+        self.path = dirpath
+        if not os.path.exists(self.path):
+            raise ValueError('Path "%s" does not exist' % dirpath)
+        if not os.path.isdir(self.path):
+            raise ValueError('Path "%s" is not a directory' % dirpath)
+
+    def get_hash(self, obj):
+        if isinstance(obj, unicode):
+            obj = obj.encode('utf-8')
+        m = hashlib.sha256()
+        m.update(obj)
+        return m.hexdigest()
+
+    def add(self, obj):
+        fpath = os.path.join(self.path, self.get_hash(obj))
+        if os.path.exists(fpath):
+            return False
+        else:
+            with open(fpath, 'w') as buf:
+                buf.write(obj)
+                return True
+
+    def __contains__(self, obj):
+        fpath = os.path.join(self.path, self.get_hash(obj))
+        return not os.path.exists(fpath)
+
+    def __delitem__(self, obj):
+        fpath = os.path.join(self.path, self.get_hash(obj))
+        if not os.path.exists(fpath):
+            raise Exception('object not found in DirSet')
+        os.remove(fpath)

+ 32 - 11
leggi.py

@@ -1,5 +1,11 @@
+import json
+import sys
+from itertools import imap
+
 from splinter import Browser
 
+from dirset import DirSet
+
 
 def read_pass_file(fname):
     with open(fname) as buf:
@@ -8,16 +14,31 @@ def read_pass_file(fname):
     return user, pwd
 
 
-user, password = read_pass_file('cred.txt')
+def get_calls(user, password):
+    with Browser('phantomjs') as b:
+        b.visit('https://www.messagenet.com/')
+        b.fill('userid', user)
+        b.fill('password', password)
+        b.find_by_css('#login button').click()
+
+        b.visit('https://www.messagenet.com/voip/log/?chiamate=ricevute')
+        rows = b.find_by_css('.log .statusKO')
+        for r in rows:
+            cells = r.find_by_tag('td')[1:3]
+            yield tuple(imap(lambda c: c.value, cells))
+
+
+def save_calls(calls, datadir):
+    s = DirSet(datadir)
+    for call in imap(lambda t: '\t'.join(t), calls):
+        if s.add(call):  # wasn't existing before
+            print 'NEW: %s' % call
 
-with Browser('phantomjs') as b:
-    b.visit('https://www.messagenet.com/')
-    b.fill('userid', user)
-    b.fill('password', password)
-    b.find_by_css('#login button').click()
 
-    b.visit('https://www.messagenet.com/voip/log/?chiamate=ricevute')
-    rows = b.find_by_css('.log .statusKO')
-    for r in rows:
-        cells = r.find_by_tag('td')[1:3]
-        print '\t'.join(map(lambda c: c.value, cells))
+if __name__ == '__main__':
+    conf = json.load(open('defaultconf.json'))
+    if len(sys.argv) == 2:
+        conf.update(json.load(open(sys.argv[1])))
+    user, password = read_pass_file(conf['credfile'])
+    calls = tuple(get_calls(user, password))
+    save_calls(calls, conf['datadir'])