From 7e94cfb43f7879a2916fe4acd4b176f75ab2bcab Mon Sep 17 00:00:00 2001
From: Davide Alberani
Date: Thu, 25 Jan 2018 22:46:44 +0100
Subject: [PATCH] code documentation

---
 diffido.py | 252 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 209 insertions(+), 43 deletions(-)

diff --git a/diffido.py b/diffido.py
index 7f65cda..b3ed4f6 100755
--- a/diffido.py
+++ b/diffido.py
@@ -1,5 +1,20 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
+"""Diffido - because the F5 key is a terrible thing to waste.
+
+Copyright 2018 Davide Alberani
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
 
 import os
 import re
@@ -46,6 +61,10 @@ logger.setLevel(logging.INFO)
 
 
 def read_schedules():
+    """Return the schedules configuration.
+
+    :returns: dictionary from the JSON object in conf/schedules.json
+    :rtype: dict"""
     if not os.path.isfile(SCHEDULES_FILE):
         return {'schedules': {}}
     try:
@@ -57,11 +76,28 @@
 
 
 def write_schedules(schedules):
-    with open(SCHEDULES_FILE, 'w') as fd:
-        fd.write(json.dumps(schedules, indent=2))
+    """Write the schedules configuration.
+
+    :param schedules: the schedules to save
+    :type schedules: dict
+    :returns: True in case of success
+    :rtype: bool"""
+    try:
+        with open(SCHEDULES_FILE, 'w') as fd:
+            fd.write(json.dumps(schedules, indent=2))
+    except Exception as e:
+        logger.error('unable to write %s: %s' % (SCHEDULES_FILE, e))
+        return False
+    return True
 
 
 def next_id(schedules):
+    """Return the next available integer (as a string) in the list of schedules keys (does not fill holes)
+
+    :param schedules: the schedules
+    :type schedules: dict
+    :returns: the ID of the next schedule
+    :rtype: str"""
     ids = schedules.get('schedules', {}).keys()
     if not ids:
         return '1'
@@ -69,7 +105,18 @@
 
 
 def get_schedule(id_, add_id=True):
-    schedules = read_schedules()
+    """Return information about a single schedule
+
+    :param id_: ID of the schedule
+    :type id_: str
+    :param add_id: if True, add the ID in the dictionary
+    :type add_id: bool
+    :returns: the schedule
+    :rtype: dict"""
+    try:
+        schedules = read_schedules()
+    except Exception:
+        return {}
     data = schedules.get('schedules', {}).get(id_, {})
     if add_id:
         data['id'] = str(id_)
@@ -77,6 +124,14 @@
 
 
 def select_xpath(content, xpath):
+    """Select a portion of an HTML document
+
+    :param content: the content of the document
+    :type content: str
+    :param xpath: the XPath selector
+    :type xpath: str
+    :returns: the selected document
+    :rtype: str"""
     fd = io.StringIO(content)
     tree = etree.parse(fd)
     elems = tree.xpath(xpath)
@@ -91,16 +146,29 @@
 
 
 def run_job(id_=None, *args, **kwargs):
+    """Run a job
+
+    :param id_: ID of the schedule to run
+    :type id_: str
+    :param args: positional arguments
+    :type args: tuple
+    :param kwargs: named arguments
+    :type kwargs: dict
+    :returns: True in case of success
+    :rtype: bool"""
     schedule = get_schedule(id_, add_id=False)
     url = schedule.get('url')
     if not url:
-        return
-    logger.debug('Running job id:%s title:%s url: %s' % (id_, schedule.get('title', ''), url))
+        return False
+    logger.debug('running job id:%s title:%s url: %s' % (id_, schedule.get('title', ''), url))
     req = requests.get(url, allow_redirects=True, timeout=(30.10, 240))
     content = req.text
     xpath = schedule.get('xpath')
     if xpath:
-        content = select_xpath(content, xpath)
+        try:
+            content = select_xpath(content, xpath)
+        except Exception as e:
+            logger.warn('unable to extract XPath %s: %s' % (xpath, e))
     req_path = urllib.parse.urlparse(req.url).path
     base_name = os.path.basename(req_path) or 'index.html'
     def _commit(id_, filename, content, queue):
@@ -137,17 +205,17 @@ def run_job(id_=None, *args, **kwargs):
     p.join()
     email = schedule.get('email')
     if not email:
-        return
+        return True
     changes = res.get('changes')
     if not changes:
-        return
+        return True
     min_change = schedule.get('minimum_change')
     previous_lines = res.get('previous_lines')
     if min_change and previous_lines:
         min_change = float(min_change)
         change_fraction = res.get('changes') / previous_lines
         if change_fraction < min_change:
-            return
+            return True
     # send notification
     diff = get_diff(id_).get('diff')
     if not diff:
@@ -157,6 +225,16 @@ def run_job(id_=None, *args, **kwargs):
 
 
 def safe_run_job(id_=None, *args, **kwargs):
+    """Safely run a job, catching all the exceptions
+
+    :param id_: ID of the schedule to run
+    :type id_: str
+    :param args: positional arguments
+    :type args: tuple
+    :param kwargs: named arguments
+    :type kwargs: dict
+    :returns: True in case of success
+    :rtype: bool"""
     try:
         run_job(id_, *args, **kwargs)
     except Exception as e:
@@ -164,16 +242,34 @@ def safe_run_job(id_=None, *args, **kwargs):
 
 
 def send_email(to, subject='diffido', body='', from_=None):
+    """Send an email
+
+    :param to: destination address
+    :type to: str
+    :param subject: email subject
+    :type subject: str
+    :param body: body of the email
+    :type body: str
+    :param from_: sender address
+    :type from_: str
+    :returns: True in case of success
+    :rtype: bool"""
     msg = MIMEText(body)
     msg['Subject'] = subject
     msg['From'] = from_ or EMAIL_FROM
     msg['To'] = to
-    s = smtplib.SMTP('localhost')
-    s.send_message(msg)
-    s.quit()
+    with smtplib.SMTP('localhost') as s:
+        s.send_message(msg)
+    return True
 
 
 def get_history(id_):
+    """Read the history of a schedule
+
+    :param id_: ID of the schedule
+    :type id_: str
+    :returns: information about the schedule and its history
+    :rtype: dict"""
     def _history(id_, queue):
         os.chdir('storage/%s' % id_)
         p = subprocess.Popen([GIT_CMD, 'log', '--pretty=oneline', '--shortstat'], stdout=subprocess.PIPE)
@@ -201,6 +297,16 @@ def get_history(id_):
 
 
 def get_diff(id_, commit_id='HEAD', old_commit_id=None):
+    """Return the diff between commits of a schedule
+
+    :param id_: ID of the schedule
+    :type id_: str
+    :param commit_id: the most recent commit ID; HEAD by default
+    :type commit_id: str
+    :param old_commit_id: the older commit ID; if None, the previous commit is used
+    :type old_commit_id: str
+    :returns: information about the schedule and the diff between commits
+    :rtype: dict"""
     def _history(id_, commit_id, old_commit_id, queue):
         os.chdir('storage/%s' % id_)
         p = subprocess.Popen([GIT_CMD, 'diff', old_commit_id or '%s~' % commit_id, commit_id], stdout=subprocess.PIPE)
@@ -217,50 +323,115 @@
 
 
 def scheduler_update(scheduler, id_):
+    """Update a scheduler job, using information from the JSON object
+
+    :param scheduler: the TornadoScheduler instance to modify
+    :type scheduler: TornadoScheduler
+    :param id_: ID of the schedule that must be updated
+    :type id_: str
+    :returns: True in case of success
+    :rtype: bool"""
     schedule = get_schedule(id_, add_id=False)
     if not schedule:
-        return
+        logger.warn('unable to update empty schedule %s' % id_)
+        return False
     trigger = schedule.get('trigger')
     if trigger not in ('interval', 'cron'):
-        return
+        logger.warn('unable to update schedule %s: trigger not in ("cron", "interval")' % id_)
+        return False
     args = {}
     if trigger == 'interval':
         args['trigger'] = 'interval'
         for unit in 'weeks', 'days', 'hours', 'minutes', 'seconds':
             if 'interval_%s' % unit not in schedule:
                 continue
-            args[unit] = int(schedule['interval_%s' % unit])
+            try:
+                args[unit] = int(schedule['interval_%s' % unit])
+            except Exception:
+                logger.warn('invalid argument on schedule %s: %s parameter %s is not an integer' %
+                            (id_, 'interval_%s' % unit, schedule['interval_%s' % unit]))
     elif trigger == 'cron':
-        cron_trigger = CronTrigger.from_crontab(schedule.get('cron_crontab'))
-        args['trigger'] = cron_trigger
+        try:
+            cron_trigger = CronTrigger.from_crontab(schedule['cron_crontab'])
+            args['trigger'] = cron_trigger
+        except Exception:
+            logger.warn('invalid argument on schedule %s: cron_crontab parameter %s is not a valid crontab' %
+                        (id_, schedule.get('cron_crontab')))
     git_create_repo(id_)
-    scheduler.add_job(safe_run_job, id=id_, replace_existing=True, kwargs={'id_': id_}, **args)
+    try:
+        scheduler.add_job(safe_run_job, id=id_, replace_existing=True, kwargs={'id_': id_}, **args)
+    except Exception as e:
+        logger.warn('unable to update job %s: %s' % (id_, e))
+        return False
+    return True
 
 
 def scheduler_delete(scheduler, id_):
-    scheduler.remove_job(job_id=id_)
-    git_delete_repo(id_)
+    """Delete a scheduler job
+
+    :param scheduler: the TornadoScheduler instance to modify
+    :type scheduler: TornadoScheduler
+    :param id_: ID of the schedule
+    :type id_: str
+    :returns: True in case of success
+    :rtype: bool"""
+    try:
+        scheduler.remove_job(job_id=id_)
+    except Exception as e:
+        logger.warn('unable to delete job %s: %s' % (id_, e))
+        return False
+    return git_delete_repo(id_)
 
 
 def reset_from_schedules(scheduler):
-    scheduler.remove_all_jobs()
-    for key in read_schedules().get('schedules', {}).keys():
-        scheduler_update(scheduler, id_=key)
+    """Reset all scheduler jobs, using information from the JSON object
+
+    :param scheduler: the TornadoScheduler instance to modify
+    :type scheduler: TornadoScheduler
+    :returns: True in case of success
+    :rtype: bool"""
+    ret = False
+    try:
+        scheduler.remove_all_jobs()
+        for key in read_schedules().get('schedules', {}).keys():
+            ret |= scheduler_update(scheduler, id_=key)
+    except Exception as e:
+        logger.warn('unable to reset all jobs: %s' % e)
+        return False
+    return ret
 
 
 def git_create_repo(id_):
+    """Create a Git repository
+
+    :param id_: ID of the schedule
+    :type id_: str
+    :returns: True in case of success
+    :rtype: bool"""
    repo_dir = 'storage/%s' % id_
     if os.path.isdir(repo_dir):
-        return
+        return True
     p = subprocess.Popen([GIT_CMD, 'init', repo_dir])
     p.communicate()
+    return p.returncode == 0
 
 
 def git_delete_repo(id_):
+    """Delete a Git repository
+
+    :param id_: ID of the schedule
+    :type id_: str
+    :returns: True in case of success
+    :rtype: bool"""
     repo_dir = 'storage/%s' % id_
     if not os.path.isdir(repo_dir):
-        return
-    shutil.rmtree(repo_dir)
+        return False
+    try:
+        shutil.rmtree(repo_dir)
+    except Exception as e:
+        logger.warn('unable to delete Git repository %s: %s' % (id_, e))
+        return False
+    return True
 
 
 class DiffidoBaseException(Exception):
@@ -278,13 +449,6 @@ class DiffidoBaseException(Exception):
 
 class BaseHandler(tornado.web.RequestHandler):
     """Base class for request handlers."""
-    # Cache currently connected users.
-    _users_cache = {}
-
-    # set of documents we're managing (a collection in MongoDB or a table in a SQL database)
-    document = None
-    collection = None
-
     # A property to access the first value of each argument.
     arguments = property(lambda self: dict([(k, v[0].decode('utf-8'))
                                             for k, v in self.request.arguments.items()]))
@@ -307,10 +471,6 @@ class BaseHandler(tornado.web.RequestHandler):
             message = 'internal error'
         self.build_error(message, status=status_code)
 
-    def is_api(self):
-        """Return True if the path is from an API call."""
-        return self.request.path.startswith('/v%s' % API_VERSION)
-
     def initialize(self, **kwargs):
         """Add every passed (key, value) as attributes of the instance."""
         for key, value in kwargs.items():
@@ -340,16 +500,18 @@ class BaseHandler(tornado.web.RequestHandler):
 
 
 class SchedulesHandler(BaseHandler):
+    """Schedules handler."""
     @gen.coroutine
     def get(self, id_=None, *args, **kwargs):
+        """Get a schedule."""
         if id_ is not None:
-            self.write({'schedule': get_schedule(id_)})
-            return
+            return self.write({'schedule': get_schedule(id_)})
         schedules = read_schedules()
         self.write(schedules)
 
     @gen.coroutine
     def put(self, id_=None, *args, **kwargs):
+        """Update a schedule."""
         if id_ is None:
             return self.build_error(message='update action requires an ID')
         data = self.clean_body
@@ -363,6 +525,7 @@ class SchedulesHandler(BaseHandler):
 
     @gen.coroutine
     def post(self, *args, **kwargs):
+        """Add a schedule."""
         data = self.clean_body
         schedules = read_schedules()
         id_ = next_id(schedules)
@@ -373,6 +536,7 @@ class SchedulesHandler(BaseHandler):
 
     @gen.coroutine
     def delete(self, id_=None, *args, **kwargs):
+        """Delete a schedule."""
         if id_ is None:
             return self.build_error(message='an ID must be specified')
         schedules = read_schedules()
@@ -384,29 +548,31 @@
 
 
 class ResetSchedulesHandler(BaseHandler):
+    """Reset schedules handler."""
     @gen.coroutine
     def post(self, *args, **kwargs):
         reset_from_schedules(self.scheduler)
 
 
 class HistoryHandler(BaseHandler):
+    """History handler."""
     @gen.coroutine
     def get(self, id_, *args, **kwargs):
         self.write(get_history(id_))
 
 
 class DiffHandler(BaseHandler):
+    """Diff handler."""
     @gen.coroutine
     def get(self, id_, commit_id, old_commit_id=None, *args, **kwargs):
         self.write(get_diff(id_, commit_id, old_commit_id))
 
 
 class TemplateHandler(BaseHandler):
-    """Handler for the / path."""
-    app_path = os.path.join(os.path.dirname(__file__), "dist")
-
+    """Handler for the template files in the / path."""
     @gen.coroutine
     def get(self, *args, **kwargs):
+        """Get a template file."""
         page = 'index.html'
         if args and args[0]:
             page = args[0].strip('/')
@@ -415,6 +581,7 @@ class TemplateHandler(BaseHandler):
 
 
 def serve():
+    """Read configuration and start the server."""
     global EMAIL_FROM
     jobstores = {'default': SQLAlchemyJobStore(url=JOBS_STORE)}
     scheduler = TornadoScheduler(jobstores=jobstores)
@@ -469,7 +636,6 @@ def serve():
                 options.address if options.address else '127.0.0.1',
                 options.port)
     http_server.listen(options.port, options.address)
-
     try:
         IOLoop.instance().start()
     except (KeyboardInterrupt, SystemExit):