Fix startup checks

Do not singletonize the retriever
Fix dependencies
2021-09-15 17:54:40 +02:00 · 2021-09-15 17:54:35 +02:00 · 2021-09-15 17:54:30 +02:00 · 2021-09-15 17:54:25 +02:00 · 2021-09-15 17:54:20 +02:00
4 changed files with 53 additions and 64 deletions
--- a/setup.py
+++ b/setup.py
@ -1,11 +1,11 @@
 #!/usr/bin/env python
 from distutils.core import setup
 import setuptools
 REQUIREMENTS = [
    "SQLAlchemy==0.8.3",
    "aiofiles==0.6.0",
    "aiohttp==3.7.4",
    "click==7.1.2",
    "fastapi==0.62.0",
    "h11==0.11.0",
--- a/techrec/cli.py
+++ b/techrec/cli.py
@ -4,6 +4,7 @@ import os.path
 import sys
 from argparse import Action, ArgumentParser
 from datetime import datetime
 import urllib.request
 from . import forge, maint, server
 from .config_manager import get_config
@ -12,18 +13,27 @@ logging.basicConfig(stream=sys.stdout)
 logger = logging.getLogger("cli")
 CWD = os.getcwd()
 OK_CODES = [200, 301, 302]
 def is_writable(d):
    return os.access(d, os.W_OK)
 def pre_check_permissions():
-    def is_writable(d):
+    audio_input = get_config()["AUDIO_INPUT"]
-        return os.access(d, os.W_OK)
+    if audio_input.startswith("http://") or audio_input.startswith("https://"):
-
+        with urllib.request.urlopen(audio_input) as req:
-    if is_writable(get_config()["AUDIO_INPUT"]):
+            if req.code not in OK_CODES:
-        yield "Audio input '%s' writable" % get_config()["AUDIO_INPUT"]
+                yield f"Audio input {audio_input} not accessible"
-    if not os.access(get_config()["AUDIO_INPUT"], os.R_OK):
+                sys.exit(10)
-        yield "Audio input '%s' unreadable" % get_config()["AUDIO_INPUT"]
+    else:
-        sys.exit(10)
+        if is_writable(audio_input):
-    if is_writable(os.getcwd()):
+            yield "Audio input '%s' writable" % audio_input
        if not os.access(audio_input, os.R_OK):
            yield "Audio input '%s' unreadable" % audio_input
            sys.exit(10)
    if is_writable(CWD):
        yield "Code writable"
    if not is_writable(get_config()["AUDIO_OUTPUT"]):
        yield "Audio output '%s' not writable" % get_config()["AUDIO_OUTPUT"]
@ -72,7 +82,8 @@ def common_pre():
                continue
            path = os.path.realpath(conf)
            if not os.path.exists(path):
-                logger.warn("Configuration file '%s' does not exist; skipping" % path)
+                logger.warn(
                    "Configuration file '%s' does not exist; skipping" % path)
                continue
            configs.append(path)
    if getattr(sys, "frozen", False):
--- a/techrec/forge.py
+++ b/techrec/forge.py
@ -1,3 +1,4 @@
 import asyncio
 import logging
 import tempfile
 import os
@ -7,7 +8,7 @@ from time import sleep
 from typing import Callable, Optional
 from .config_manager import get_config
-from .http_retriever import RETRIEVER
+from .http_retriever import download
 logger = logging.getLogger("forge")
 Validator = Callable[[datetime, datetime, str], bool]
@ -22,8 +23,10 @@ async def get_timefile_exact(time) -> str:
    remote_path = os.path.join(
        remote_repo, time.strftime(get_config()["AUDIO_INPUT_FORMAT"])
    )
-    if remote.startswith("http"):
+    if remote_path.startswith("http://") or remote_path.startswith("https://"):
-        local = await RETRIEVER.get(remote_path)
+        logger.info(f"downloading {remote_path}")
        print(f"DOWNLOADING -> {remote_path}")
        local = await download(remote_path)
        return local
    return local_path
@ -116,7 +119,7 @@ async def create_mp3(
    intervals = []
    for begin, start_cut, end_cut in get_files_and_intervals(start, end):
        file = await get_timefile(begin)
-        interval.append(file, start_cut, end_cut)
+        intervals.append((file, start_cut, end_cut))
    if os.path.exists(outfile):
        raise OSError("file '%s' already exists" % outfile)
    for path, _s, _e in intervals:
@ -187,8 +190,8 @@ async def create_mp3(
    return True
-async def main_cmd(options):
+def main_cmd(options):
    log = logging.getLogger("forge_main")
    outfile = os.path.abspath(os.path.join(options.cwd, options.outfile))
    log.debug("will forge an mp3 into %s" % (outfile))
-    await create_mp3(options.starttime, options.endtime, outfile)
+    asyncio.run(create_mp3(options.starttime, options.endtime, outfile))
--- a/techrec/http_retriever.py
+++ b/techrec/http_retriever.py
@ -1,6 +1,8 @@
 # -*- encoding: utf-8 -*-
 import asyncio
 import os
 from typing import Optional
 from tempfile import mkdtemp
 import aiohttp  # type: ignore
@ -9,52 +11,25 @@ from .config_manager import get_config
 CHUNK_SIZE = 2 ** 12
-class HTTPRetriever(object):
+async def download(remote: str, staging: Optional[str] = None) -> str:
    """
-    This class offers the `get` method to retrieve the file from the local staging path
+    This will download to AUDIO_STAGING the remote file and return the local
-    or, if missing, from the given remote.
+    path of the downloaded file
    """
-    _instance = None
+    _, filename = os.path.split(remote)
-
+    if staging:
-    def __new__(cls):
+        base = staging
-        if self._instance is None:
+    else:
-            self._instance = super().__new__(cls)
+        # if no staging is specified, and you want to clean the storage
-        return self._instance
+        # used by techrec: rm -rf /tmp/techrec*
-
+        base = mkdtemp(prefix="techrec-", dir="/tmp")
-    def __init__(self):
+    local = os.path.join(base, filename)
-        self.repo_path = get_config()["AUDIO_STAGING"]
+    async with aiohttp.ClientSession() as session:
-        self.repo = dict(
+        async with session.get(remote) as resp:
-            path=os.path.join(self.repo_path, path) for i in os.listdir(self.repo_path)
+            with open(local, "wb") as f:
-        )
+                while True:
-
+                    chunk = await resp.content.read(CHUNK_SIZE)
-    async def get(remote: str) -> str:
+                    if not chunk:
-        """
+                        break
-        This will look in the local staging path (ideally on a tmpfs or something
+                    f.write(chunk)
-        similar), and return the file from there if present. Otherwise, it will download
+    return local
        it from the remote.
        """
        if remote in self.repo:
            return self.repo[remote]
        file = await self._download_from_remote(remote)
        self.repo[] = file
        return file
    async def _download(remote: str) -> str:
        """
        This will download to AUDIO_STAGING the remote file and return the local path
        of the downloaded file
        """
        _, filename = os.path.split(remote)
        local = os.path.join(get_config()["AUDIO_STAGING"], filename)
        async with aiohttp.ClientSession() as session:
            async with session.get(remote) as resp:
                with open(local) as f:
                    while True:
                        chunk = await resp.content.read(CHUNK_SIZE)
                        if not chunk:
                            break
                        f.write(chunk)
        return local
 RETRIEVER = HTTPRetriever()
Author	SHA1	Message	Date
Blallo	26181d083f	Fix startup checks	2021-09-15 17:54:40 +02:00
Blallo	acc966b488	Do not singletonize the retriever	2021-09-15 17:54:35 +02:00
Blallo	d967440a6d	Fix dependencies	2021-09-15 17:54:30 +02:00
Blallo	ef9842e4d2	Adding docker machinery and makefile	2021-09-15 17:54:25 +02:00
Blallo	5949e79f46	Plug http retriever in current logic	2021-09-15 17:54:20 +02:00