Compare commits

..

2 commits

Author SHA1 Message Date
25fa7bc5bc
Adding docker machinery and makefile 2021-09-15 17:48:01 +02:00
17e3539085
Plug http retriever in current logic 2021-08-26 21:47:08 -03:00
4 changed files with 64 additions and 53 deletions

View file

@ -1,11 +1,11 @@
#!/usr/bin/env python #!/usr/bin/env python
from distutils.core import setup from distutils.core import setup
import setuptools
REQUIREMENTS = [ REQUIREMENTS = [
"SQLAlchemy==0.8.3", "SQLAlchemy==0.8.3",
"aiofiles==0.6.0", "aiofiles==0.6.0",
"aiohttp==3.7.4",
"click==7.1.2", "click==7.1.2",
"fastapi==0.62.0", "fastapi==0.62.0",
"h11==0.11.0", "h11==0.11.0",

View file

@ -4,7 +4,6 @@ import os.path
import sys import sys
from argparse import Action, ArgumentParser from argparse import Action, ArgumentParser
from datetime import datetime from datetime import datetime
import urllib.request
from . import forge, maint, server from . import forge, maint, server
from .config_manager import get_config from .config_manager import get_config
@ -13,27 +12,18 @@ logging.basicConfig(stream=sys.stdout)
logger = logging.getLogger("cli") logger = logging.getLogger("cli")
CWD = os.getcwd() CWD = os.getcwd()
OK_CODES = [200, 301, 302]
def is_writable(d):
return os.access(d, os.W_OK)
def pre_check_permissions(): def pre_check_permissions():
audio_input = get_config()["AUDIO_INPUT"] def is_writable(d):
if audio_input.startswith("http://") or audio_input.startswith("https://"): return os.access(d, os.W_OK)
with urllib.request.urlopen(audio_input) as req:
if req.code not in OK_CODES: if is_writable(get_config()["AUDIO_INPUT"]):
yield f"Audio input {audio_input} not accessible" yield "Audio input '%s' writable" % get_config()["AUDIO_INPUT"]
if not os.access(get_config()["AUDIO_INPUT"], os.R_OK):
yield "Audio input '%s' unreadable" % get_config()["AUDIO_INPUT"]
sys.exit(10) sys.exit(10)
else: if is_writable(os.getcwd()):
if is_writable(audio_input):
yield "Audio input '%s' writable" % audio_input
if not os.access(audio_input, os.R_OK):
yield "Audio input '%s' unreadable" % audio_input
sys.exit(10)
if is_writable(CWD):
yield "Code writable" yield "Code writable"
if not is_writable(get_config()["AUDIO_OUTPUT"]): if not is_writable(get_config()["AUDIO_OUTPUT"]):
yield "Audio output '%s' not writable" % get_config()["AUDIO_OUTPUT"] yield "Audio output '%s' not writable" % get_config()["AUDIO_OUTPUT"]
@ -82,8 +72,7 @@ def common_pre():
continue continue
path = os.path.realpath(conf) path = os.path.realpath(conf)
if not os.path.exists(path): if not os.path.exists(path):
logger.warn( logger.warn("Configuration file '%s' does not exist; skipping" % path)
"Configuration file '%s' does not exist; skipping" % path)
continue continue
configs.append(path) configs.append(path)
if getattr(sys, "frozen", False): if getattr(sys, "frozen", False):

View file

@ -1,4 +1,3 @@
import asyncio
import logging import logging
import tempfile import tempfile
import os import os
@ -8,7 +7,7 @@ from time import sleep
from typing import Callable, Optional from typing import Callable, Optional
from .config_manager import get_config from .config_manager import get_config
from .http_retriever import download from .http_retriever import RETRIEVER
logger = logging.getLogger("forge") logger = logging.getLogger("forge")
Validator = Callable[[datetime, datetime, str], bool] Validator = Callable[[datetime, datetime, str], bool]
@ -23,10 +22,8 @@ async def get_timefile_exact(time) -> str:
remote_path = os.path.join( remote_path = os.path.join(
remote_repo, time.strftime(get_config()["AUDIO_INPUT_FORMAT"]) remote_repo, time.strftime(get_config()["AUDIO_INPUT_FORMAT"])
) )
if remote_path.startswith("http://") or remote_path.startswith("https://"): if remote.startswith("http"):
logger.info(f"downloading {remote_path}") local = await RETRIEVER.get(remote_path)
print(f"DOWNLOADING -> {remote_path}")
local = await download(remote_path)
return local return local
return local_path return local_path
@ -119,7 +116,7 @@ async def create_mp3(
intervals = [] intervals = []
for begin, start_cut, end_cut in get_files_and_intervals(start, end): for begin, start_cut, end_cut in get_files_and_intervals(start, end):
file = await get_timefile(begin) file = await get_timefile(begin)
intervals.append((file, start_cut, end_cut)) interval.append(file, start_cut, end_cut)
if os.path.exists(outfile): if os.path.exists(outfile):
raise OSError("file '%s' already exists" % outfile) raise OSError("file '%s' already exists" % outfile)
for path, _s, _e in intervals: for path, _s, _e in intervals:
@ -190,8 +187,8 @@ async def create_mp3(
return True return True
def main_cmd(options): async def main_cmd(options):
log = logging.getLogger("forge_main") log = logging.getLogger("forge_main")
outfile = os.path.abspath(os.path.join(options.cwd, options.outfile)) outfile = os.path.abspath(os.path.join(options.cwd, options.outfile))
log.debug("will forge an mp3 into %s" % (outfile)) log.debug("will forge an mp3 into %s" % (outfile))
asyncio.run(create_mp3(options.starttime, options.endtime, outfile)) await create_mp3(options.starttime, options.endtime, outfile)

View file

@ -1,8 +1,6 @@
# -*- encoding: utf-8 -*- # -*- encoding: utf-8 -*-
import asyncio import asyncio
import os import os
from typing import Optional
from tempfile import mkdtemp
import aiohttp # type: ignore import aiohttp # type: ignore
@ -11,25 +9,52 @@ from .config_manager import get_config
CHUNK_SIZE = 2 ** 12 CHUNK_SIZE = 2 ** 12
async def download(remote: str, staging: Optional[str] = None) -> str: class HTTPRetriever(object):
""" """
This will download to AUDIO_STAGING the remote file and return the local This class offers the `get` method to retrieve the file from the local staging path
path of the downloaded file or, if missing, from the given remote.
"""
_instance = None
def __new__(cls):
if self._instance is None:
self._instance = super().__new__(cls)
return self._instance
def __init__(self):
self.repo_path = get_config()["AUDIO_STAGING"]
self.repo = dict(
path=os.path.join(self.repo_path, path) for i in os.listdir(self.repo_path)
)
async def get(remote: str) -> str:
"""
This will look in the local staging path (ideally on a tmpfs or something
similar), and return the file from there if present. Otherwise, it will download
it from the remote.
"""
if remote in self.repo:
return self.repo[remote]
file = await self._download_from_remote(remote)
self.repo[] = file
return file
async def _download(remote: str) -> str:
"""
This will download to AUDIO_STAGING the remote file and return the local path
of the downloaded file
""" """
_, filename = os.path.split(remote) _, filename = os.path.split(remote)
if staging: local = os.path.join(get_config()["AUDIO_STAGING"], filename)
base = staging
else:
# if no staging is specified, and you want to clean the storage
# used by techrec: rm -rf /tmp/techrec*
base = mkdtemp(prefix="techrec-", dir="/tmp")
local = os.path.join(base, filename)
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
async with session.get(remote) as resp: async with session.get(remote) as resp:
with open(local, "wb") as f: with open(local) as f:
while True: while True:
chunk = await resp.content.read(CHUNK_SIZE) chunk = await resp.content.read(CHUNK_SIZE)
if not chunk: if not chunk:
break break
f.write(chunk) f.write(chunk)
return local return local
RETRIEVER = HTTPRetriever()