Actually look at input directory

refs #10
... still no support for HTTP!
This commit is contained in:
boyska 2023-03-31 01:43:26 +02:00
parent 14704ec7ed
commit 75291d7704

View file

@ -7,8 +7,8 @@ from subprocess import Popen
from time import sleep
from typing import Callable, Optional
from .config_manager import get_config
from .http_retriever import download
from techrec.config_manager import get_config
from techrec.http_retriever import download
logger = logging.getLogger("forge")
Validator = Callable[[datetime, datetime, str], bool]
@ -40,11 +40,6 @@ def round_timefile(exact: datetime) -> datetime:
return datetime(exact.year, exact.month, exact.day, exact.hour)
async def get_timefile(exact: datetime) -> str:
    """Return what ``get_timefile_exact`` yields for the rounded *exact*.

    The timestamp is first passed through ``round_timefile`` and the rounded
    value is then handed to ``get_timefile_exact``; the awaited result is
    returned unchanged.
    """
    rounded = round_timefile(exact)
    return await get_timefile_exact(rounded)
def get_files_and_intervals(start, end, rounder=round_timefile):
"""
both arguments are datetime objects
@ -65,43 +60,102 @@ def get_files_and_intervals(start, end, rounder=round_timefile):
start = begin + timedelta(hours=1)
def mp3_join(named_intervals):
"""
Note that these are NOT the intervals returned by get_files_and_intervals,
as they do not supply a filename, but only a datetime.
What we want in input is basically the same thing, but with get_timefile()
applied on the first element
class InputBackend:
def __init__(self, basepath):
    """Store the base location and create a logger named after the class.

    ``basepath`` is kept as ``self.base``. The logger name is taken from the
    instance's concrete class, so each subclass logs under its own name.
    """
    self.log = logging.getLogger(self.__class__.__name__)
    self.base = basepath
This function make the (quite usual) assumption that the only start_cut (if
any) is at the first file, and the last one is at the last file
"""
async def search_files(self, start, end):
    """Return the files needed to cover the ``start``..``end`` interval.

    The directories that hold the start and the end timestamp are listed
    (assumption: a day is not split across multiple folders). Every entry
    whose name parses as a timestamp and does not begin after ``end`` is a
    candidate. The result is the chronologically ordered list of paths
    running from the newest file that begins before ``start`` (when there is
    one) up to the newest candidate.

    Entries whose name cannot be turned into a timestamp are skipped and
    reported at debug level; they are never treated as errors.
    """
    directories = {
        self.parent_dir(self.time_to_uri(start)),
        self.parent_dir(self.time_to_uri(end)),
    }
    candidates = set()
    for directory in directories:
        candidates.update(await self.list_dir(directory))

    dated = []  # (path, datetime) pairs
    for fpath in candidates:
        try:
            began = self.uri_to_time(fpath)
        except Exception as exc:
            # Best effort: unrelated files may live in the same directory.
            # Report through the logger instead of printing to stdout.
            self.log.debug("Skipping %s: %s", fpath, exc)
            continue
        if began <= end:
            dated.append((fpath, began))

    # Walk newest-first and stop right after the first file that begins
    # before ``start``: that file holds the beginning of the interval, and
    # anything older is not needed.
    dated.sort(key=lambda pair: pair[1], reverse=True)
    selected = []
    for fpath, began in dated:
        selected.append(fpath)
        if began < start:
            break
    selected.reverse()  # back to chronological order

    self.log.info("Relevant files: %s", ", ".join(selected))
    return selected
async def list_dir(self, path):
    """List the entries found under ``path``.

    Abstract hook: this base implementation always raises
    ``NotImplementedError``; a concrete backend has to override it.
    """
    raise NotImplementedError
def parent_dir(self, path):
    """Return the directory part of ``path``, i.e. without its last component."""
    directory, _leaf = os.path.split(path)
    return directory
def time_to_uri(self, time: datetime) -> str:
    """Build the URI at which the recording for ``time`` is expected.

    The ``AUDIO_INPUT_FORMAT`` configuration value is used as a strftime
    pattern for ``time`` and the result is joined onto the backend's base.
    """
    root = str(self.base)
    relative = time.strftime(get_config()["AUDIO_INPUT_FORMAT"])
    return os.path.join(root, relative)
def uri_to_time(self, fpath: str) -> datetime:
    """Parse the timestamp encoded in the final component of ``fpath``.

    Only the part of ``AUDIO_INPUT_FORMAT`` after its last ``/`` is used as
    the strptime pattern, matched against the basename of ``fpath``. A name
    that does not match makes ``datetime.strptime`` raise ``ValueError``.
    """
    leaf = os.path.basename(fpath)
    leaf_format = get_config()["AUDIO_INPUT_FORMAT"].split('/')[-1]
    return datetime.strptime(leaf, leaf_format)
async def get_file(self, uri: str) -> str:
    """Return a usable path for ``uri``.

    In this base implementation the URI is returned unchanged. The method
    takes ``self`` so that it can be called on a backend instance and
    overridden by subclasses with the same signature.
    """
    return uri
class DirBackend(InputBackend):
    """Input backend that lists its files through ``async_os.listdir``."""

    def uri_to_relative(self, fpath: str) -> str:
        """Express ``fpath`` relative to the backend's base path."""
        base = str(self.base)
        return os.path.relpath(fpath, base)

    async def list_dir(self, path):
        """Return the entries of ``path``, each one joined onto ``path``."""
        # NOTE(review): the value of async_os.listdir() is iterated directly,
        # without ``await`` -- confirm that this call is synchronous for the
        # module bound to ``async_os``.
        files = []
        for name in async_os.listdir(path):
            files.append(os.path.join(path, name))
        return files
class HttpBackend(InputBackend):
    """Input backend that fetches each file with ``download`` before use."""

    async def get_file(self, uri: str) -> str:
        """Download ``uri`` and return the value reported by ``download``.

        The ``AUDIO_INPUT_BASICAUTH`` configuration value is passed to
        ``download`` as ``basic_auth``. ``uri`` is used exactly as given; no
        rounding or other adjustment is applied here.
        """
        # Lazy %-style arguments: the message is only built when the record
        # is actually emitted.
        self.log.info("downloading: %s", uri)
        local = await download(
            uri,
            basic_auth=get_config()['AUDIO_INPUT_BASICAUTH'],
        )
        return local
def get_ffmpeg_cmdline(fpaths: list, backend, start: datetime, end: datetime) -> list:
ffmpeg = get_config()["FFMPEG_PATH"]
startskip = None
endskip = None
files = []
for (filename, start_cut, end_cut) in named_intervals:
# this happens only one time, and only at the first iteration
if start_cut:
if startskip is not None:
raise Exception("error in first cut iteration")
startskip = start_cut
# this happens only one time, and only at the last iteration
if end_cut:
if endskip is not None:
raise Exception("error in last iteration")
endskip = end_cut
if "|" in filename:
raise Exception(f"'|' in {filename}")
files.append(filename)
cmdline = [ffmpeg, "-i", "concat:%s" % "|".join(files)]
cmdline = [ffmpeg, "-i", "concat:%s" % "|".join(fpaths)]
cmdline += get_config()["FFMPEG_OUT_CODEC"]
if startskip is not None:
cmdline += ["-ss", str(startskip)]
else:
startskip = 0
if endskip is not None:
cmdline += ["-t", str(len(files) * 3600 - (startskip + endskip))]
startskip = (start - backend.uri_to_time(fpaths[0])).total_seconds()
if startskip > 0:
cmdline += ["-ss", "%d" % startskip]
cmdline += ["-t", "%d" % (end - start).total_seconds()]
return cmdline
@ -113,24 +167,11 @@ async def create_mp3(
validator: Optional[Validator] = None,
**kwargs,
):
if validator is None:
def validator(s, e, f):
return True
be = DirBackend(get_config()['AUDIO_INPUT'])
fpaths = await be.search_files(start, end)
intervals = []
for begin, start_cut, end_cut in get_files_and_intervals(start, end):
try:
filename = await get_timefile(begin)
except Exception as e:
raise ValueError("Error while retrieving file: %s" % e) from e
intervals.append((filename, start_cut, end_cut))
if os.path.exists(outfile):
raise OSError("file '%s' already exists" % outfile)
for path, _s, _e in intervals:
if not os.path.exists(path):
raise OSError(
"file '%s' does not exist; recording system broken?" % path)
# metadata date/time formatted according to
# https://wiki.xiph.org/VorbisComment#Date_and_time
@ -167,7 +208,7 @@ async def create_mp3(
dir=os.path.dirname(outfile),
)
cmd = (
mp3_join(intervals)
get_ffmpeg_cmdline(fpaths, be, start, end)
+ metadata_list
+ ["-y"]
+ get_config()["FFMPEG_OPTIONS"]