Actually look at input directory
refs #10 ... still no support for HTTP!
This commit is contained in:
parent
14704ec7ed
commit
75291d7704
1 changed file with 99 additions and 58 deletions
157
techrec/forge.py
157
techrec/forge.py
|
@ -7,8 +7,8 @@ from subprocess import Popen
|
|||
from time import sleep
|
||||
from typing import Callable, Optional
|
||||
|
||||
from .config_manager import get_config
|
||||
from .http_retriever import download
|
||||
from techrec.config_manager import get_config
|
||||
from techrec.http_retriever import download
|
||||
|
||||
logger = logging.getLogger("forge")
|
||||
Validator = Callable[[datetime, datetime, str], bool]
|
||||
|
@ -40,11 +40,6 @@ def round_timefile(exact: datetime) -> datetime:
|
|||
return datetime(exact.year, exact.month, exact.day, exact.hour)
|
||||
|
||||
|
||||
async def get_timefile(exact: datetime) -> str:
|
||||
file = await get_timefile_exact(round_timefile(exact))
|
||||
return file
|
||||
|
||||
|
||||
def get_files_and_intervals(start, end, rounder=round_timefile):
|
||||
"""
|
||||
both arguments are datetime objects
|
||||
|
@ -65,43 +60,102 @@ def get_files_and_intervals(start, end, rounder=round_timefile):
|
|||
start = begin + timedelta(hours=1)
|
||||
|
||||
|
||||
def mp3_join(named_intervals):
|
||||
"""
|
||||
Note that these are NOT the intervals returned by get_files_and_intervals,
|
||||
as they do not supply a filename, but only a datetime.
|
||||
What we want in input is basically the same thing, but with get_timefile()
|
||||
applied on the first element
|
||||
class InputBackend:
    """Base class for locating the recorded audio files covering a time span.

    Subclasses must implement ``list_dir``; they may override ``get_file``
    when a URI has to be fetched before it can be used.
    """

    def __init__(self, basepath):
        """Remember the base location and set up a logger named after the class."""
        self.base = basepath
        self.log = logging.getLogger(self.__class__.__name__)

    async def search_files(self, start, end):
        """Return the URIs of the files needed to cover ``start``..``end``.

        The result is ordered from the oldest file to the newest one. It
        contains every file whose timestamp is not after ``end``, going back
        in time up to and including the first file that begins before
        ``start``.
        """
        # assumption: a day is not split across multiple folders
        start_dir = self.parent_dir(self.time_to_uri(start))
        end_dir = self.parent_dir(self.time_to_uri(end))

        # A set on both levels: start and end usually share the same folder.
        files = {
            fpath
            for directory in {start_dir, end_dir}
            for fpath in await self.list_dir(directory)
        }

        files_date = []  # tuples of (str, datetime)
        for fpath in files:
            try:
                dt = self.uri_to_time(fpath)
            except Exception as exc:
                # Best effort: names that do not match the configured format
                # are simply not recordings.
                self.log.debug("Skipping %s: %s", fpath, exc)
                continue
            if dt > end:
                continue
            files_date.append((fpath, dt))

        # Newest first: walk back in time until a file starts before `start`.
        files_date.sort(key=lambda fpath_dt: fpath_dt[1], reverse=True)
        final_files = []
        for fpath, dt in files_date:
            final_files.append(fpath)
            if dt < start:
                # This file contains `start`; nothing older is needed.
                break
        final_files.reverse()

        self.log.info("Relevant files: %s", ", ".join(final_files))
        return final_files

    async def list_dir(self, path):
        """Return the URIs contained in ``path``; must be provided by subclasses."""
        raise NotImplementedError()

    def parent_dir(self, path):
        """Return the directory part of ``path``."""
        return os.path.dirname(path)

    def time_to_uri(self, time: datetime) -> str:
        """Format ``time`` with AUDIO_INPUT_FORMAT and join it to the base path."""
        return os.path.join(
            str(self.base),
            time.strftime(get_config()["AUDIO_INPUT_FORMAT"]),
        )

    def uri_to_time(self, fpath: str) -> datetime:
        """Parse the basename of ``fpath`` with the last component of AUDIO_INPUT_FORMAT.

        Raises ValueError (from ``strptime``) when the name does not match.
        """
        return datetime.strptime(
            os.path.basename(fpath),
            get_config()["AUDIO_INPUT_FORMAT"].split('/')[-1],
        )

    async def get_file(self, uri: str) -> str:
        """Return ``uri`` unchanged; the base backend does no retrieval."""
        return uri
|
||||
|
||||
class DirBackend(InputBackend):
    """Input backend whose files are listed from a local directory."""

    def uri_to_relative(self, fpath: str) -> str:
        """Return ``fpath`` expressed relative to the backend base path."""
        return os.path.relpath(fpath, str(self.base))

    async def list_dir(self, path):
        """Return the full paths of the entries found in directory ``path``."""
        import asyncio

        # Run the blocking directory scan in the default executor so the
        # event loop is not stalled, then await its result before iterating.
        loop = asyncio.get_running_loop()
        names = await loop.run_in_executor(None, os.listdir, path)
        return [os.path.join(path, name) for name in names]
|
||||
|
||||
|
||||
|
||||
class HttpBackend(InputBackend):
    """Input backend that fetches files through ``download``."""

    async def get_file(self, uri: str) -> str:
        """Download ``uri`` and return whatever ``download`` returns.

        The AUDIO_INPUT_BASICAUTH configuration value is passed as
        ``basic_auth``.
        NOTE(review): the result is presumably a local file path; confirm
        against http_retriever.download.
        """
        self.log.info("downloading: %s", uri)
        local = await download(
            uri,
            basic_auth=get_config()['AUDIO_INPUT_BASICAUTH'],
        )
        return local
|
||||
|
||||
|
||||
|
||||
def get_ffmpeg_cmdline(fpaths: list, backend, start: datetime, end: datetime) -> list:
    """Build the first part of the ffmpeg command line joining ``fpaths``.

    ``fpaths`` are the input files in playback order; ``backend`` is used
    only for ``uri_to_time`` on the first file, to know how many seconds
    must be skipped to reach ``start``. The duration is ``end - start``.

    Returns a new list: executable, ``concat:`` input, the configured
    FFMPEG_OUT_CODEC options, an optional ``-ss`` and ``-t``.

    Raises ValueError if ``fpaths`` is empty or a path contains ``|``.
    """
    if not fpaths:
        raise ValueError("no input files to join")
    for fpath in fpaths:
        # "|" is the separator of the concat: input, so it cannot be part
        # of a file name.
        if "|" in fpath:
            raise ValueError(f"'|' in {fpath}")

    ffmpeg = get_config()["FFMPEG_PATH"]
    cmdline = [ffmpeg, "-i", "concat:%s" % "|".join(fpaths)]
    cmdline += get_config()["FFMPEG_OUT_CODEC"]

    # Seconds between the beginning of the first file and the wanted start.
    startskip = (start - backend.uri_to_time(fpaths[0])).total_seconds()
    if startskip > 0:
        cmdline += ["-ss", "%d" % startskip]
    cmdline += ["-t", "%d" % (end - start).total_seconds()]
    return cmdline
|
||||
|
||||
|
||||
|
@ -113,24 +167,11 @@ async def create_mp3(
|
|||
validator: Optional[Validator] = None,
|
||||
**kwargs,
|
||||
):
|
||||
if validator is None:
|
||||
|
||||
def validator(s, e, f):
|
||||
return True
|
||||
be = DirBackend(get_config()['AUDIO_INPUT'])
|
||||
fpaths = await be.search_files(start, end)
|
||||
|
||||
|
||||
intervals = []
|
||||
for begin, start_cut, end_cut in get_files_and_intervals(start, end):
|
||||
try:
|
||||
filename = await get_timefile(begin)
|
||||
except Exception as e:
|
||||
raise ValueError("Error while retrieving file: %s" % e) from e
|
||||
intervals.append((filename, start_cut, end_cut))
|
||||
if os.path.exists(outfile):
|
||||
raise OSError("file '%s' already exists" % outfile)
|
||||
for path, _s, _e in intervals:
|
||||
if not os.path.exists(path):
|
||||
raise OSError(
|
||||
"file '%s' does not exist; recording system broken?" % path)
|
||||
|
||||
# metadata date/time formatted according to
|
||||
# https://wiki.xiph.org/VorbisComment#Date_and_time
|
||||
|
@ -167,7 +208,7 @@ async def create_mp3(
|
|||
dir=os.path.dirname(outfile),
|
||||
)
|
||||
cmd = (
|
||||
mp3_join(intervals)
|
||||
get_ffmpeg_cmdline(fpaths, be, start, end)
|
||||
+ metadata_list
|
||||
+ ["-y"]
|
||||
+ get_config()["FFMPEG_OPTIONS"]
|
||||
|
|
Loading…
Reference in a new issue