Verify that the generated file has the intended duration

Retry the generation otherwise (up to 10 attempts, waiting longer each time)

fixes #29
This commit is contained in:
boyska 2021-08-25 14:49:22 +02:00
parent 15376a1052
commit fb79a598da
3 changed files with 72 additions and 9 deletions

View file

@ -22,6 +22,8 @@ AUDIO_INPUT_FORMAT = "%Y-%m/%d/rec-%Y-%m-%d-%H-%M-%S.mp3"
# Output file name template with %(startdt)s, %(endtime)s and %(name)s placeholders.
AUDIO_OUTPUT_FORMAT = "techrec-%(startdt)s-%(endtime)s-%(name)s.mp3"
# Time limit for the ffmpeg run started by create_mp3; 0 means wait for it
# without any limit. NOTE(review): presumably expressed in seconds — confirm.
FORGE_TIMEOUT = 20
# NOTE(review): presumably the longest extraction allowed, in seconds
# (3600 * 5 = 5 hours) — its users are not visible here, confirm.
FORGE_MAX_DURATION = 3600 * 5
# When true, generate_mp3 measures the duration of every generated file and
# retries the generation when it does not match the requested interval.
FORGE_VERIFY = False
# Largest accepted difference, in seconds, between the expected and the
# measured duration. Only read when FORGE_VERIFY is enabled.
FORGE_VERIFY_THRESHOLD = 3
# NOTE(review): ffmpeg codec arguments; where they are used is not visible
# here — presumably "copy" avoids re-encoding the audio, confirm.
FFMPEG_OUT_CODEC = ["-acodec", "copy"]
# Extra arguments create_mp3 appends to the ffmpeg command line, right before
# the output path.
FFMPEG_OPTIONS = ["-loglevel", "warning", "-n"]
# NOTE(review): presumably the ffmpeg executable (name or path) used to build
# the command line — confirm against mp3_join.
FFMPEG_PATH = "ffmpeg"

View file

@ -4,9 +4,12 @@ import os
from datetime import datetime, timedelta
from subprocess import Popen
from time import sleep
from typing import Callable, Optional
from .config_manager import get_config
logger = logging.getLogger("forge")
# Signature of a post-generation check: it is called as
# validator(start, end, path) and returns whether the file at `path` is
# acceptable. create_mp3 runs it on the temporary output file and deletes that
# file (returning False) when the validator rejects it.
Validator = Callable[[datetime, datetime, str], bool]
def get_timefile_exact(time) -> str:
"""
@ -85,8 +88,9 @@ def mp3_join(named_intervals):
cmdline += ["-t", str(len(files) * 3600 - (startskip + endskip))]
return cmdline
def create_mp3(start: datetime, end: datetime, outfile: str, options={}, **kwargs):
def create_mp3(start: datetime, end: datetime, outfile: str, options={}, validator: Optional[Validator] = None, **kwargs):
if validator is None:
validator = lambda s,e,f: True
intervals = [
(get_timefile(begin), start_cut, end_cut)
for begin, start_cut, end_cut in get_files_and_intervals(start, end)
@ -117,16 +121,16 @@ def create_mp3(start: datetime, end: datetime, outfile: str, options={}, **kwarg
metadata_list = []
for tag, value in metadata.items():
if "=" in tag:
logging.error('Received a tag with "=" inside, skipping')
logger.error('Received a tag with "=" inside, skipping')
continue
metadata_list.append("-metadata")
metadata_list.append("%s=%s" % (tag, value))
prefix, suffix = os.path.basename(outfile).split('.', 1)
tmp_file = tempfile.NamedTemporaryFile(suffix='.%s' % suffix, prefix='forge-%s' % prefix, delete=False)
p = Popen(
mp3_join(intervals) + metadata_list + ['-y'] + get_config()["FFMPEG_OPTIONS"] + [tmp_file.name]
)
cmd = mp3_join(intervals) + metadata_list + ['-y'] + get_config()["FFMPEG_OPTIONS"] + [tmp_file.name]
logger.info("Running %s", " ".join(cmd))
p = Popen(cmd)
if get_config()["FORGE_TIMEOUT"] == 0:
p.wait()
else:
@ -146,6 +150,9 @@ def create_mp3(start: datetime, end: datetime, outfile: str, options={}, **kwarg
raise Exception("timeout") # TODO: make a specific TimeoutError
if p.returncode != 0:
raise OSError("return code was %d" % p.returncode)
if not validator(start, end, tmp_file.name):
os.unlink(tmp_file.name)
return False
os.rename(tmp_file.name, outfile)
return True

View file

@ -6,6 +6,7 @@ import os
import unicodedata
from datetime import datetime
from typing import Optional
from subprocess import check_output
from fastapi import FastAPI, HTTPException, Request, Response, BackgroundTasks
from fastapi.responses import FileResponse, RedirectResponse, JSONResponse
@ -15,7 +16,7 @@ from pydantic import BaseModel, Field
from .cli import common_pre
from .config_manager import get_config
from .db import Rec, RecDB
from .forge import create_mp3
from .forge import create_mp3, Validator
logger = logging.getLogger("server")
@ -201,13 +202,66 @@ async def generate(recid: int, response: Response, background_tasks: BackgroundT
rec=rec_sanitize(rec),
)
def get_duration(fname) -> float:
    """Return the duration of a media file, in seconds, as reported by ffprobe.

    Runs ``ffprobe -v error -show_entries format=duration -i <fname>`` and
    parses the ``duration=<value>`` line of its output.

    Args:
        fname: path of the file to inspect.

    Raises:
        subprocess.CalledProcessError: ffprobe exited with a non-zero status.
        OSError: ffprobe could not be executed.
        ValueError: the output contains no ``duration=`` line, or its value
            is not a number.
    """
    output = check_output(
        [
            "ffprobe",
            "-v",
            "error",
            "-show_entries",
            "format=duration",
            "-i",
            fname,
        ]
    )
    # splitlines() also copes with "\r\n" line endings.
    for line in output.splitlines():
        key, sep, value = line.partition(b"=")
        if sep and key == b"duration":
            return float(value)
    # An explicit error instead of the bare StopIteration that next() would
    # raise: the message says what went wrong and for which file.
    raise ValueError("ffprobe reported no duration for %s" % fname)
def get_validator(expected_duration_s: float, error_threshold_s: float) -> Validator:
    """Build a validator accepting files whose duration is close to the expected one.

    The returned callable takes ``(start, end, fpath)``; ``start`` and ``end``
    are ignored. It measures ``fpath`` with get_duration() and returns False
    when the result is more than ``error_threshold_s`` seconds away from
    ``expected_duration_s``, True otherwise. Any error raised while measuring
    is logged and reported as False.
    """

    def validator(start, end, fpath):
        try:
            measured = get_duration(fpath)
        except Exception:
            # An unreadable or unparsable file counts as a failed validation.
            logger.exception('Error determining duration of %s', fpath)
            return False

        logger.debug(
            'expect %s to be %.1f±%.1fs, is %.1f',
            fpath,
            expected_duration_s,
            error_threshold_s,
            measured,
        )
        too_long = measured > expected_duration_s + error_threshold_s
        too_short = measured < expected_duration_s - error_threshold_s
        return not (too_long or too_short)

    return validator
def generate_mp3(db_id: int, **kwargs):
    """Create the mp3 for a recording and mark that recording as ready in the db.

    Args:
        db_id: id of the recording to mark as ready once the file exists.
        **kwargs: forwarded unchanged to create_mp3(). When FORGE_VERIFY is
            enabled they must include ``start`` and ``end``, whose difference
            is the expected duration of the generated file.

    Returns:
        True when the file was created and the recording marked as ready;
        False when every attempt failed validation (the recording is then
        left untouched).

    With FORGE_VERIFY enabled the generation is attempted up to 10 times,
    sleeping 1, 2, 3... seconds between attempts; otherwise it runs once,
    without validation.
    """
    config = get_config()
    if config['FORGE_VERIFY']:
        validator = get_validator(
            (kwargs['end'] - kwargs['start']).total_seconds(),
            config['FORGE_VERIFY_THRESHOLD'],
        )
        retries = 10
    else:
        validator = None
        retries = 1

    # Every run of create_mp3 goes through the validator: there is no
    # unvalidated run before the loop, so a file is only left in place after
    # it has been accepted.
    for attempt in range(1, retries + 1):
        result = create_mp3(validator=validator, **kwargs)
        logger.debug('Create mp3 for %d -> %s', db_id, result)
        if result:
            break
        if attempt < retries:
            logger.debug("waiting %d", attempt)
            time.sleep(attempt)  # waiting time increases at each retry
    else:
        # The loop ended without a break: no attempt produced a valid file.
        logger.warning("Could not create mp3 for %d: validation failed", db_id)
        return False

    rec = db._search(_id=db_id)[0]
    rec.ready = True
    db.get_session(rec).commit()
    return True