Add http retriever

This commit is contained in:
Blallo 2021-08-26 21:46:47 -03:00
parent a192501570
commit 1718c4c331
No known key found for this signature in database
GPG key ID: 0CBE577C9B72DC3F

60
techrec/http_retriever.py Normal file
View file

@@ -0,0 +1,60 @@
# -*- encoding: utf-8 -*-
import asyncio
import os
import aiohttp # type: ignore
from .config_manager import get_config
# Number of bytes requested per read of the HTTP response body (4 KiB).
CHUNK_SIZE = 1 << 12
class HTTPRetriever(object):
    """
    This class offers the `get` method to retrieve the file from the local staging
    path or, if missing, from the given remote.

    Only one instance ever exists (see `__new__`). Local files are indexed in
    `self.repo` by base name; each entry maps that name to the file's full path
    inside the staging directory (the `AUDIO_STAGING` config entry).
    """

    _instance = None

    def __new__(cls):
        # Singleton: the single instance is stored on the class and handed back
        # on every subsequent construction.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # Python calls __init__ after every `HTTPRetriever()` even though
        # __new__ returns the same object, so the index is rebuilt from disk
        # each time the class is "constructed".
        self.repo_path = get_config()["AUDIO_STAGING"]
        self.repo = {
            name: os.path.join(self.repo_path, name)
            for name in os.listdir(self.repo_path)
        }

    @staticmethod
    def _local_name(remote: str) -> str:
        """
        Return the base name of `remote`, used both as the key in `self.repo`
        and as the file name inside the staging directory.

        Raises ValueError when `remote` has no base name (e.g. it ends with a
        slash), because the download target would then be the staging directory
        itself.
        """
        filename = os.path.basename(remote)
        if not filename:
            raise ValueError(f"cannot derive a file name from {remote!r}")
        return filename

    async def get(self, remote: str) -> str:
        """
        This will look in the local staging path (ideally on a tmpfs or something
        similar), and return the file from there if present. Otherwise, it will
        download it from the remote.

        `remote` is the URL of the file; the returned value is the local path.
        Raises ValueError if no file name can be derived from `remote`, and
        propagates any error raised while downloading.
        """
        filename = self._local_name(remote)
        if filename in self.repo:
            return self.repo[filename]
        local = await self._download(remote)
        # Register the file only after the download completed successfully.
        self.repo[filename] = local
        return local

    async def _download(self, remote: str) -> str:
        """
        This will download to AUDIO_STAGING the remote file and return the local
        path of the downloaded file.

        Raises ValueError if no file name can be derived from `remote`, and
        whatever aiohttp raises for connection failures or non-success HTTP
        statuses. A partially written file is removed before the error
        propagates.
        """
        local = os.path.join(self.repo_path, self._local_name(remote))
        async with aiohttp.ClientSession() as session:
            async with session.get(remote) as resp:
                # Fail before touching the disk so an HTTP error page is never
                # stored (and later served) as if it were the requested file.
                resp.raise_for_status()
                try:
                    # The body arrives as bytes: open for binary writing.
                    with open(local, "wb") as f:
                        while True:
                            chunk = await resp.content.read(CHUNK_SIZE)
                            if not chunk:
                                break
                            f.write(chunk)
                except BaseException:
                    # Also covers task cancellation: do not leave a truncated
                    # file behind for the next directory scan to pick up.
                    if os.path.exists(local):
                        os.remove(local)
                    raise
        return local
# Module-level retriever, created when this module is imported. Constructing it
# runs HTTPRetriever.__init__, which reads AUDIO_STAGING from the config and
# lists that directory.
RETRIEVER = HTTPRetriever()