Add http retriever
This commit is contained in:
parent
a192501570
commit
1718c4c331
1 changed files with 60 additions and 0 deletions
60
techrec/http_retriever.py
Normal file
60
techrec/http_retriever.py
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
# -*- encoding: utf-8 -*-
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
|
||||||
|
import aiohttp # type: ignore
|
||||||
|
|
||||||
|
from .config_manager import get_config
|
||||||
|
|
||||||
|
# Number of bytes requested from the HTTP response body per read (4 KiB).
CHUNK_SIZE = 4096
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPRetriever(object):
    """
    Retrieve files through a local staging directory, downloading on a miss.

    This class offers the `get` method to retrieve the file from the local
    staging path or, if missing, from the given remote.

    The class is a singleton: every `HTTPRetriever()` call returns the same
    object. Note that `__init__` still runs on each call and re-reads the
    staging directory.

    Files are identified by the last path component of the remote URL, which
    is also the name they are stored under in the staging directory.
    """

    _instance = None

    def __new__(cls):
        # `__new__` receives the class, not an instance: the shared object
        # must be stored on (and read from) `cls`.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        self.repo_path = get_config()["AUDIO_STAGING"]
        # Map each file name already present in the staging directory to its
        # full local path, so previously downloaded files are reused.
        self.repo = {
            name: os.path.join(self.repo_path, name)
            for name in os.listdir(self.repo_path)
        }

    @staticmethod
    def _filename(remote: str) -> str:
        """
        Return the file name (last path component) of `remote`.

        Raises:
            ValueError: if `remote` has no trailing file name (for example it
                is empty or ends with a slash), since there would be nothing
                to name the local file after.
        """
        _, filename = os.path.split(remote)
        if not filename:
            raise ValueError("remote has no file name: %r" % (remote,))
        return filename

    async def get(self, remote: str) -> str:
        """
        This will look in the local staging path (ideally on a tmpfs or
        something similar), and return the file from there if present.
        Otherwise, it will download it from the remote.

        Args:
            remote: URL of the file to retrieve.

        Returns:
            The local path of the file inside the staging directory.

        Raises:
            ValueError: if `remote` does not end with a file name.
        """
        # The cache is keyed by file name, matching how `__init__` populates
        # it from the directory listing.
        filename = self._filename(remote)
        if filename in self.repo:
            return self.repo[filename]
        local = await self._download(remote)
        self.repo[filename] = local
        return local

    async def _download(self, remote: str) -> str:
        """
        This will download to AUDIO_STAGING the remote file and return the
        local path of the downloaded file.

        Args:
            remote: URL of the file to download.

        Returns:
            The local path the response body was written to.

        Raises:
            ValueError: if `remote` does not end with a file name.
            aiohttp.ClientResponseError: if the server answers with an HTTP
                error status.
        """
        local = os.path.join(self.repo_path, self._filename(remote))
        async with aiohttp.ClientSession() as session:
            async with session.get(remote) as resp:
                # Fail before touching the disk, so an error page is never
                # stored (and later served) as if it were the requested file.
                resp.raise_for_status()
                # The body arrives as bytes: the file must be opened for
                # binary writing.
                with open(local, "wb") as f:
                    while True:
                        chunk = await resp.content.read(CHUNK_SIZE)
                        if not chunk:
                            break
                        f.write(chunk)
        return local
|
||||||
|
|
||||||
|
|
||||||
|
# Shared module-level retriever, instantiated when this module is imported.
# Instantiation runs HTTPRetriever.__init__, which reads the AUDIO_STAGING
# setting and lists that directory, so both must be available at import time.
RETRIEVER = HTTPRetriever()
|
Loading…
Reference in a new issue