From 85b06ae513bd11bb03f3968f8069a84f20c26adc Mon Sep 17 00:00:00 2001
From: boyska
Date: Fri, 29 Nov 2024 10:51:31 +0100
Subject: [PATCH] Initial commit

---
 .gitignore                   |   5 ++
 caricari/__init__.py         |   0
 caricari/database.py         |  46 ++++++++++
 caricari/httpcommon.py       |  42 ++++++++++
 caricari/private.py          | 157 +++++++++++++++++++++++++++++++++++
 caricari/public.py           |  76 +++++++++++++++++
 caricari/templates/form.html |  13 +++
 caricari/templates/ok.html   |  13 +++
 config.toml                  |   7 ++
 requirements.txt             |  34 ++++++++
 10 files changed, 393 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 caricari/__init__.py
 create mode 100644 caricari/database.py
 create mode 100644 caricari/httpcommon.py
 create mode 100644 caricari/private.py
 create mode 100644 caricari/public.py
 create mode 100644 caricari/templates/form.html
 create mode 100644 caricari/templates/ok.html
 create mode 100644 config.toml
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6b1dfbd
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+__pycache__
+.*.sw[po]
+/venv/
+*.sqlite
+/files/
diff --git a/caricari/__init__.py b/caricari/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/caricari/database.py b/caricari/database.py
new file mode 100644
index 0000000..3c4b798
--- /dev/null
+++ b/caricari/database.py
@@ -0,0 +1,46 @@
+"""SQLAlchemy models for uploaded files and their archived copies."""
+
+from sqlalchemy import MetaData, Table, Column, Integer, String, ForeignKey
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import DeclarativeBase, relationship, Mapped, mapped_column
+
+
+class Base(DeclarativeBase):
+    """Declarative base class shared by the models below."""
+
+
+# Exposed so that the applications can call metadata.create_all(engine).
+metadata = Base.metadata
+
+
+class Original(Base):
+    """Metadata of one uploaded file."""
+
+    __tablename__ = "original"
+    id: Mapped[int] = mapped_column(primary_key=True)
+    uploader: Mapped[str]
+    upload_time: Mapped[int]  # utc unix epoch
+    mime: Mapped[str]
+    filepath: Mapped[str]  # relative to the configured files directory
+    name: Mapped[str]
+    size: Mapped[int]
+    # mapped_column() instead of a bare Column, so that the non-Optional
+    # annotation makes the column NOT NULL like the fields above.
+    sha256: Mapped[str] = mapped_column(unique=True)
+
+    archived: Mapped[list["Archived"]] = relationship(back_populates="original")
+
+
+class Archived(Base):
+    """Metadata of a copy of an Original kept on an external archive."""
+
+    __tablename__ = "archived"
+    id: Mapped[int] = mapped_column(primary_key=True)
+    original_id: Mapped[int] = mapped_column(ForeignKey("original.id"), nullable=False)
+    original: Mapped[Original] = relationship(back_populates="archived")
+    link: Mapped[str]  # https://archive.org/...
+    format: Mapped[str]  # "mp3"
+    archive: Mapped[str]  # "archive.org"
+    archive_time: Mapped[int]
+    size: Mapped[int]
+    sha256: Mapped[str] = mapped_column(unique=True)
diff --git a/caricari/httpcommon.py b/caricari/httpcommon.py
new file mode 100644
index 0000000..a4ee9fb
--- /dev/null
+++ b/caricari/httpcommon.py
@@ -0,0 +1,42 @@
+"""Helpers shared by the private and the public application."""
+
+import os
+from pathlib import Path
+
+import toml
+
+from pydantic import BaseModel
+
+
+class ProxyInfo(BaseModel):
+    """All the info we get from the proxy through the headers it sets."""
+
+    x_forwarded_user: str | None = "anonymous"
+
+
+def get_config():
+    """Load the TOML configuration file and normalize it.
+
+    The path is taken from the CARICARI_CONFIG environment variable and
+    defaults to "config.toml".
+
+    Returns:
+        The parsed configuration as a dict. The "private", "public" and
+        "general" sections always exist, and conf["general"]["files"] is a
+        pathlib.Path.
+
+    Raises:
+        OSError: if the configuration file cannot be opened.
+        KeyError: if "files" is missing from the "general" section.
+    """
+    config_path = os.getenv("CARICARI_CONFIG", "config.toml")
+    # A context manager closes the file as soon as it has been parsed,
+    # instead of leaving the handle open until garbage collection.
+    with open(config_path, encoding="utf-8") as config_file:
+        conf = toml.load(config_file)
+    conf.setdefault("private", {})
+    conf.setdefault("public", {})
+    conf.setdefault("general", {})
+    conf["general"]["files"] = Path(conf["general"]["files"])
+
+    return conf
diff --git a/caricari/private.py b/caricari/private.py
new file mode 100644
index 0000000..cb3b1e8
--- /dev/null
+++ b/caricari/private.py
@@ -0,0 +1,157 @@
+"""Private application: receives file uploads."""
+
+import os
+from pathlib import Path
+from typing import Annotated
+import datetime
+import aiofiles
+import tempfile
+import hashlib
+
+import toml
+import magic
+
+import sqlalchemy.exc
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+from fastapi import FastAPI, UploadFile, File, Header, Form, HTTPException
+from fastapi.requests import Request
+from fastapi.responses import RedirectResponse
+from fastapi.templating import Jinja2Templates
+from pydantic import BaseModel
+
+from .httpcommon import ProxyInfo, get_config
+from . import database
+
+
+CONFIG = get_config()
+
+app = FastAPI()
+templates = Jinja2Templates(directory=Path(__file__).parent / "templates")
+
+engine = create_engine(CONFIG["general"]["db"])
+database.metadata.create_all(engine)
+session_pool = sessionmaker(bind=engine)
+
+
+@app.get("/")
+def home():
+    """Redirect to the upload form."""
+    return RedirectResponse(url=app.url_path_for("upload_form"))
+
+
+@app.get("/upload")
+def upload_form(request: Request):
+    """Render the upload form."""
+    return templates.TemplateResponse(
+        request=request,
+        name="form.html",
+    )
+
+
+class UploadModel(BaseModel):
+    """Form fields accepted by POST /upload."""
+
+    file: UploadFile
+
+
+@app.post("/upload")
+async def upload(
+    data: Annotated[UploadModel, Form()],
+    proxy: Annotated[ProxyInfo, Header()],
+    request: Request,
+):
+    """Save an uploaded file to disk and record it in the database.
+
+    The file is stored in <files>/<year>/<month>/ under a unique name built
+    from the uploaded filename; its SHA-256 is computed while it is written.
+
+    Returns:
+        A dict with "id", "path" and "url" if the Accept header contains
+        "application/json", the rendered ok.html template otherwise.
+
+    Raises:
+        HTTPException: 400 if the filename is missing or has no extension,
+            or if a file with the same SHA-256 is already in the database.
+    """
+    now = datetime.datetime.now()
+    directory = f"{now.year}/{now.month}"
+    # The filename can be missing: treat that like a missing extension.
+    filename = Path(data.file.filename or "")
+    if not filename.suffix:
+        raise HTTPException(status_code=400, detail="Invalid filename extension")
+
+    # NamedTemporaryFile does not create missing directories, so without this
+    # the first upload of every month would fail.
+    target_dir = Path(CONFIG["general"]["files"]) / directory
+    target_dir.mkdir(parents=True, exist_ok=True)
+
+    # XXX: normalize filename
+    # XXX: avoid duplicates
+    # Only reserves a unique name; the content is written below.
+    temp = tempfile.NamedTemporaryFile(
+        prefix=filename.stem,
+        suffix=filename.suffix,
+        dir=target_dir,
+        delete=False,
+    )
+    temp.close()
+    sha256 = hashlib.sha256()
+
+    try:
+        async with aiofiles.open(temp.name, "wb") as out_file:
+            while content := await data.file.read(64 * 1024):
+                await out_file.write(content)
+                sha256.update(content)
+
+        orig = database.Original(
+            filepath=str(Path(directory) / Path(temp.name).name),
+            name=data.file.filename,
+            uploader=proxy.x_forwarded_user,
+            # timestamp() is portable, unlike the libc-specific strftime("%s")
+            upload_time=int(now.timestamp()),
+            mime=magic.from_file(temp.name, mime=True),
+            size=Path(temp.name).stat().st_size,
+            sha256=sha256.hexdigest(),
+        )
+        with session_pool() as conn:
+            conn.add(orig)
+            try:
+                conn.commit()
+            except sqlalchemy.exc.IntegrityError as exc:
+                conn.rollback()
+                conflicting = (
+                    conn.query(database.Original)
+                    .filter(database.Original.sha256 == sha256.hexdigest())
+                    .one_or_none()
+                )
+                if conflicting is None:
+                    # Some other constraint failed: do not report it as a
+                    # duplicate upload.
+                    raise
+                detail = f"File is already in the archive: {conflicting.filepath}"
+                raise HTTPException(status_code=400, detail=detail) from exc
+            conn.refresh(orig)
+            result = {
+                "id": orig.id,
+                "path": orig.filepath,
+                "url": CONFIG["public"]["baseurl"] + "/dl/" + orig.filepath,
+            }
+    except Exception:
+        # Do not leave a file on disk that the database does not know about.
+        Path(temp.name).unlink(missing_ok=True)
+        raise
+
+    # Outside of the try block: once the row is committed the file must stay,
+    # even if building the response fails.
+    if "application/json" in request.headers.get("accept", ""):
+        return result
+    return templates.TemplateResponse(name="ok.html", request=request, context=result)
+
+
+@app.post("/list")
+def list():
+    """Placeholder for the file list: returns a fixed string."""
+    # NOTE(review): this name shadows the builtin list() in this module; it is
+    # kept because renaming the function would also rename the route.
+    return "lista file"
diff --git a/caricari/public.py b/caricari/public.py
new file mode 100644
index 0000000..78c232f
--- /dev/null
+++ b/caricari/public.py
@@ -0,0 +1,76 @@
+"""Public application: serves the uploaded files."""
+
+import os
+from pathlib import Path
+from typing import Annotated
+import datetime
+import aiofiles
+import tempfile
+import hashlib
+
+import magic
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker, joinedload
+from fastapi import FastAPI, HTTPException
+from fastapi.requests import Request
+from fastapi.responses import RedirectResponse, FileResponse
+from fastapi.templating import Jinja2Templates
+from pydantic import BaseModel
+
+from .httpcommon import ProxyInfo, get_config
+from . import database
+
+CONFIG = get_config()
+
+app = FastAPI()
+templates = Jinja2Templates(directory=Path(__file__).parent / "templates")
+
+engine = create_engine(CONFIG["general"]["db"])
+database.metadata.create_all(engine)
+session_pool = sessionmaker(bind=engine)
+
+
+@app.get("/")
+def home():
+    """Return a fixed placeholder string."""
+    return "public archive"
+
+
+@app.get("/dl/{path:path}")
+async def get_file(path: str):
+    """Serve an uploaded file, or redirect to its archived copy.
+
+    Args:
+        path: the filepath of the file, as stored in the database.
+
+    Returns:
+        A redirect to the link of the first archived copy if there is one,
+        the local file otherwise.
+
+    Raises:
+        HTTPException: 404 if path is unknown, or if the file has no archived
+            copy and is missing from the local storage.
+    """
+    with session_pool() as conn:
+        original = (
+            conn.query(database.Original)
+            .options(joinedload(database.Original.archived))
+            .filter(database.Original.filepath == path)
+            .one_or_none()
+        )
+        if original is None:
+            raise HTTPException(status_code=404)
+        if not original.archived:
+            final_path = Path(CONFIG["general"]["files"]) / original.filepath
+            if not final_path.is_file():
+                # "return 404" would send the number 404 as the body of a
+                # 200 OK response.
+                raise HTTPException(status_code=404)
+            return FileResponse(
+                # XXX: avoid it being an attachment
+                final_path, media_type=original.mime, filename=original.name
+            )
+
+        # XXX: handle ?accept=
+        return RedirectResponse(original.archived[0].link)
diff --git a/caricari/templates/form.html b/caricari/templates/form.html
new file mode 100644
index 0000000..d6f7974
--- /dev/null
+++ b/caricari/templates/form.html
@@ -0,0 +1,13 @@
+
+
+ Upload file
+
+
+

Upload file

+ +
+ + +
+ + diff --git a/caricari/templates/ok.html b/caricari/templates/ok.html new file mode 100644 index 0000000..b27f499 --- /dev/null +++ b/caricari/templates/ok.html @@ -0,0 +1,13 @@ + + + File has been uploaded + + +

Success!

+ +

+ File has been loaded at {{url}} +

+ + + diff --git a/config.toml b/config.toml new file mode 100644 index 0000000..4bda1fd --- /dev/null +++ b/config.toml @@ -0,0 +1,7 @@ +[general] +logging = 'DEBUG' +db = 'sqlite:///db.sqlite' +files = 'files/' + +[public] +baseurl = 'http://127.0.0.1:8001' diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8bdedce --- /dev/null +++ b/requirements.txt @@ -0,0 +1,34 @@ +annotated-types==0.7.0 +anyio==4.6.2.post1 +certifi==2024.8.30 +click==8.1.7 +dnspython==2.7.0 +email_validator==2.2.0 +fastapi==0.115.5 +fastapi-cli==0.0.5 +h11==0.14.0 +httpcore==1.0.7 +httptools==0.6.4 +httpx==0.28.0 +idna==3.10 +Jinja2==3.1.4 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +mdurl==0.1.2 +pydantic==2.10.2 +pydantic_core==2.27.1 +Pygments==2.18.0 +python-dotenv==1.0.1 +python-multipart==0.0.18 +PyYAML==6.0.2 +rich==13.9.4 +shellingham==1.5.4 +sniffio==1.3.1 +starlette==0.41.3 +toml==0.10.2 +typer==0.14.0 +typing_extensions==4.12.2 +uvicorn==0.32.1 +uvloop==0.21.0 +watchfiles==1.0.0 +websockets==14.1