123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141 |
defmodule Openpod.Provider.Archive.Parser do
  @moduledoc """
  Public API for fetching item metadata from archive.org and converting it
  into a common podcast data structure.

  The entry point is `by_identifier/1`, which downloads the metadata document
  of an archive.org item and returns a map with a `:podcast` description and a
  list of `:items` (one per MP3 file found in the item).
  """

  alias Openpod.Utility.Format
  alias __MODULE__

  # URL templates; the `{...}` placeholders are filled in through
  # `Format.compile/2` (presumably plain placeholder substitution — see
  # `Openpod.Utility.Format`).
  @archive_metadata_url "http://archive.org/metadata/{identifier}"
  @download_url "https://archive.org/download/{identifier}/{filename}"
  @podcast_link "https://archive.org/details/{identifier}"

  @custom_metadata_defaults %{
    "link" => nil,
    "image" => %{
      "url" => nil,
      "title" => nil,
      "link" => nil
    },
    "category" => "",
    "explicit" => "no",
    "version" => "1"
  }

  @enforce_keys [:identifier]
  defstruct [
    :identifier,
    :podcast_data,
    :archive_metadata,
    custom_metadata: @custom_metadata_defaults
  ]

  @doc """
  Fetches the archive.org metadata for `identifier` and converts it to
  podcast data (see `to_openpod_data/1`).

  `identifier` is interpolated into the request and download URLs, so it is
  expected to be a string.

  Performs an HTTP request. Raises a `MatchError` when the request fails or
  does not answer with status 200, and raises when the body is not valid JSON.
  """
  @spec by_identifier(String.t()) :: %{podcast: map(), items: [map()]}
  def by_identifier(identifier) do
    %Parser{identifier: identifier}
    |> enrich_with_archive_metadata()
    |> to_openpod_data()
  end

  @doc """
  Converts a token whose `:archive_metadata` has already been loaded into a
  map with the keys `:podcast` and `:items`.

  The token must carry `:identifier` and an `:archive_metadata` map containing
  the `"metadata"`, `"item_last_updated"` and `"files"` keys; otherwise a
  `FunctionClauseError` is raised.
  """
  @spec to_openpod_data(map()) :: %{podcast: map(), items: [map()]}
  def to_openpod_data(token) do
    %{
      podcast: podcast_data(token),
      items: items_data(token)
    }
  end

  # Builds the channel-level podcast description from the item metadata.
  # A custom "op_link" metadata entry, when present, overrides the default
  # archive.org details page as the podcast (and image) link.
  defp podcast_data(
         %{
           archive_metadata: %{
             "metadata" => metadata,
             "item_last_updated" => last_updated
           }
         } = token
       ) do
    default_link = Format.compile(@podcast_link, identifier: token.identifier)
    link = Map.get(metadata, "op_link") || default_link

    pub_date =
      metadata["publicdate"]
      |> NaiveDateTime.from_iso8601!()
      |> DateTime.from_naive!("Etc/UTC")

    %{
      title: metadata["title"],
      description: metadata["description"],
      webmaster: metadata["uploader"],
      managingEditor: metadata["uploader"],
      owner: %{
        name: metadata["creator"],
        email: metadata["uploader"]
      },
      keywords: parse_subject(metadata["subject"]),
      pubDate: pub_date,
      lastBuildDate: DateTime.from_unix!(last_updated, :second),
      author: metadata["creator"],
      language: ISO639.to_iso639_1(metadata["language"]),
      image: %{
        url: fetch_cover(token),
        title: metadata["title"],
        link: link
      },
      link: link,
      category: Map.get(metadata, "op_category", ""),
      explicit: Map.get(metadata, "op_explicit", "no")
    }
  end

  # Builds one feed item per audio file of the archive.org item.
  defp items_data(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
    files
    |> filter_audio_files()
    |> Enum.map(fn file -> to_feed_item(file, identifier, files) end)
  end

  # Downloads and decodes the metadata JSON document of the item.
  # Deliberately assertive: any non-200 answer or transport error crashes.
  defp fetch_archive_metadata(identifier) do
    metadata_url = Format.compile(@archive_metadata_url, identifier: identifier)

    request_options = [
      follow_redirect: true,
      connect_timeout: 30_000,
      recv_timeout: 30_000
    ]

    {:ok, 200, _headers, client_ref} = :hackney.get(metadata_url, [], "", request_options)
    {:ok, metadata_json} = :hackney.body(client_ref)
    Jason.decode!(metadata_json)
  end

  # Keeps only the files whose "format" mentions MP3 (case-insensitive).
  # FIXME: only MP3 is recognised; other audio formats (e.g. Ogg) are dropped.
  defp filter_audio_files(files) do
    Enum.filter(files, fn file -> format_matches?(file, ~r/MP3/i) end)
  end

  # Converts a single file entry into a feed item. `_files` is currently
  # unused; it is kept for the per-episode image lookup that is disabled below.
  defp to_feed_item(file, identifier, _files) do
    filename = Map.get(file, "name")

    # "mtime" arrives as a string holding a Unix timestamp in seconds.
    {mtime, _rest} = file |> Map.get("mtime") |> Integer.parse()

    # "length" arrives as a string holding (possibly fractional) seconds.
    {duration, _rest} = file |> Map.get("length") |> Float.parse()

    %{
      title: file["title"],
      description: "",
      pubDate: DateTime.from_unix!(mtime, :second),
      link: download_url(identifier, filename),
      length: trunc(duration),
      size: Map.get(file, "size"),
      summary: "",
      # image: download_url(identifier, fetch_image_of_audio(filename, files)),
      image: nil,
      keywords: file |> Map.take(["album", "artist", "genre"]) |> Map.values(),
      explicit: "no"
    }
  end

  # Returns the download URL of the first original (non-derivative) image file
  # of the item, or nil when the item has none.
  defp fetch_cover(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
    cover =
      Enum.find(files, fn file ->
        file["source"] == "original" and
          format_matches?(file, ~r/JPG|JPEG|PNG|GIF|Item Image/i)
      end)

    download_url(identifier, cover && Map.get(cover, "name"))
  end

  # defp fetch_image_of_audio(audio_file, files) do
  #   files
  #   |> Enum.filter(fn
  #     %{"format" => format, "source" => "derivative", "original" => ^audio_file} ->
  #       format =~ ~r/JPG|JPEG|PNG|GIF/i
  #     _ -> nil
  #   end)
  #   |> fetch_image_of_audio()
  # end
  # defp fetch_image_of_audio(image_files) when is_list(image_files), do: fetch_image_of_audio(List.first(image_files))
  # defp fetch_image_of_audio(nil), do: nil
  # defp fetch_image_of_audio(image_file), do: image_file |> Map.get("name", nil)

  # Builds the URL-encoded download link of a file; nil when there is no file.
  defp download_url(_identifier, nil), do: nil

  defp download_url(identifier, filename) do
    @download_url
    |> Format.compile(identifier: identifier, filename: filename)
    |> URI.encode()
  end

  # Stores the freshly fetched archive.org metadata in the token.
  defp enrich_with_archive_metadata(token) do
    %Parser{token | archive_metadata: fetch_archive_metadata(token.identifier)}
  end

  # Normalises the "subject" metadata entry into a list of keywords. The entry
  # may be a list, a single ";"-separated string, or absent altogether.
  defp parse_subject(nil), do: []
  defp parse_subject(subject) when is_list(subject), do: subject
  defp parse_subject(subject) when is_binary(subject), do: String.split(subject, ";")

  # True when the file entry has a string "format" matching `regex`.
  # A missing or non-string format counts as "no match" instead of raising,
  # which `=~` would do for a nil left-hand side.
  defp format_matches?(%{"format" => format}, regex) when is_binary(format) do
    format =~ regex
  end

  defp format_matches?(_file, _regex), do: false
end
|