defmodule PodcastFeed.Provider.Archive.Parser do
  @moduledoc """
  Public API for fetching an item's metadata from archive.org and converting
  it into the common podcast data structure.

  The result is a map with two keys: `:podcast` (channel-level data) and
  `:items` (one entry per audio file of the archive.org item).
  """

  alias PodcastFeed.Utility.Format
  alias __MODULE__

  @archive_metadata_url "http://archive.org/metadata/{identifier}"
  @download_url "https://archive.org/download/{identifier}/{filename}"
  @podcast_link "https://archive.org/details/{identifier}"

  @custom_metadata_defaults %{
    "link" => nil,
    "image" => %{
      "url" => nil,
      "title" => nil,
      "link" => nil
    },
    "category" => "",
    "explicit" => "no",
    "version" => "1"
  }

  @enforce_keys [:identifier]
  defstruct [
    :identifier,
    :podcast_data,
    :archive_metadata,
    custom_metadata: @custom_metadata_defaults
  ]

  @typedoc "Parsing token: the item identifier plus the metadata fetched for it."
  @type t :: %__MODULE__{}

  @doc """
  Fetches the archive.org metadata for `identifier` and converts it into
  podcast feed data (see `to_podcast_feed_data/1`).

  Raises if the HTTP request does not answer with status 200, if the body is
  not valid JSON, or if the decoded metadata lacks the expected keys.
  """
  @spec by_identifier(String.t()) :: %{podcast: map(), items: [map()]}
  def by_identifier(identifier) do
    %Parser{identifier: identifier}
    |> enrich_with_archive_metadata()
    |> to_podcast_feed_data()
  end

  @doc """
  Converts a token whose `:archive_metadata` is already populated into a map
  with the `:podcast` data and the list of feed `:items`.

  `:archive_metadata` must contain the `"metadata"`, `"item_last_updated"` and
  `"files"` keys, otherwise a `FunctionClauseError` is raised.
  """
  @spec to_podcast_feed_data(map()) :: %{podcast: map(), items: [map()]}
  def to_podcast_feed_data(token) do
    %{
      podcast: podcast_data(token),
      items: items_data(token)
    }
  end

  # Builds the channel-level podcast data from the item's "metadata" section.
  defp podcast_data(
         %{archive_metadata: %{"metadata" => metadata, "item_last_updated" => last_updated}} =
           token
       ) do
    default_link = Format.compile(@podcast_link, identifier: token.identifier)
    # An "op_link" entry in the metadata overrides the default details-page link.
    link = Map.get(metadata, "op_link") || default_link

    %{
      title: metadata["title"],
      description: metadata["description"],
      webmaster: metadata["uploader"],
      managingEditor: metadata["uploader"],
      owner: %{
        name: metadata["creator"],
        email: metadata["uploader"]
      },
      keywords: parse_subject(metadata["subject"]),
      pubDate:
        metadata["publicdate"]
        |> NaiveDateTime.from_iso8601!()
        |> DateTime.from_naive!("Etc/UTC"),
      lastBuildDate: DateTime.from_unix!(last_updated, :second),
      author: metadata["creator"],
      language: metadata["language"],
      image: %{
        url: fetch_cover(token),
        title: metadata["title"],
        link: link
      },
      link: link,
      category: Map.get(metadata, "op_category", ""),
      explicit: Map.get(metadata, "op_explicit", "no")
    }
  end

  # Builds one feed item per audio file listed in the item's "files" section.
  defp items_data(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
    files
    |> filter_audio_files()
    |> Enum.map(&to_feed_item(&1, identifier, files))
  end

  # Downloads and JSON-decodes the metadata document of `identifier`.
  # The match on status 200 is deliberately assertive: any other outcome raises.
  defp fetch_archive_metadata(identifier) do
    metadata_url = Format.compile(@archive_metadata_url, identifier: identifier)

    {:ok, 200, _headers, client_ref} =
      :hackney.get(metadata_url, [], "",
        follow_redirect: true,
        connect_timeout: 30_000,
        recv_timeout: 30_000
      )

    {:ok, metadata_json} = :hackney.body(client_ref)
    Jason.decode!(metadata_json)
  end

  # Keeps only the files whose "format" mentions MP3 (case-insensitive).
  # FIXME: other audio formats (e.g. ogg) are not recognised yet.
  defp filter_audio_files(files) do
    Enum.filter(files, &format_matches?(&1, ~r/MP3/i))
  end

  # True when the file's "format" is a string matching `regex`.
  # A missing (nil) format counts as "no match"; `nil =~ regex` would raise.
  defp format_matches?(file, regex) do
    case Map.get(file, "format") do
      format when is_binary(format) -> format =~ regex
      _other -> false
    end
  end

  # Converts a single file entry into a feed item map.
  # "mtime" is parsed as unix seconds; "length" is parsed as a float and
  # truncated to an integer.
  defp to_feed_item(file, identifier, _files) do
    filename = Map.get(file, "name")

    %{
      title: file["title"],
      description: "",
      pubDate:
        file
        |> Map.get("mtime")
        |> Integer.parse()
        |> elem(0)
        |> DateTime.from_unix!(:second),
      link: download_url(identifier, filename),
      length:
        file
        |> Map.get("length")
        |> Float.parse()
        |> elem(0)
        |> trunc(),
      size: Map.get(file, "size"),
      summary: "",
      # image: download_url(identifier, fetch_image_of_audio(filename, files)),
      image: nil,
      keywords:
        file
        |> Map.take(["album", "artist", "genre"])
        |> Map.values(),
      explicit: "no"
    }
  end

  # Returns the download URL of the first original (non-derivative) image file,
  # or nil when the item has none.
  defp fetch_cover(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
    cover =
      Enum.find(files, fn file ->
        file["source"] == "original" and
          format_matches?(file, ~r/JPG|JPEG|PNG|GIF|Item Image/i)
      end)

    filename =
      case cover do
        nil -> nil
        file -> Map.get(file, "name")
      end

    download_url(identifier, filename)
  end

  # defp fetch_image_of_audio(audio_file, files) do
  #   files
  #   |> Enum.filter(fn
  #     %{"format" => format, "source" => "derivative", "original" => ^audio_file} ->
  #       format =~ ~r/JPG|JPEG|PNG|GIF/i
  #     _ -> nil
  #   end)
  #   |> fetch_image_of_audio()
  # end
  # defp fetch_image_of_audio(image_files) when is_list(image_files), do: fetch_image_of_audio(List.first(image_files))
  # defp fetch_image_of_audio(nil), do: nil
  # defp fetch_image_of_audio(image_file), do: image_file |> Map.get("name", nil)

  # Builds the URI-encoded download URL of `filename`; nil filename yields nil.
  defp download_url(_identifier, nil), do: nil

  defp download_url(identifier, filename) do
    @download_url
    |> Format.compile(identifier: identifier, filename: filename)
    |> URI.encode()
  end

  # Stores the fetched archive.org metadata in the token.
  defp enrich_with_archive_metadata(token) do
    %Parser{token | archive_metadata: fetch_archive_metadata(token.identifier)}
  end

  # Normalises the "subject" metadata into a list of keywords. It may be a
  # list, a ";"-separated string, or absent (nil), which yields no keywords.
  defp parse_subject(nil), do: []
  defp parse_subject(subject) when is_list(subject), do: subject
  defp parse_subject(subject) when is_binary(subject), do: String.split(subject, ";")
end