defmodule PodcastFeed.Provider.Archive.Parser do
  @moduledoc """
  Builds podcast feed data from an archive.org item.

  Two documents are fetched for an item identifier:

    * the item metadata served under `/metadata/{identifier}`, which must be
      available (any non-200 answer raises), and
    * an optional `metadata.json` stored inside the item, carrying extra feed
      settings (`"link"`, `"image"`, `"category"`, `"explicit"`). When it cannot
      be fetched, empty defaults are used instead.

  The result is a map with a `:podcast` entry (channel-level data) and an
  `:items` entry (one map per MP3 file of the item).
  """

  alias PodcastFeed.Utility.Format

  @extra_metadata_url "https://archive.org/download/{identifier}/metadata.json"
  @archive_metadata_url "https://archive.org/metadata/{identifier}"
  @download_url "https://archive.org/download/{identifier}/{filename}"

  @extra_metadata_defaults %{
    "link" => "",
    "image" => %{
      "url" => "",
      "title" => "",
      "link" => ""
    },
    "category" => "",
    "explicit" => ""
  }

  # Formats accepted for the item cover, most preferred first. JPEG stays first
  # so that items which already resolved a cover keep resolving the same file.
  @cover_formats ["JPEG", "PNG", "GIF"]

  @doc """
  Fetches both metadata documents for `identifier` and parses them.

  Raises when the item metadata request does not answer with status 200 or
  when either body is not valid JSON.
  """
  @spec by_identifier(String.t()) :: %{podcast: map(), items: [map()]}
  def by_identifier(identifier) do
    extra_metadata_json = fetch_extra_metadata(identifier)
    metadata_json = fetch_archive_metadata(identifier)
    parse(identifier, metadata_json, extra_metadata_json)
  end

  @doc """
  Converts already decoded metadata into feed data.

  `metadata` must contain the `"files"`, `"metadata"` and `"item_last_updated"`
  keys. `extra` is the decoded extra metadata (or the defaults); when it has no
  image URL and the item contains an original cover image, the cover's download
  URL is used as the image URL.

  Returns `%{podcast: map, items: [map]}`.
  """
  @spec parse(String.t(), map(), map()) :: %{podcast: map(), items: [map()]}
  def parse(identifier, metadata = %{"files" => files}, extra) do
    extra =
      files
      |> fetch_cover(identifier)
      |> enrich_extra_metadata_with_cover(extra)

    %{
      podcast: podcast_data(metadata, extra),
      items: items_data(metadata, identifier)
    }
  end

  defp fetch_extra_metadata(identifier) do
    extra_metadata_url = Format.compile(@extra_metadata_url, identifier: identifier)

    extra_metadata_url
    |> :hackney.get([], "", follow_redirect: true)
    |> parse_extra_metadata_response()
  end

  defp parse_extra_metadata_response({:ok, 200, _headers, client_ref}) do
    {:ok, extra_metadata_json} = :hackney.body(client_ref)

    # Line breaks are stripped from the raw body before it is decoded.
    extra_metadata_json
    |> String.split("\n")
    |> Enum.join()
    |> Poison.decode!()
  end

  # Any other HTTP status: fall back to the defaults, but discard the unread
  # body first so the connection is not left checked out.
  defp parse_extra_metadata_response({:ok, _status, _headers, client_ref}) do
    _ = :hackney.skip_body(client_ref)
    @extra_metadata_defaults
  end

  # Transport errors and any other reply shape.
  defp parse_extra_metadata_response(_), do: @extra_metadata_defaults

  defp fetch_archive_metadata(identifier) do
    metadata_url = Format.compile(@archive_metadata_url, identifier: identifier)

    # The item metadata is mandatory: anything but a 200 is a hard failure.
    {:ok, 200, _headers, client_ref} =
      :hackney.get(metadata_url, [], "",
        follow_redirect: true,
        connect_timeout: 30000,
        recv_timeout: 30000
      )

    {:ok, metadata_json} = :hackney.body(client_ref)
    Poison.decode!(metadata_json)
  end

  # No cover was found: nothing to add.
  defp enrich_extra_metadata_with_cover(nil, extra), do: extra

  # A cover was found: use it unless the extra metadata already names an image.
  defp enrich_extra_metadata_with_cover(cover, extra) do
    case Map.get(extra, "image") do
      # No image section at all.
      nil ->
        Map.put(extra, "image", %{"url" => cover})

      # Image URL explicitly configured: keep it.
      %{"url" => url} when is_binary(url) and url != "" ->
        extra

      # Image section present but its URL is missing, nil or empty.
      image when is_map(image) ->
        Map.put(extra, "image", Map.put(image, "url", cover))

      # Unexpected shape: leave untouched.
      _other ->
        extra
    end
  end

  defp podcast_data(%{"metadata" => metadata, "item_last_updated" => last_updated}, extra) do
    %{
      title: metadata["title"],
      description: metadata["description"],
      webmaster: metadata["uploader"],
      managingEditor: metadata["uploader"],
      owner: %{
        name: metadata["creator"],
        email: metadata["uploader"]
      },
      keywords: metadata["subject"],
      pubDate:
        metadata["publicdate"]
        |> NaiveDateTime.from_iso8601!()
        |> DateTime.from_naive!("Etc/UTC"),
      lastBuildDate: DateTime.from_unix!(last_updated, :second),
      author: metadata["creator"],
      language: metadata["language"],
      image: %{
        url: extra["image"]["url"],
        title: extra["image"]["title"],
        link: extra["image"]["link"]
      },
      link: extra["link"],
      category: extra["category"],
      explicit: extra["explicit"]
    }
  end

  defp items_data(%{"files" => files}, identifier) do
    files
    |> filter_audio_files()
    |> Enum.map(fn file -> to_feed_item(file, identifier, files) end)
  end

  # FIXME: only MP3 is recognised; ogg and other audio formats are skipped.
  # Entries without a string "format" are skipped instead of raising.
  defp filter_audio_files(files) do
    Enum.filter(files, fn file ->
      format = Map.get(file, "format")
      is_binary(format) and format =~ ~r/MP3/i
    end)
  end

  defp to_feed_item(file, identifier, files) do
    filename = Map.get(file, "name")

    %{
      title: file["title"],
      description: "",
      pubDate:
        file
        |> Map.get("mtime")
        |> Integer.parse()
        |> elem(0)
        |> DateTime.from_unix!(:second),
      link: download_url(identifier, filename),
      length: file |> Map.get("length") |> parse_length(),
      size: Map.get(file, "size"),
      summary: "",
      image: download_url(identifier, fetch_image_of_audio(filename, files)),
      keywords: file |> Map.take(["album", "artist", "genre"]) |> Map.values(),
      explicit: "no"
    }
  end

  # Converts a length string to whole seconds. Accepts a plain number of
  # seconds ("227.87" -> 227) as well as colon-separated clock values
  # ("07:22" -> 442, "1:02:03" -> 3723), which a plain float parse would cut
  # down to their first component. Raises on anything that is not a string or
  # that has a non-numeric component.
  defp parse_length(length) when is_binary(length) do
    length
    |> String.split(":")
    |> Enum.reduce(0, fn part, seconds ->
      seconds * 60 + (part |> Float.parse() |> elem(0))
    end)
    |> trunc()
  end

  # Download URL of the first original image file of the item, or nil when the
  # item has none. Formats are tried in the order given by @cover_formats.
  defp fetch_cover(files, identifier) do
    originals = Enum.filter(files, fn file -> file["source"] == "original" end)

    cover =
      Enum.find_value(@cover_formats, fn format ->
        Enum.find(originals, fn file -> file["format"] == format end)
      end)

    download_url(identifier, cover && Map.get(cover, "name"))
  end

  # Name of the first derivative image generated from `audio_file`, or nil.
  defp fetch_image_of_audio(audio_file, files) do
    files
    |> Enum.find(fn
      %{"format" => format, "source" => "derivative", "original" => ^audio_file}
      when is_binary(format) ->
        format =~ ~r/JPG|JPEG|PNG|GIF/i

      _ ->
        false
    end)
    |> case do
      nil -> nil
      image_file -> Map.get(image_file, "name")
    end
  end

  defp download_url(_identifier, nil), do: nil

  defp download_url(identifier, filename) do
    @download_url
    |> Format.compile(identifier: identifier, filename: filename)
    |> URI.encode()
  end
end