open-pod/lib/podcast_feed/provider/archive/parser.ex

149 lines
5 KiB
Elixir

defmodule PodcastFeed.Provider.Archive.Parser do
alias PodcastFeed.Utility.Format
# URL template of the per-item `metadata.json` file that carries the extra feed
# fields (link, image, category, explicit).
@extra_metadata_url "https://archive.org/download/{identifier}/metadata.json"
# URL template of the archive metadata document for an item. Uses HTTPS like
# the other endpoints so the request is not sent in cleartext.
@archive_metadata_url "https://archive.org/metadata/{identifier}"
# URL template for downloading a single file of an item.
@download_url "https://archive.org/download/{identifier}/{filename}"
# Extra metadata used when `metadata.json` cannot be fetched with a 200 response.
@extra_metadata_defaults %{
  "link" => "",
  "image" => %{
    "url" => "",
    "title" => "",
    "link" => ""
  },
  "category" => "",
  "explicit" => ""
}
@doc """
Builds the feed data for the archive item named `identifier`.

Downloads the item's optional `metadata.json` first and its archive metadata
second, then hands both decoded documents to `parse/3`.

Returns a map with `:podcast` and `:items` keys. Raises if the archive
metadata request does not come back as a 200 response.
"""
def by_identifier(identifier) do
  extra = fetch_extra_metadata(identifier)
  archive = fetch_archive_metadata(identifier)

  parse(identifier, archive, extra)
end
# Requests the item's `metadata.json` (following redirects) and passes the raw
# hackney result on to `parse_extra_metadata_response/1`.
defp fetch_extra_metadata(identifier) do
  @extra_metadata_url
  |> Format.compile(identifier: identifier)
  |> :hackney.get([], "", follow_redirect: true)
  |> parse_extra_metadata_response()
end
# Turns the result of the `metadata.json` request into the extra metadata map.
#
# * 200 response: the body is read, stripped of line breaks and JSON-decoded.
#   Raises if the body cannot be read or is not valid JSON.
# * any other HTTP response: the body is skipped so the connection is released
#   instead of being left half-read, and the defaults are returned.
# * anything else (e.g. a transport error): the defaults are returned.
defp parse_extra_metadata_response({:ok, 200, _headers, client_ref}) do
  {:ok, extra_metadata_json} = :hackney.body(client_ref)

  # NOTE(review): line breaks are removed before decoding, presumably because
  # the file may contain raw newlines the JSON decoder rejects — confirm.
  extra_metadata_json
  |> String.replace("\n", "")
  |> Poison.decode!()
end

defp parse_extra_metadata_response({:ok, _status, _headers, client_ref}) do
  # The response still owns a connection until its body is consumed or skipped.
  _ = :hackney.skip_body(client_ref)
  @extra_metadata_defaults
end

defp parse_extra_metadata_response(_), do: @extra_metadata_defaults
# Downloads and JSON-decodes the archive metadata document of `identifier`.
# Every step is asserted, so a non-200 response, an unreadable body or invalid
# JSON raises.
defp fetch_archive_metadata(identifier) do
  url = Format.compile(@archive_metadata_url, identifier: identifier)
  request_opts = [follow_redirect: true, connect_timeout: 30000, recv_timeout: 30000]

  {:ok, 200, _headers, ref} = :hackney.get(url, [], "", request_opts)
  {:ok, body} = :hackney.body(ref)

  Poison.decode!(body)
end
@doc """
Converts decoded archive metadata into feed data.

`metadata` must contain a `"files"` list. `extra` is the decoded extra
metadata; when a cover is found among the files it is used to fill in an
empty image url before the podcast data is built.

Returns `%{podcast: podcast, items: items}`.
"""
def parse(identifier, metadata = %{"files" => files}, extra) do
  cover_url = fetch_cover(files, identifier)
  enriched = enrich_extra_metadata_with_cover(cover_url, extra)

  %{
    podcast: podcast_data(metadata, enriched),
    items: items_data(metadata, identifier)
  }
end
# Fills in the image url of the extra metadata with the detected cover.
#
# The cover is only used as a fallback: an image url that is already set is
# never overwritten. "Not set" covers an empty string, a nil/missing "url" and
# a nil/missing "image" entry, so a `metadata.json` that omits the image
# still gets the cover.

# no cover was found: nothing to add
defp enrich_extra_metadata_with_cover(nil, extra), do: extra

defp enrich_extra_metadata_with_cover(cover, extra) when is_map(extra) do
  case Map.get(extra, "image") do
    # no image entry at all: create one holding just the cover
    nil ->
      Map.put(extra, "image", %{"url" => cover})

    # image entry present: only fill the url when it is blank
    image when is_map(image) ->
      if Map.get(image, "url") in [nil, ""] do
        put_in(extra, ["image", "url"], cover)
      else
        extra
      end

    # image has an unexpected shape: leave it untouched
    _other ->
      extra
  end
end

# extra metadata is not a map: leave it untouched
defp enrich_extra_metadata_with_cover(_cover, extra), do: extra
# Builds the channel-level feed fields from the item's "metadata" map, its
# "item_last_updated" unix timestamp (seconds) and the extra metadata.
# The uploader is used for webmaster, managing editor and owner email; the
# creator is used for author and owner name.
defp podcast_data(%{"metadata" => metadata, "item_last_updated" => last_updated}, extra) do
  uploader = metadata["uploader"]
  creator = metadata["creator"]

  # "publicdate" is parsed as a naive timestamp and interpreted as UTC.
  published_at =
    metadata["publicdate"]
    |> NaiveDateTime.from_iso8601!()
    |> DateTime.from_naive!("Etc/UTC")

  last_build_at = DateTime.from_unix!(last_updated, :second)
  image = extra["image"]

  %{
    title: metadata["title"],
    description: metadata["description"],
    webmaster: uploader,
    managingEditor: uploader,
    owner: %{name: creator, email: uploader},
    keywords: metadata["subject"],
    pubDate: published_at,
    lastBuildDate: last_build_at,
    author: creator,
    language: metadata["language"],
    image: %{url: image["url"], title: image["title"], link: image["link"]},
    link: extra["link"],
    category: extra["category"],
    explicit: extra["explicit"]
  }
end
# Produces one feed item per audio file. The complete file list is passed along
# so each item can look up its own image among the other files.
defp items_data(%{"files" => files}, identifier) do
  for audio <- filter_audio_files(files) do
    to_feed_item(audio, identifier, files)
  end
end
# Keeps the file entries whose "format" mentions MP3 (case-insensitive).
# Entries without a string "format" are skipped rather than raising, since
# `=~` only accepts a binary on its left-hand side.
# FIXME: only MP3 is matched; other audio formats (e.g. Ogg) are not handled yet.
defp filter_audio_files(files) do
  Enum.filter(files, fn
    %{"format" => format} when is_binary(format) -> format =~ ~r/MP3/i
    _other -> false
  end)
end
# Converts one audio file entry into a feed item map.
#
# "mtime" is parsed as unix seconds for the publication date, "length" is
# parsed as a float and truncated to an integer, and "size" is passed through
# unchanged. The item image is looked up among `files`. Description and
# summary are always empty and explicit is always "no".
defp to_feed_item(file, identifier, files) do
  filename = Map.get(file, "name")
  title = file["title"]

  modified_at =
    file
    |> Map.get("mtime")
    |> Integer.parse()
    |> elem(0)
    |> DateTime.from_unix!(:second)

  enclosure_url = download_url(identifier, filename)

  duration =
    file
    |> Map.get("length")
    |> Float.parse()
    |> elem(0)
    |> trunc()

  byte_count = Map.get(file, "size")
  artwork_url = download_url(identifier, fetch_image_of_audio(filename, files))
  tags = Map.values(Map.take(file, ["album", "artist", "genre"]))

  %{
    title: title,
    description: "",
    pubDate: modified_at,
    link: enclosure_url,
    length: duration,
    size: byte_count,
    summary: "",
    image: artwork_url,
    keywords: tags,
    explicit: "no"
  }
end
# Returns the download URL of the first original (non-derived) JPEG among
# `files`, or nil when there is none.
# FIXME: only JPEG covers are detected; PNG/GIF are not handled yet.
defp fetch_cover(files, identifier) do
  original_jpegs =
    Enum.filter(files, fn f ->
      f["source"] == "original" and f["format"] == "JPEG"
    end)

  cover_name =
    case original_jpegs do
      [] -> nil
      [cover | _rest] -> Map.get(cover, "name")
    end

  download_url(identifier, cover_name)
end
# Finds the name of the image derived from `audio_file`: the first entry of
# `files` that is a derivative of that audio file and whose format mentions
# JPG, JPEG, PNG or GIF (case-insensitive). Returns nil when none exists.
defp fetch_image_of_audio(audio_file, files) do
  candidates =
    for %{"format" => fmt, "source" => "derivative", "original" => ^audio_file} = entry <- files,
        fmt =~ ~r/JPG|JPEG|PNG|GIF/i do
      entry
    end

  fetch_image_of_audio(candidates)
end

# Single-argument form: takes a list of candidates, one candidate, or nil and
# returns the candidate's "name" (nil when there is no candidate).
defp fetch_image_of_audio([]), do: nil
defp fetch_image_of_audio([first | _rest]), do: fetch_image_of_audio(first)
defp fetch_image_of_audio(nil), do: nil
defp fetch_image_of_audio(image_file), do: Map.get(image_file, "name", nil)
# Builds the download URL for `filename` inside the item `identifier`.
# Returns nil when there is no filename.
#
# Identifier and filename are percent-encoded per path segment before being
# substituted into the template. Encoding the finished URL as a whole would
# leave reserved characters such as `#` and `?` untouched, so a file like
# "Episode #1.mp3" would produce a URL whose tail is read as a fragment.
defp download_url(_identifier, nil), do: nil

defp download_url(identifier, filename) do
  # Keep "/" separators so files stored in sub-directories still resolve.
  encoded_filename =
    filename
    |> to_string()
    |> String.split("/")
    |> Enum.map_join("/", &encode_path_segment/1)

  Format.compile(@download_url,
    identifier: encode_path_segment(identifier),
    filename: encoded_filename
  )
end

# Percent-encodes everything except RFC 3986 unreserved characters.
defp encode_path_segment(segment) do
  segment
  |> to_string()
  |> URI.encode(&URI.char_unreserved?/1)
end
end