|
@@ -1,68 +1,43 @@
|
|
|
defmodule PodcastFeed.Provider.Archive.Parser do
|
|
|
alias PodcastFeed.Utility.Format
|
|
|
+ alias __MODULE__
|
|
|
|
|
|
- @extra_metadata_url "https://archive.org/download/{identifier}/metadata.json"
|
|
|
+ @custom_metadata_url "https://archive.org/download/{identifier}/metadata.json"
|
|
|
@archive_metadata_url "http://archive.org/metadata/{identifier}"
|
|
|
@download_url "https://archive.org/download/{identifier}/{filename}"
|
|
|
+ @podcast_link "https://archive.org/details/{identifier}"
|
|
|
|
|
|
- @extra_metadata_defaults %{
|
|
|
- "link" => "",
|
|
|
+ @custom_metadata_defaults %{
|
|
|
+ "link" => nil,
|
|
|
"image" => %{
|
|
|
- "url" => "",
|
|
|
- "title" => "",
|
|
|
- "link" => "",
|
|
|
+ "url" => nil,
|
|
|
+ "title" => nil,
|
|
|
+ "link" => nil,
|
|
|
},
|
|
|
"category" => "",
|
|
|
- "explicit" => "",
|
|
|
+ "explicit" => "no",
|
|
|
+ "version" => "1",
|
|
|
}
|
|
|
|
|
|
- def by_identifier(identifier) do
|
|
|
- extra_metadata_json = fetch_extra_metadata(identifier)
|
|
|
- metadata_json = fetch_archive_metadata(identifier)
|
|
|
- parse(identifier, metadata_json, extra_metadata_json)
|
|
|
- end
|
|
|
-
|
|
|
- defp fetch_extra_metadata(identifier) do
|
|
|
- extra_metadata_url = Format.compile(@extra_metadata_url, identifier: identifier)
|
|
|
- parse_extra_metadata_response(:hackney.get(extra_metadata_url, [], "", [follow_redirect: true]))
|
|
|
- end
|
|
|
-
|
|
|
- defp parse_extra_metadata_response({:ok, 200, _headers, client_ref}) do
|
|
|
- {:ok, extra_metadata_json} = :hackney.body(client_ref)
|
|
|
- extra_metadata_json
|
|
|
- |> String.split("\n")
|
|
|
- |> Enum.join()
|
|
|
- |> Poison.decode!()
|
|
|
- end
|
|
|
- defp parse_extra_metadata_response(_), do: @extra_metadata_defaults
|
|
|
-
|
|
|
- defp fetch_archive_metadata(identifier) do
|
|
|
- metadata_url = Format.compile(@archive_metadata_url, identifier: identifier)
|
|
|
- {:ok, 200, _headers, client_ref} = :hackney.get(metadata_url, [], "", [follow_redirect: true, connect_timeout: 30000, recv_timeout: 30000])
|
|
|
- {:ok, metadata_json} = :hackney.body(client_ref)
|
|
|
- metadata_json |> Poison.decode!()
|
|
|
- end
|
|
|
-
|
|
|
- def parse(identifier, metadata = %{"files" => files}, extra) do
|
|
|
- extra = files
|
|
|
- |> fetch_cover(identifier)
|
|
|
- |> enrich_extra_metadata_with_cover(extra)
|
|
|
+ @enforce_keys [:identifier]
|
|
|
+ defstruct [:identifier, :podcast_data, :archive_metadata, custom_metadata: @custom_metadata_defaults]
|
|
|
|
|
|
- %{podcast: podcast_data(metadata, extra), items: items_data(metadata, identifier)}
|
|
|
@doc """
Builds the podcast feed data for the archive.org item named by `identifier`.

A parser token is created for the identifier, filled in first with the archive
metadata and then with the custom metadata, and finally converted by
`to_podcast_feed_data/1` into a map with `:podcast` and `:items` keys.
"""
def by_identifier(identifier) do
  token = %Parser{identifier: identifier}
  with_archive = enrich_with_archive_metadata(token)
  with_custom = enrich_with_custom_metadata(with_archive)

  to_podcast_feed_data(with_custom)
end
|
|
|
|
|
|
- # cover is nil
|
|
|
- defp enrich_extra_metadata_with_cover(nil, extra), do: extra
|
|
|
-
|
|
|
- # cover is found and image is missing in the extra_metadata
|
|
|
- defp enrich_extra_metadata_with_cover(cover, extra = %{"image" => %{"url" => ""}}) do
|
|
|
- put_in(extra, ["image", "url"], cover)
|
|
|
@doc """
Converts a parser token into the feed data map.

Returns a map with two keys: `:podcast`, built by `podcast_data/1`, and
`:items`, built by `items_data/1`.
"""
def to_podcast_feed_data(token) do
  podcast = podcast_data(token)
  items = items_data(token)

  %{podcast: podcast, items: items}
end
|
|
|
|
|
|
- # image is already set in the extra_metadata
|
|
|
- defp enrich_extra_metadata_with_cover(_cover, extra), do: extra
|
|
|
-
|
|
|
- defp podcast_data(%{"metadata" => metadata, "item_last_updated" => last_updated}, extra) do
|
|
|
+ defp podcast_data(token = %{custom_metadata: custom, archive_metadata: %{"metadata" => metadata, "item_last_updated" => last_updated}}) do
|
|
|
+ link = Format.compile(@podcast_link, identifier: token.identifier)
|
|
|
%{
|
|
|
title: metadata["title"],
|
|
|
description: metadata["description"],
|
|
@@ -78,28 +53,48 @@ defmodule PodcastFeed.Provider.Archive.Parser do
|
|
|
author: metadata["creator"],
|
|
|
language: metadata["language"],
|
|
|
image: %{
|
|
|
- url: extra["image"]["url"],
|
|
|
- title: extra["image"]["title"],
|
|
|
- link: extra["image"]["link"],
|
|
|
+ url: get_in(custom, ["image", "url"]) || fetch_cover(token),
|
|
|
+ title: get_in(custom, ["image", "title"]) || metadata["title"],
|
|
|
+ link: get_in(custom, ["image", "link"]) || link,
|
|
|
},
|
|
|
- link: extra["link"],
|
|
|
- category: extra["category"],
|
|
|
- explicit: extra["explicit"],
|
|
|
+ link: Map.get(custom, "link") || link,
|
|
|
+ category: Map.get(custom, "category", ""),
|
|
|
+ explicit: Map.get(custom, "explicit", "no"),
|
|
|
}
|
|
|
end
|
|
|
|
|
|
-
|
|
|
- defp items_data(%{"files" => files}, identifier) do
|
|
|
# Builds one feed item per audio file listed under "files" in the token's archive
# metadata. The complete file list is handed to to_feed_item/3 alongside each
# audio file.
defp items_data(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
  for audio_file <- filter_audio_files(files) do
    to_feed_item(audio_file, identifier, files)
  end
end
|
|
|
|
|
|
# Requests the item's custom metadata.json (following redirects) and hands the raw
# hackney result to parse_custom_metadata_response/1, which produces the final map.
defp fetch_custom_metadata(identifier) do
  @custom_metadata_url
  |> Format.compile(identifier: identifier)
  |> :hackney.get([], "", follow_redirect: true)
  |> parse_custom_metadata_response()
end
|
|
|
+
|
|
|
# Turns the hackney result of the custom metadata request into a map.
#
#   * 200 response: the body is read, newlines are stripped and the remaining text
#     is decoded as JSON (Poison.decode!/1 raises when it is not valid JSON).
#   * any other HTTP response carrying a client reference: the unread body is
#     skipped so the connection is handed back to hackney instead of staying
#     checked out, then the defaults are returned.
#   * anything else (e.g. a transport error): the defaults are returned.
defp parse_custom_metadata_response({:ok, 200, _headers, client_ref}) do
  {:ok, custom_metadata_json} = :hackney.body(client_ref)

  custom_metadata_json
  |> String.split("\n")
  |> Enum.join()
  |> Poison.decode!()
end

defp parse_custom_metadata_response({:ok, _status, _headers, client_ref}) do
  # The body is never consumed on this path; skipping it releases the connection.
  # The result is ignored on purpose: falling back to the defaults is best-effort.
  _ = :hackney.skip_body(client_ref)
  @custom_metadata_defaults
end

defp parse_custom_metadata_response(_), do: @custom_metadata_defaults
|
|
|
+
|
|
|
# Downloads the archive.org metadata document for the identifier and decodes it
# from JSON. Any result other than a 200 response fails the match and raises.
# Both the connect and the receive timeout are set to 30000.
defp fetch_archive_metadata(identifier) do
  request_options = [follow_redirect: true, connect_timeout: 30000, recv_timeout: 30000]
  url = Format.compile(@archive_metadata_url, identifier: identifier)

  {:ok, 200, _headers, client_ref} = :hackney.get(url, [], "", request_options)
  {:ok, raw_json} = :hackney.body(client_ref)

  Poison.decode!(raw_json)
end
|
|
|
+
|
|
|
# Keeps the file entries whose "format" mentions MP3 (case-insensitive).
# Entries without a string "format" are dropped rather than raising, since a nil
# on the left-hand side of `=~` is not accepted.
# FIXME: only MP3 is recognised; other audio formats (e.g. ogg) are not handled yet.
defp filter_audio_files(files) do
  Enum.filter(files, fn
    %{"format" => format} when is_binary(format) -> format =~ ~r/MP3/i
    _other -> false
  end)
end
|
|
|
|
|
|
- defp to_feed_item(file, identifier, files) do
|
|
|
+ defp to_feed_item(file, identifier, _files) do
|
|
|
filename = Map.get(file, "name")
|
|
|
%{
|
|
|
title: file["title"],
|
|
@@ -109,16 +104,17 @@ defmodule PodcastFeed.Provider.Archive.Parser do
|
|
|
length: (file |> Map.get("length") |> Float.parse() |> elem(0)) |> trunc(),
|
|
|
size: file |> Map.get("size"),
|
|
|
summary: "",
|
|
|
- image: download_url(identifier, fetch_image_of_audio(filename, files)),
|
|
|
+ # image: download_url(identifier, fetch_image_of_audio(filename, files)),
|
|
|
+ image: nil,
|
|
|
keywords: file |> Map.take(["album", "artist", "genre"]) |> Map.values(),
|
|
|
explicit: "no",
|
|
|
}
|
|
|
end
|
|
|
|
|
|
- defp fetch_cover(files, identifier) do
|
|
|
+ defp fetch_cover(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
|
|
|
filename = files
|
|
|
|> Enum.filter(fn f -> f["source"] == "original" end)
|
|
|
- |> Enum.filter(fn f -> f["format"] == "JPEG" end) #FIXME:! jpg, png, gif
|
|
|
+ |> Enum.filter(fn f -> f["format"] =~ ~r/JPG|JPEG|PNG|GIF/i end)
|
|
|
|> List.first()
|
|
|
|> case do
|
|
|
nil -> nil
|
|
@@ -128,22 +124,31 @@ defmodule PodcastFeed.Provider.Archive.Parser do
|
|
|
download_url(identifier, filename)
|
|
|
end
|
|
|
|
|
|
- defp fetch_image_of_audio(audio_file, files) do
|
|
|
- files
|
|
|
- |> Enum.filter(fn
|
|
|
- %{"format" => format, "source" => "derivative", "original" => ^audio_file} ->
|
|
|
- format =~ ~r/JPG|JPEG|PNG|GIF/i
|
|
|
- _ -> nil
|
|
|
- end)
|
|
|
- |> fetch_image_of_audio()
|
|
|
- end
|
|
|
+ # defp fetch_image_of_audio(audio_file, files) do
|
|
|
+ # files
|
|
|
+ # |> Enum.filter(fn
|
|
|
+ # %{"format" => format, "source" => "derivative", "original" => ^audio_file} ->
|
|
|
+ # format =~ ~r/JPG|JPEG|PNG|GIF/i
|
|
|
+ # _ -> nil
|
|
|
+ # end)
|
|
|
+ # |> fetch_image_of_audio()
|
|
|
+ # end
|
|
|
|
|
|
- defp fetch_image_of_audio(image_files) when is_list(image_files), do: fetch_image_of_audio(List.first(image_files))
|
|
|
- defp fetch_image_of_audio(nil), do: nil
|
|
|
- defp fetch_image_of_audio(image_file), do: image_file |> Map.get("name", nil)
|
|
|
+ # defp fetch_image_of_audio(image_files) when is_list(image_files), do: fetch_image_of_audio(List.first(image_files))
|
|
|
+ # defp fetch_image_of_audio(nil), do: nil
|
|
|
+ # defp fetch_image_of_audio(image_file), do: image_file |> Map.get("name", nil)
|
|
|
|
|
|
# Builds the URI-encoded download URL for `filename` inside the item `identifier`.
# A nil filename yields nil, so callers can pass along a lookup that found nothing.
defp download_url(identifier, filename) do
  case filename do
    nil ->
      nil

    _present ->
      raw_url = Format.compile(@download_url, identifier: identifier, filename: filename)
      URI.encode(raw_url)
  end
end
|
|
|
+
|
|
|
# Returns the token with :archive_metadata set to the metadata fetched for
# token.identifier.
defp enrich_with_archive_metadata(token) do
  archive_metadata = fetch_archive_metadata(token.identifier)

  %Parser{token | archive_metadata: archive_metadata}
end
|
|
|
+
|
|
|
# Returns the token with :custom_metadata replaced by the custom metadata fetched
# for token.identifier.
defp enrich_with_custom_metadata(token) do
  custom_metadata = fetch_custom_metadata(token.identifier)

  %Parser{token | custom_metadata: custom_metadata}
end
|
|
|
+
|
|
|
end
|