diff --git a/lib/podcast_feed/provider/archive/parser.ex b/lib/podcast_feed/provider/archive/parser.ex
index 63069e3..8afcc6d 100644
--- a/lib/podcast_feed/provider/archive/parser.ex
+++ b/lib/podcast_feed/provider/archive/parser.ex
@@ -1,68 +1,43 @@
defmodule PodcastFeed.Provider.Archive.Parser do
alias PodcastFeed.Utility.Format
+ alias __MODULE__
- @extra_metadata_url "https://archive.org/download/{identifier}/metadata.json"
+ @custom_metadata_url "https://archive.org/download/{identifier}/metadata.json"
@archive_metadata_url "http://archive.org/metadata/{identifier}"
@download_url "https://archive.org/download/{identifier}/{filename}"
+ @podcast_link "https://archive.org/details/{identifier}"
- @extra_metadata_defaults %{
- "link" => "",
+ @custom_metadata_defaults %{
+ "link" => nil,
"image" => %{
- "url" => "",
- "title" => "",
- "link" => "",
+ "url" => nil,
+ "title" => nil,
+ "link" => nil,
},
"category" => "",
- "explicit" => "",
+ "explicit" => "no",
+ "version" => "1",
}
+ @enforce_keys [:identifier]
+ defstruct [:identifier, :podcast_data, :archive_metadata, custom_metadata: @custom_metadata_defaults]
+
def by_identifier(identifier) do
- extra_metadata_json = fetch_extra_metadata(identifier)
- metadata_json = fetch_archive_metadata(identifier)
- parse(identifier, metadata_json, extra_metadata_json)
+ %Parser{identifier: identifier}
+ |> enrich_with_archive_metadata()
+ |> enrich_with_custom_metadata()
+ |> to_podcast_feed_data()
end
- defp fetch_extra_metadata(identifier) do
- extra_metadata_url = Format.compile(@extra_metadata_url, identifier: identifier)
- parse_extra_metadata_response(:hackney.get(extra_metadata_url, [], "", [follow_redirect: true]))
+ def to_podcast_feed_data(token) do
+ %{
+ podcast: podcast_data(token),
+ items: items_data(token)
+ }
end
- defp parse_extra_metadata_response({:ok, 200, _headers, client_ref}) do
- {:ok, extra_metadata_json} = :hackney.body(client_ref)
- extra_metadata_json
- |> String.split("\n")
- |> Enum.join()
- |> Poison.decode!()
- end
- defp parse_extra_metadata_response(_), do: @extra_metadata_defaults
-
- defp fetch_archive_metadata(identifier) do
- metadata_url = Format.compile(@archive_metadata_url, identifier: identifier)
- {:ok, 200, _headers, client_ref} = :hackney.get(metadata_url, [], "", [follow_redirect: true, connect_timeout: 30000, recv_timeout: 30000])
- {:ok, metadata_json} = :hackney.body(client_ref)
- metadata_json |> Poison.decode!()
- end
-
- def parse(identifier, metadata = %{"files" => files}, extra) do
- extra = files
- |> fetch_cover(identifier)
- |> enrich_extra_metadata_with_cover(extra)
-
- %{podcast: podcast_data(metadata, extra), items: items_data(metadata, identifier)}
- end
-
- # cover is nil
- defp enrich_extra_metadata_with_cover(nil, extra), do: extra
-
- # cover is found and image is missing in the extra_metadata
- defp enrich_extra_metadata_with_cover(cover, extra = %{"image" => %{"url" => ""}}) do
- put_in(extra, ["image", "url"], cover)
- end
-
- # image is already set in the extra_metadata
- defp enrich_extra_metadata_with_cover(_cover, extra), do: extra
-
- defp podcast_data(%{"metadata" => metadata, "item_last_updated" => last_updated}, extra) do
+ defp podcast_data(token = %{custom_metadata: custom, archive_metadata: %{"metadata" => metadata, "item_last_updated" => last_updated}}) do
+ link = Format.compile(@podcast_link, identifier: token.identifier)
%{
title: metadata["title"],
description: metadata["description"],
@@ -78,28 +53,48 @@ defmodule PodcastFeed.Provider.Archive.Parser do
author: metadata["creator"],
language: metadata["language"],
image: %{
- url: extra["image"]["url"],
- title: extra["image"]["title"],
- link: extra["image"]["link"],
+ url: get_in(custom, ["image", "url"]) || fetch_cover(token),
+ title: get_in(custom, ["image", "title"]) || metadata["title"],
+ link: get_in(custom, ["image", "link"]) || link,
},
- link: extra["link"],
- category: extra["category"],
- explicit: extra["explicit"],
+ link: Map.get(custom, "link") || link,
+ category: Map.get(custom, "category", ""),
+ explicit: Map.get(custom, "explicit", "no"),
}
end
-
- defp items_data(%{"files" => files}, identifier) do
+ defp items_data(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
files
|> filter_audio_files()
|> Enum.map(fn f -> to_feed_item(f, identifier, files) end)
end
+ defp fetch_custom_metadata(identifier) do
+ custom_metadata_url = Format.compile(@custom_metadata_url, identifier: identifier)
+ parse_custom_metadata_response(:hackney.get(custom_metadata_url, [], "", [follow_redirect: true]))
+ end
+
+ defp parse_custom_metadata_response({:ok, 200, _headers, client_ref}) do
+ {:ok, custom_metadata_json} = :hackney.body(client_ref)
+ custom_metadata_json
+ |> String.split("\n")
+ |> Enum.join()
+ |> Poison.decode!()
+ end
+ defp parse_custom_metadata_response(_), do: @custom_metadata_defaults
+
+ defp fetch_archive_metadata(identifier) do
+ metadata_url = Format.compile(@archive_metadata_url, identifier: identifier)
+ {:ok, 200, _headers, client_ref} = :hackney.get(metadata_url, [], "", [follow_redirect: true, connect_timeout: 30000, recv_timeout: 30000])
+ {:ok, metadata_json} = :hackney.body(client_ref)
+ metadata_json |> Poison.decode!()
+ end
+
defp filter_audio_files(files) do
files |> Enum.filter(fn f -> Map.get(f, "format") =~ ~r/MP3/i end) #FIXME:! mp3, ogg, boh
end
- defp to_feed_item(file, identifier, files) do
+ defp to_feed_item(file, identifier, _files) do
filename = Map.get(file, "name")
%{
title: file["title"],
@@ -109,16 +104,17 @@ defmodule PodcastFeed.Provider.Archive.Parser do
length: (file |> Map.get("length") |> Float.parse() |> elem(0)) |> trunc(),
size: file |> Map.get("size"),
summary: "",
- image: download_url(identifier, fetch_image_of_audio(filename, files)),
+ # image: download_url(identifier, fetch_image_of_audio(filename, files)),
+ image: nil,
keywords: file |> Map.take(["album", "artist", "genre"]) |> Map.values(),
explicit: "no",
}
end
- defp fetch_cover(files, identifier) do
+ defp fetch_cover(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
filename = files
|> Enum.filter(fn f -> f["source"] == "original" end)
- |> Enum.filter(fn f -> f["format"] == "JPEG" end) #FIXME:! jpg, png, gif
+ |> Enum.filter(fn f -> f["format"] =~ ~r/JPG|JPEG|PNG|GIF/i end)
|> List.first()
|> case do
nil -> nil
@@ -128,22 +124,31 @@ defmodule PodcastFeed.Provider.Archive.Parser do
download_url(identifier, filename)
end
- defp fetch_image_of_audio(audio_file, files) do
- files
- |> Enum.filter(fn
- %{"format" => format, "source" => "derivative", "original" => ^audio_file} ->
- format =~ ~r/JPG|JPEG|PNG|GIF/i
- _ -> nil
- end)
- |> fetch_image_of_audio()
- end
+ # defp fetch_image_of_audio(audio_file, files) do
+ # files
+ # |> Enum.filter(fn
+ # %{"format" => format, "source" => "derivative", "original" => ^audio_file} ->
+ # format =~ ~r/JPG|JPEG|PNG|GIF/i
+ # _ -> nil
+ # end)
+ # |> fetch_image_of_audio()
+ # end
- defp fetch_image_of_audio(image_files) when is_list(image_files), do: fetch_image_of_audio(List.first(image_files))
- defp fetch_image_of_audio(nil), do: nil
- defp fetch_image_of_audio(image_file), do: image_file |> Map.get("name", nil)
+ # defp fetch_image_of_audio(image_files) when is_list(image_files), do: fetch_image_of_audio(List.first(image_files))
+ # defp fetch_image_of_audio(nil), do: nil
+ # defp fetch_image_of_audio(image_file), do: image_file |> Map.get("name", nil)
defp download_url(_identifier, nil), do: nil
defp download_url(identifier, filename) do
Format.compile(@download_url, identifier: identifier, filename: filename) |> URI.encode()
end
+
+ defp enrich_with_archive_metadata(token) do
+ %Parser{ token | archive_metadata: fetch_archive_metadata(token.identifier) }
+ end
+
+ defp enrich_with_custom_metadata(token) do
+ %Parser{ token | custom_metadata: fetch_custom_metadata(token.identifier) }
+ end
+
end
diff --git a/lib/podcast_feed_web/templates/feed/feed.xml.eex b/lib/podcast_feed_web/templates/feed/feed.xml.eex
index d11be5f..794dc22 100644
--- a/lib/podcast_feed_web/templates/feed/feed.xml.eex
+++ b/lib/podcast_feed_web/templates/feed/feed.xml.eex
@@ -41,9 +41,7 @@
<%= item.summary %>
Enum.join(", ") %>]]>
<%= item.explicit %>
- <%= if item.image != nil do %>
-
- <% end %>
+
<% end %>
diff --git a/test/podcast_feed/provider/archive/parser_test.exs b/test/podcast_feed/provider/archive/parser_test.exs
index ec1a284..0274e8c 100644
--- a/test/podcast_feed/provider/archive/parser_test.exs
+++ b/test/podcast_feed/provider/archive/parser_test.exs
@@ -4,14 +4,21 @@ defmodule PodcastFeed.Provider.Archive.ParserTest do
alias PodcastFeed.Provider.Archive.Parser
setup_all do
- valid_json = "{\"created\":1590247789,\"d1\":\"ia601402.us.archive.org\",\"d2\":\"ia801402.us.archive.org\",\"dir\":\"/8/items/incontri-a-piano-terra\",\"files\":[{\"name\": \"metadata.json\",\"source\": \"original\",\"mtime\": \"1590258296\",\"size\": \"315\",\"md5\": \"a0c0e219cf3f13e54f2a4b3efef8e5c8\",\"crc32\": \"2d181b5c\",\"sha1\": \"8244b579a759edddd01c905dd11f8565e83d0898\",\"format\": \"JSON\"},{\"name\": \"cover.jpg\",\"source\": \"original\",\"mtime\": \"1590258445\",\"size\": \"10650\",\"md5\": \"15687b23e11f0099abbfe64eb1685c31\",\"crc32\": \"fbb1516a\",\"sha1\": \"98fa929c7554241cfa92bea8eba69b39c5d47603\",\"format\": \"JPEG\",\"rotation\": \"0\"},{\"name\":\"Confini mobili sulle alpi.mp3\",\"source\":\"original\",\"mtime\":\"1590135989\",\"size\":\"46933494\",\"md5\":\"e832ee9381a4f8af2d9727e2f49126ae\",\"crc32\":\"d709dd90\",\"sha1\":\"89c820a2dfd63cfbbf7aeefd191c653756b33fe3\",\"format\":\"VBR MP3\",\"length\":\"3902.35\",\"height\":\"0\",\"width\":\"0\",\"title\":\"Confini mobili sulle alpi (italian limes)\",\"creator\":\"APE Milano\",\"album\":\"Incontri a Piano Terra\",\"track\":\"02\",\"artist\":\"APE Milano\",\"genre\":\"podcast\"},{\"name\":\"Confini mobili sulle alpi.png\",\"source\":\"derivative\",\"format\":\"PNG\",\"original\":\"Confini mobili sulle alpi.mp3\",\"mtime\":\"1590137809\",\"size\":\"34656\",\"md5\":\"63893f9b00402a107682b5317e808523\",\"crc32\":\"b59ff609\",\"sha1\":\"a396716431cd0acedd243030093d0b31d792cfb3\"},{\"name\":\"Confini mobili sulle alpi_spectrogram.png\",\"source\":\"derivative\",\"format\":\"Spectrogram\",\"original\":\"Confini mobili sulle alpi.mp3\",\"mtime\":\"1590137854\",\"size\":\"273188\",\"md5\":\"557337665c6d9f962b2e91d169f25e1b\",\"crc32\":\"08b4b57c\",\"sha1\":\"88e088f9c4954aa8f0849b7e0d69cee8d7d42327\"}],\"files_count\":31,\"item_last_updated\":1590160774,\"item_size\":244544362,\"metadata\":{\"identifier\":\"incontri-a-piano-terra\",\"mediatype\":\"audio\",\"collection\":\"opensource_audio\",\"creator\":\"APE Milano\",\"description\":\"Qualche registrazione delle attivit\\u00e0 sociali che promuoviamo al Piano Terra di Milano\",\"language\":\"ita\",\"licenseurl\":\"https://creativecommons.org/licenses/by-nc-nd/4.0/\",\"scanner\":\"Internet Archive HTML5 Uploader 1.6.4\",\"subject\":[\"ape milano\",\"podcast\",\"montagna\"],\"title\":\"Incontri a Piano Terra\",\"uploader\":\"milanoape@gmail.com\",\"publicdate\":\"2020-05-22 08:30:21\",\"addeddate\":\"2020-05-22 08:30:21\",\"curation\":\"[curator]validator@archive.org[/curator][date]20200522085526[/date][comment]checked for malware[/comment]\"},\"server\":\"ia601402.us.archive.org\",\"uniq\":122833277,\"workable_servers\":[\"ia601402.us.archive.org\",\"ia801402.us.archive.org\"]}"
- extra = "{\"link\": \"http://www.ape-alveare.it/\",\"image\": {\"url\": \"http://www.ape-alveare.it/wp-content/themes/yootheme/cache/2018_logo_Ape_righe_trasparenza-d1aae6b9.png\",\"title\": \"APE Milano\",\"link\": \"http://www.ape-alveare.it/\"},\"category\": \"Montagna\",\"explicit\": \"no\" }"
+ json_metadata = "{\"created\":1590247789,\"d1\":\"ia601402.us.archive.org\",\"d2\":\"ia801402.us.archive.org\",\"dir\":\"/8/items/incontri-a-piano-terra\",\"files\":[{\"name\": \"metadata.json\",\"source\": \"original\",\"mtime\": \"1590258296\",\"size\": \"315\",\"md5\": \"a0c0e219cf3f13e54f2a4b3efef8e5c8\",\"crc32\": \"2d181b5c\",\"sha1\": \"8244b579a759edddd01c905dd11f8565e83d0898\",\"format\": \"JSON\"},{\"name\": \"cover.jpg\",\"source\": \"original\",\"mtime\": \"1590258445\",\"size\": \"10650\",\"md5\": \"15687b23e11f0099abbfe64eb1685c31\",\"crc32\": \"fbb1516a\",\"sha1\": \"98fa929c7554241cfa92bea8eba69b39c5d47603\",\"format\": \"JPEG\",\"rotation\": \"0\"},{\"name\":\"Confini mobili sulle alpi.mp3\",\"source\":\"original\",\"mtime\":\"1590135989\",\"size\":\"46933494\",\"md5\":\"e832ee9381a4f8af2d9727e2f49126ae\",\"crc32\":\"d709dd90\",\"sha1\":\"89c820a2dfd63cfbbf7aeefd191c653756b33fe3\",\"format\":\"VBR MP3\",\"length\":\"3902.35\",\"height\":\"0\",\"width\":\"0\",\"title\":\"Confini mobili sulle alpi (italian limes)\",\"creator\":\"APE Milano\",\"album\":\"Incontri a Piano Terra\",\"track\":\"02\",\"artist\":\"APE Milano\",\"genre\":\"podcast\"},{\"name\":\"Confini mobili sulle alpi.png\",\"source\":\"derivative\",\"format\":\"PNG\",\"original\":\"Confini mobili sulle alpi.mp3\",\"mtime\":\"1590137809\",\"size\":\"34656\",\"md5\":\"63893f9b00402a107682b5317e808523\",\"crc32\":\"b59ff609\",\"sha1\":\"a396716431cd0acedd243030093d0b31d792cfb3\"},{\"name\":\"Confini mobili sulle alpi_spectrogram.png\",\"source\":\"derivative\",\"format\":\"Spectrogram\",\"original\":\"Confini mobili sulle alpi.mp3\",\"mtime\":\"1590137854\",\"size\":\"273188\",\"md5\":\"557337665c6d9f962b2e91d169f25e1b\",\"crc32\":\"08b4b57c\",\"sha1\":\"88e088f9c4954aa8f0849b7e0d69cee8d7d42327\"}],\"files_count\":31,\"item_last_updated\":1590160774,\"item_size\":244544362,\"metadata\":{\"identifier\":\"incontri-a-piano-terra\",\"mediatype\":\"audio\",\"collection\":\"opensource_audio\",\"creator\":\"APE Milano\",\"description\":\"Qualche registrazione delle attivit\\u00e0 sociali che promuoviamo al Piano Terra di Milano\",\"language\":\"ita\",\"licenseurl\":\"https://creativecommons.org/licenses/by-nc-nd/4.0/\",\"scanner\":\"Internet Archive HTML5 Uploader 1.6.4\",\"subject\":[\"ape milano\",\"podcast\",\"montagna\"],\"title\":\"Incontri a Piano Terra\",\"uploader\":\"milanoape@gmail.com\",\"publicdate\":\"2020-05-22 08:30:21\",\"addeddate\":\"2020-05-22 08:30:21\",\"curation\":\"[curator]validator@archive.org[/curator][date]20200522085526[/date][comment]checked for malware[/comment]\"},\"server\":\"ia601402.us.archive.org\",\"uniq\":122833277,\"workable_servers\":[\"ia601402.us.archive.org\",\"ia801402.us.archive.org\"]}"
+ json_custom = "{\"link\": \"http://www.ape-alveare.it/\",\"image\": {\"url\": \"http://www.ape-alveare.it/wp-content/themes/yootheme/cache/2018_logo_Ape_righe_trasparenza-d1aae6b9.png\",\"title\": \"APE Milano\",\"link\": \"http://www.ape-alveare.it/\"},\"category\": \"Montagna\",\"explicit\": \"no\" }"
- {:ok, json: valid_json, extra: extra}
+ token = %Parser{
+ identifier: "incontri-a-piano-terra",
+ archive_metadata: Poison.decode!(json_metadata),
+ custom_metadata: Poison.decode!(json_custom),
+ }
+
+ {:ok, token: token}
end
test "podcast data are correctly converted", state do
- %{podcast: podcast} = Parser.parse("incontri-a-piano-terra", Poison.decode!(state[:json]), Poison.decode!(state[:extra]))
+ %{podcast: podcast} = Parser.to_podcast_feed_data(state[:token])
+
assert %{
title: "Incontri a Piano Terra",
description: "Qualche registrazione delle attività sociali che promuoviamo al Piano Terra di Milano",
@@ -37,8 +44,33 @@ defmodule PodcastFeed.Provider.Archive.ParserTest do
} == podcast
end
+ test "using first `original` image found on archive when image is missing on custom metadata", state do
+ custom = %{state[:token].custom_metadata | "image" => %{
+ "url" => nil,
+ "title" => nil,
+ "link" => nil,
+ }}
+ %{podcast: podcast} = Parser.to_podcast_feed_data(%Parser{state[:token] | custom_metadata: custom})
+
+ assert %{
+ image: %{
+ url: "https://archive.org/download/incontri-a-piano-terra/cover.jpg",
+ title: "Incontri a Piano Terra",
+ link: "https://archive.org/details/incontri-a-piano-terra",
+ },
+ } = podcast
+ end
+
+ test "using archive item details as link when link is missing on custom metadata", state do
+ custom = %{state[:token].custom_metadata | "link" => nil}
+ %{podcast: podcast} = Parser.to_podcast_feed_data(%Parser{state[:token] | custom_metadata: custom})
+
+ assert %{link: "https://archive.org/details/incontri-a-piano-terra"} = podcast
+ end
+
test "items data are correctly converted", state do
- %{items: items} = Parser.parse("incontri-a-piano-terra", Poison.decode!(state[:json]), Poison.decode!(state[:extra]))
+ %{items: items} = Parser.to_podcast_feed_data(state[:token])
+
assert [
%{
title: "Confini mobili sulle alpi (italian limes)",
@@ -49,7 +81,8 @@ defmodule PodcastFeed.Provider.Archive.ParserTest do
summary: "",
keywords: ["Incontri a Piano Terra", "APE Milano", "podcast"],
explicit: "no",
- image: "https://archive.org/download/incontri-a-piano-terra/Confini%20mobili%20sulle%20alpi.png",
+ # image: "https://archive.org/download/incontri-a-piano-terra/Confini%20mobili%20sulle%20alpi.png",
+ image: nil,
size: "46933494"
}
] == items