2020-05-22 22:04:15 +02:00
|
|
|
defmodule PodcastFeed.Provider.Archive.Parser do
|
2020-05-23 23:51:56 +02:00
|
|
|
alias PodcastFeed.Utility.Format
|
|
|
|
|
|
|
|
# URL templates for archive.org. The `{identifier}` / `{filename}` tokens are
# filled in through Format.compile/2 by the functions below.
# All templates use https so no request is sent in clear text.
@extra_metadata_url "https://archive.org/download/{identifier}/metadata.json"
@metadata_url "https://archive.org/metadata/{identifier}"
@download_url "https://archive.org/download/{identifier}/{filename}"
|
|
|
|
|
|
|
|
@doc """
Fetches the metadata documents of the archive.org item `identifier` and
returns the parsed feed data.

The optional extra metadata is requested first, then the item metadata;
both are handed to `parse/3`, whose result is returned unchanged.
"""
def by_identifier(identifier) do
  extra = fetch_extra_metadata(identifier)
  metadata = fetch_metadata(identifier)

  parse(identifier, metadata, extra)
end
|
|
|
|
|
|
|
|
# Downloads and decodes the optional `metadata.json` file of the item.
#
# Returns the decoded JSON when the server answers 200. For any other
# outcome (different status, transport error) a map of empty defaults is
# returned, so the caller always gets the "link", "image", "category" and
# "explicit" keys.
#
# Raises if the body of a 200 response cannot be read or is not valid JSON.
defp fetch_extra_metadata(identifier) do
  extra_metadata_url = Format.compile(@extra_metadata_url, identifier: identifier)

  case :hackney.get(extra_metadata_url, [], "", follow_redirect: true) do
    {:ok, 200, _headers, client_ref} ->
      {:ok, extra_metadata_json} = :hackney.body(client_ref)

      # Newlines are removed before decoding.
      # NOTE(review): presumably the uploaded file may contain raw line
      # breaks that would break JSON decoding - confirm.
      extra_metadata_json
      |> String.replace("\n", "")
      |> Poison.decode!()

    {:ok, _status, _headers, client_ref} ->
      # The body of a non-200 response is not needed, but it has to be
      # skipped so that hackney can release the connection.
      :hackney.skip_body(client_ref)
      default_extra_metadata()

    _error ->
      default_extra_metadata()
  end
end

# Empty placeholders used when the item ships no usable `metadata.json`.
defp default_extra_metadata do
  %{
    "link" => "",
    "image" => %{
      "url" => "",
      "title" => "",
      "link" => ""
    },
    "category" => "",
    "explicit" => ""
  }
end
|
|
|
|
|
2020-05-23 23:51:56 +02:00
|
|
|
# Downloads and decodes the item metadata document for `identifier`.
#
# Connect and receive timeouts are both 30 seconds. The function is
# deliberately assertive: a transport error, a status other than 200 or an
# unreadable body raises `MatchError`; an undecodable body raises from
# `Poison.decode!/1`.
defp fetch_metadata(identifier) do
  metadata_url = Format.compile(@metadata_url, identifier: identifier)
  request_opts = [follow_redirect: true, connect_timeout: 30_000, recv_timeout: 30_000]

  {:ok, 200, _headers, client_ref} = :hackney.get(metadata_url, [], "", request_opts)
  {:ok, metadata_json} = :hackney.body(client_ref)

  Poison.decode!(metadata_json)
end
|
|
|
|
|
|
|
|
@doc """
Turns already decoded archive.org documents into feed data.

The second argument must be a map holding the `"metadata"` and `"files"`
keys; any other shape raises `FunctionClauseError`. `extra` is the decoded
extra metadata map.

Returns `%{podcast: podcast, items: items}`.
"""
def parse(identifier, %{"metadata" => metadata, "files" => files}, extra) do
  # The cover lookup through fetch_cover/2 is currently disabled; the podcast
  # image comes from `extra` instead.
  podcast = podcast_data(metadata, extra)
  items = items_data(files, identifier)

  %{podcast: podcast, items: items}
end
|
|
|
|
|
2020-05-24 09:10:22 +02:00
|
|
|
# Returns the encoded download URL of the item's cover image, or `nil` when
# the item has no suitable file.
#
# The cover is the first file whose "source" is "original" and whose
# "format" names a JPG/JPEG/PNG/GIF image (case-insensitive), the same set of
# formats fetch_image_of_audio/2 accepts.
defp fetch_cover(files, identifier) do
  cover =
    Enum.find(files, fn file ->
      format = file["format"]

      # is_binary/1 guards against entries without a "format" value, for
      # which `=~` would raise.
      file["source"] == "original" and is_binary(format) and
        format =~ ~r/JPG|JPEG|PNG|GIF/i
    end)

  case cover do
    %{"name" => filename} when is_binary(filename) ->
      @download_url
      |> Format.compile(identifier: identifier, filename: filename)
      |> URI.encode()

    _no_cover ->
      nil
  end
end
|
|
|
|
|
2020-05-23 23:51:56 +02:00
|
|
|
# Builds the channel-level map of the feed.
#
# `metadata` supplies title, description, uploader, creator, subject,
# language and the two dates; `extra` supplies link, image, category and the
# explicit flag. "publicdate" and "addeddate" must be ISO 8601 strings and
# are converted to UTC `DateTime`s; an invalid or missing date raises.
defp podcast_data(metadata, extra) do
  to_utc = fn iso8601 ->
    iso8601
    |> NaiveDateTime.from_iso8601!()
    |> DateTime.from_naive!("Etc/UTC")
  end

  uploader = metadata["uploader"]
  creator = metadata["creator"]

  # Dates are converted in the same order as the keys appear in the result.
  published_at = to_utc.(metadata["publicdate"])
  built_at = to_utc.(metadata["addeddate"])

  image = %{
    url: extra["image"]["url"],
    title: extra["image"]["title"],
    link: extra["image"]["link"]
  }

  %{
    title: metadata["title"],
    description: metadata["description"],
    webmaster: uploader,
    managingEditor: uploader,
    owner: %{name: creator, email: uploader},
    keywords: metadata["subject"],
    pubDate: published_at,
    lastBuildDate: built_at,
    author: creator,
    language: metadata["language"],
    image: image,
    link: extra["link"],
    category: extra["category"],
    explicit: extra["explicit"]
  }
end
|
|
|
|
|
2020-05-23 23:51:56 +02:00
|
|
|
# Builds one feed item per audio file of the item.
#
# The complete `files` list is passed along to to_feed_item/3 as well, which
# uses it to look up the image belonging to each audio file.
defp items_data(files, identifier) do
  for audio_file <- filter_audio_files(files) do
    to_feed_item(audio_file, identifier, files)
  end
end
|
|
|
|
|
2020-05-23 23:51:56 +02:00
|
|
|
# Keeps only the files whose "format" mentions MP3 or OGG (case-insensitive).
#
# Entries without a textual "format" are dropped instead of raising, since
# `=~` only accepts a binary on its left-hand side.
# FIXME: the list of accepted audio formats is still incomplete.
defp filter_audio_files(files) do
  Enum.filter(files, fn
    %{"format" => format} when is_binary(format) -> format =~ ~r/MP3|OGG/i
    _other -> false
  end)
end
|
2020-05-22 22:04:15 +02:00
|
|
|
|
2020-05-24 23:37:18 +02:00
|
|
|
# Converts one audio file entry into a feed item map.
#
# * `file`       - the audio file entry; "mtime" must hold an integer string
#                  (Unix seconds) and "length" a number string, otherwise the
#                  function raises.
# * `identifier` - the archive.org item the file belongs to.
# * `files`      - every file of the item, searched for the matching image.
#
# `image` is the encoded download URL of the image derived from this audio
# file, or `nil` when fetch_image_of_audio/2 finds none.
defp to_feed_item(file, identifier, files) do
  filename = Map.get(file, "name")

  # Only build an image URL when there actually is an image file name;
  # otherwise the URL would point at a file that does not exist.
  image_url =
    case fetch_image_of_audio(filename, files) do
      nil ->
        nil

      image_name ->
        @download_url
        |> Format.compile(identifier: identifier, filename: image_name)
        |> URI.encode()
    end

  %{
    title: file["title"],
    description: "",
    pubDate:
      file
      |> Map.get("mtime")
      |> Integer.parse()
      |> elem(0)
      |> DateTime.from_unix!(:second),
    link:
      @download_url
      |> Format.compile(identifier: identifier, filename: filename)
      |> URI.encode(),
    # "length" is parsed as a float and truncated to an integer.
    length: file |> Map.get("length") |> Float.parse() |> elem(0) |> trunc(),
    size: Map.get(file, "size"),
    summary: "",
    image: image_url,
    keywords: file |> Map.take(["album", "artist", "genre"]) |> Map.values(),
    explicit: "no"
  }
end
|
2020-05-24 23:37:18 +02:00
|
|
|
|
|
|
|
# Returns the name of the image file derived from `audio_file`, or `nil`
# when there is none.
#
# A matching entry has "source" equal to "derivative", "original" equal to
# `audio_file` and a "format" mentioning JPG, JPEG, PNG or GIF
# (case-insensitive). The first match in `files` wins.
defp fetch_image_of_audio(audio_file, files) do
  image =
    Enum.find(files, fn
      %{"format" => format, "source" => "derivative", "original" => ^audio_file}
      when is_binary(format) ->
        format =~ ~r/JPG|JPEG|PNG|GIF/i

      _other ->
        false
    end)

  # `image` is nil when nothing matched; calling Map.get/3 on it would raise.
  if image, do: Map.get(image, "name"), else: nil
end
|
2020-05-22 22:04:15 +02:00
|
|
|
end
|