|
@@ -1,130 +1,95 @@
|
|
|
defmodule PodcastFeed.Provider.Archive.Parser do
|
|
|
- def feed(url) do
|
|
|
- url
|
|
|
- |> fetch_xml()
|
|
|
- |> parse()
|
|
|
- |> IO.inspect()
|
|
|
- |> filter_mp3()
|
|
|
- |> compose()
|
|
|
+ alias PodcastFeed.Utility.Format
|
|
|
+
|
|
|
# Archive.org URL templates. The `{identifier}` / `{filename}` placeholders are
# filled in through `Format.compile/2`.

# `metadata.json` file located in the item's download directory.
@extra_metadata_url "https://archive.org/download/{identifier}/metadata.json"
# Metadata endpoint for the item; requested over HTTPS like the other
# endpoints, so the request is not sent in clear text and no redirect is needed.
@metadata_url "https://archive.org/metadata/{identifier}"
# Direct download URL of a single file belonging to the item.
@download_url "https://archive.org/download/{identifier}/{filename}"
|
|
|
+
|
|
|
@doc """
Builds the feed data for the Archive.org item named by `identifier`.

The item's extra `metadata.json` is fetched first, then the item metadata;
both decoded documents are handed to `parse/3`, whose result (a map with the
`:podcast` and `:items` keys) is returned.

Raises when either request does not succeed with status `200` or when a
response body cannot be decoded as JSON.
"""
def by_identifier(identifier) do
  extra = fetch_extra_metadata(identifier)
  parse(identifier, fetch_metadata(identifier), extra)
end
|
|
|
+
|
|
|
# Downloads the item's `metadata.json` and decodes it with Poison.
#
# Every line feed is removed from the body before decoding. The assertive
# matches raise a `MatchError` unless the request succeeds with status 200 and
# the body can be read; `Poison.decode!/1` raises on invalid JSON.
defp fetch_extra_metadata(identifier) do
  url = Format.compile(@extra_metadata_url, identifier: identifier)
  {:ok, 200, _headers, ref} = :hackney.get(url, [], "", follow_redirect: true)
  {:ok, body} = :hackney.body(ref)

  body
  |> String.replace("\n", "")
  |> Poison.decode!()
end
|
|
|
|
|
|
- defp fetch_xml(url) do
|
|
|
- {:ok, {_, _, xml}} = :httpc.request(:get, {url, []}, [], [body_format: :binary])
|
|
|
- xml
|
|
|
# Fetches the item's metadata document and decodes it with Poison.
#
# Connect and receive timeouts are both set to 30 seconds. The assertive
# matches raise a `MatchError` unless the request succeeds with status 200 and
# the body can be read; `Poison.decode!/1` raises on invalid JSON.
defp fetch_metadata(identifier) do
  metadata_url = Format.compile(@metadata_url, identifier: identifier)
  options = [follow_redirect: true, connect_timeout: 30000, recv_timeout: 30000]

  # No debug printing of the URL here: library code must not write to stdout
  # on every request.
  {:ok, 200, _headers, client_ref} = :hackney.get(metadata_url, [], "", options)
  {:ok, metadata_json} = :hackney.body(client_ref)

  Poison.decode!(metadata_json)
end
|
|
|
+
|
|
|
@doc """
Turns decoded Archive.org documents into feed data.

`identifier` is the item identifier, the second argument is the decoded
metadata document (it must contain the `"metadata"` and `"files"` keys,
otherwise no clause matches), and `extra` is the decoded extra metadata.

Returns `%{podcast: ..., items: ...}`.
"""
def parse(identifier, %{"metadata" => metadata, "files" => files}, extra) do
  # The image URL is resolved here but its value is currently discarded.
  _cover = fetch_image(files, identifier)

  podcast = podcast_data(metadata, extra)
  items = items_data(files, identifier)

  %{podcast: podcast, items: items}
end
|
|
|
|
|
|
- defp parse(xml) do
|
|
|
- xml |> XmlToMap.naive_map() |> Map.get("files") |> Map.get("file") |> Enum.map(fn f -> Map.get(f, "#content") |> Map.put("filename", Map.get(f, "-name")) end)
|
|
|
# Returns the encoded download URL of the first file whose "source" is
# "original" and whose "format" is "JPEG", or `nil` when the item has no such
# file (or the matching entry carries no "name").
#
# Returning `nil` instead of piping a missing entry into `Map.get/2` keeps
# items without a JPEG from raising a `BadMapError`.
#
# FIXME: only JPEG is recognised; PNG and GIF images are ignored.
defp fetch_image(files, identifier) do
  original_jpeg? = fn f -> f["source"] == "original" and f["format"] == "JPEG" end

  case Enum.find(files, original_jpeg?) do
    %{"name" => filename} when is_binary(filename) ->
      Format.compile(@download_url, identifier: identifier, filename: filename)
      |> URI.encode()

    _no_usable_image ->
      nil
  end
end
|
|
|
|
|
|
- defp filter_mp3(files) do
|
|
|
- files |> Enum.filter(fn f -> Map.get(f, "format") =~ ~r/MP3/i end)
|
|
|
# Assembles the channel-level feed map from the item `metadata` and the
# `extra` metadata document.
#
# The uploader is used for the webmaster, managing editor and owner email;
# the creator for the owner name and the author. "publicdate" and "addeddate"
# are converted to UTC `DateTime`s and raise when they cannot be parsed.
defp podcast_data(metadata, extra) do
  uploader = metadata["uploader"]
  creator = metadata["creator"]
  published_at = to_utc_datetime(metadata["publicdate"])
  built_at = to_utc_datetime(metadata["addeddate"])

  image = %{
    url: extra["image"]["url"],
    title: extra["image"]["title"],
    link: extra["image"]["link"]
  }

  %{
    title: metadata["title"],
    description: metadata["description"],
    webmaster: uploader,
    managingEditor: uploader,
    owner: %{name: creator, email: uploader},
    keywords: metadata["subject"],
    pubDate: published_at,
    lastBuildDate: built_at,
    author: creator,
    language: metadata["language"],
    image: image,
    link: extra["link"],
    category: extra["category"],
    explicit: extra["explicit"]
  }
end

# Parses an ISO 8601 naive timestamp and tags it as "Etc/UTC".
defp to_utc_datetime(timestamp) do
  timestamp
  |> NaiveDateTime.from_iso8601!()
  |> DateTime.from_naive!("Etc/UTC")
end
|
|
|
|
|
|
- defp compose(files) do
|
|
|
# Builds one feed item per audio file in `files`; every other file is skipped.
defp items_data(files, identifier) do
  for file <- filter_audio_files(files) do
    to_feed_item(file, identifier)
  end
end
|
|
|
|
|
|
- # <item>
|
|
|
- # <title>Episode Name 2</title>
|
|
|
- # <link>
|
|
|
- # http://podcast.example.com/episode2.mp4
|
|
|
- # </link>
|
|
|
- # <pubDate>Sat, 02 Jan 2016 16:00:00 PDT</pubDate>
|
|
|
- # <description>
|
|
|
- # The full length episode 2 description
|
|
|
- # </description>
|
|
|
- # <enclosure url="http://podcasts.example.com/episode.mp4" length="36715125" type="audio/mpeg"/>
|
|
|
- # <guid>
|
|
|
- # http://podcast.example.com/episode2.mp4
|
|
|
- # </guid>
|
|
|
- # <itunes:duration>19:07</itunes:duration>
|
|
|
- # <itunes:summary>
|
|
|
- # The full length episode 2 description
|
|
|
- # </itunes:summary>
|
|
|
- # <itunes:image href="http://www.example.com/image3000x3000.png"/>
|
|
|
- # <itunes:keywords>
|
|
|
- # comma,separated,key,words
|
|
|
- # </itunes:keywords>
|
|
|
- # <itunes:explicit>no</itunes:explicit>
|
|
|
- # </item>
|
|
|
-# %{
|
|
|
-# "album" => "Incontri al Piano Terra",
|
|
|
-# "artist" => "APE Milano",
|
|
|
-# "crc32" => "f1820595",
|
|
|
-# "creator" => "APE Milano",
|
|
|
-# "format" => "VBR MP3",
|
|
|
-# "genre" => "podcast",
|
|
|
-# "height" => "0",
|
|
|
-# "length" => "3943.31",
|
|
|
-# "md5" => "9ca26043a3e82e6f86c3a9309b88f4f5",
|
|
|
-# "mtime" => "1590154757",
|
|
|
-# "sha1" => "dcacfa46fcad1d656312784ad06886b5614c6420",
|
|
|
-# "size" => "47148690",
|
|
|
-# "title" => "Presentazione di Montagna femminile plurale con N1DM",
|
|
|
-# "track" => "03",
|
|
|
-# "width" => "0"
|
|
|
-# }
|
|
|
# Keeps only the entries whose "format" mentions MP3 (case-insensitive).
#
# Entries without a textual "format" are dropped instead of being matched
# against the regex, because `=~` raises on a non-binary left operand.
#
# FIXME: only MP3 is recognised; other audio formats (e.g. Ogg) are ignored.
defp filter_audio_files(files) do
  Enum.filter(files, fn
    %{"format" => format} when is_binary(format) -> format =~ ~r/MP3/i
    _other -> false
  end)
end
|
|
|
|
|
|
- defp to_feed_item(file) do
|
|
|
# Converts one file entry into a feed item map.
#
# "mtime" is read as Unix seconds for the publication date, and the link is
# the encoded download URL of the file inside the item `identifier`.
# Raises when "mtime" or "length" is missing or not numeric.
defp to_feed_item(file, identifier) do
  name = Map.get(file, "name")
  title = file["title"]

  published_at =
    file
    |> Map.get("mtime")
    |> Integer.parse()
    |> elem(0)
    |> DateTime.from_unix!(:second)

  url = URI.encode(Format.compile(@download_url, identifier: identifier, filename: name))

  # NOTE(review): this scales the entry's "length" value by 100 and truncates
  # it; confirm this is the figure the feed expects for `length`.
  raw_length = file |> Map.get("length") |> Float.parse() |> elem(0)

  tags = Map.values(Map.take(file, ["album", "artist", "genre"]))

  # FIXME: no per-item image yet; it should be taken from the item's other files.
  %{
    title: title,
    description: "",
    pubDate: published_at,
    link: url,
    length: trunc(raw_length * 100),
    summary: "",
    keywords: tags,
    explicit: "no"
  }
end
|
|
|
end
|
|
|
-
|
|
|
-# <?xml version="1.0" encoding="utf-8"?>
|
|
|
-# <rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:itunesu="http://www.itunesu.com/feed" version="2.0">
|
|
|
-# <channel>
|
|
|
-# <link>http://www.YourSite.com</link>
|
|
|
-# <language>en-us</language>
|
|
|
-# <copyright>©2013</copyright>
|
|
|
-# <webMaster>your@email.com (Your Name)</webMaster>
|
|
|
-# <managingEditor>your@email.com (Your Name)</managingEditor>
|
|
|
-# <image>
|
|
|
-# <url>http://www.YourSite.com/ImageSize300X300.jpg</url>
|
|
|
-# <title>Title or description of your logo</title>
|
|
|
-# <link>http://www.YourSite.com</link>
|
|
|
-# </image>
|
|
|
-# <itunes:owner>
|
|
|
-# <itunes:name>Your Name</itunes:name>
|
|
|
-# <itunes:email>your@email.com</itunes:email>
|
|
|
-# </itunes:owner>
|
|
|
-# <itunes:category text="Education">
|
|
|
-# <itunes:category text="Higher Education" />
|
|
|
-# </itunes:category>
|
|
|
-# <itunes:keywords>separate, by, comma, and, space</itunes:keywords>
|
|
|
-# <itunes:explicit>no</itunes:explicit>
|
|
|
-# <itunes:image href="http://www.YourSite.com/ImageSize300X300.jpg" />
|
|
|
-# <atom:link href="http://www.YourSite.com/feed.xml" rel="self" type="application/rss+xml" />
|
|
|
-# <pubDate>Sun, 01 Jan 2012 00:00:00 EST</pubDate>
|
|
|
-# <title>Verbose title of the podcast</title>
|
|
|
-# <itunes:author>College, school, or department owning the podcast</itunes:author>
|
|
|
-# <description>Verbose description of the podcast.</description>
|
|
|
-# <itunes:summary>Duplicate of above verbose description.</itunes:summary>
|
|
|
-# <itunes:subtitle>Short description of the podcast - 255 character max.</itunes:subtitle>
|
|
|
-# <lastBuildDate>Thu, 02 Feb 2012 00:00:00 EST</lastBuildDate>
|
|
|
-# <item>
|
|
|
-# <title>Verbose title of the episode</title>
|
|
|
-# <description>Verbose description of the episode.</description>
|
|
|
-# <itunes:summary>Duplicate of above verbose description.</itunes:summary>
|
|
|
-# <itunes:subtitle>Short description of the episode - 255 character max.</itunes:subtitle>
|
|
|
-# <itunesu:category itunesu:code="112" />
|
|
|
-# <enclosure url="http://www.YourSite.com/FILE.EXT" type="audio/mpeg" length="1" />
|
|
|
-# <guid>http://www.YourSite.com/FILE.EXT</guid>
|
|
|
-# <itunes:duration>H:MM:SS</itunes:duration>
|
|
|
-# <pubDate>Thu, 02 Feb 2012 00:00:00 EST</pubDate>
|
|
|
-# </item>
|
|
|
-# </channel>
|
|
|
-# </rss>
|