# parser.ex
  defmodule PodcastFeed.Provider.Archive.Parser do
    @moduledoc """
    Builds podcast feed data from an archive.org item.

    Two JSON documents are fetched for an identifier: the item's record from the
    metadata endpoint (supplying the `"metadata"` and `"files"` keys) and a
    `metadata.json` file downloaded from the item itself, which supplies the
    feed's image, link, category and explicit flag.
    """

    alias PodcastFeed.Utility.Format

    @extra_metadata_url "https://archive.org/download/{identifier}/metadata.json"
    @metadata_url "https://archive.org/metadata/{identifier}"
    @download_url "https://archive.org/download/{identifier}/{filename}"

    @extra_metadata_request_options [follow_redirect: true]
    @metadata_request_options [
      follow_redirect: true,
      connect_timeout: 30_000,
      recv_timeout: 30_000
    ]

    @doc """
    Fetches both JSON documents for `identifier` and returns the parsed feed.

    Returns the same `%{podcast: ..., items: ...}` map as `parse/3`. Raises when
    a request does not answer with status 200 or a body is not valid JSON.
    """
    @spec by_identifier(String.t()) :: %{podcast: map(), items: [map()]}
    def by_identifier(identifier) do
      extra_metadata_json = fetch_extra_metadata(identifier)
      metadata_json = fetch_metadata(identifier)
      parse(identifier, metadata_json, extra_metadata_json)
    end

    @doc """
    Converts already-decoded JSON into feed data.

    `metadata_json` must contain the `"metadata"` and `"files"` keys; `extra` is
    the decoded `metadata.json` document. Returns a map with the channel data
    under `:podcast` and one entry per audio file under `:items`.

    Raises if `"publicdate"` or `"addeddate"` is missing or not ISO 8601, or if
    an audio file lacks a parsable `"mtime"` or `"length"`.
    """
    @spec parse(String.t(), map(), map()) :: %{podcast: map(), items: [map()]}
    def parse(identifier, %{"metadata" => metadata, "files" => files}, extra) do
      # The cover URL is looked up but not yet used in the result; the lookup
      # returns nil instead of raising so imageless items can still be parsed.
      _image = fetch_image(files, identifier)

      %{podcast: podcast_data(metadata, extra), items: items_data(files, identifier)}
    end

    # Downloads and decodes the item's own metadata.json document.
    defp fetch_extra_metadata(identifier) do
      @extra_metadata_url
      |> Format.compile(identifier: identifier)
      |> get_body!(@extra_metadata_request_options)
      # Line breaks are stripped before decoding. NOTE(review): presumably the
      # file contains raw newlines the decoder rejects -- confirm.
      |> String.replace("\n", "")
      |> Poison.decode!()
    end

    # Downloads and decodes the item's record from the metadata endpoint.
    defp fetch_metadata(identifier) do
      @metadata_url
      |> Format.compile(identifier: identifier)
      |> get_body!(@metadata_request_options)
      |> Poison.decode!()
    end

    # Performs a GET and returns the response body. Any outcome other than a
    # 200 response fails the match and raises.
    defp get_body!(url, options) do
      {:ok, 200, _headers, client_ref} = :hackney.get(url, [], "", options)
      {:ok, body} = :hackney.body(client_ref)
      body
    end

    # Returns the encoded download URL of the first original JPEG file, or nil
    # when the item has none.
    # FIXME: also accept other image formats (jpg, png, gif).
    defp fetch_image(files, identifier) do
      files
      |> Enum.find(fn f -> f["source"] == "original" and f["format"] == "JPEG" end)
      |> case do
        %{"name" => filename} when is_binary(filename) -> download_link(identifier, filename)
        _no_image -> nil
      end
    end

    # Builds the channel-level data from the item metadata and the extra document.
    defp podcast_data(metadata, extra) do
      %{
        title: metadata["title"],
        description: metadata["description"],
        webmaster: metadata["uploader"],
        managingEditor: metadata["uploader"],
        owner: %{
          name: metadata["creator"],
          email: metadata["uploader"]
        },
        keywords: metadata["subject"],
        pubDate: utc_datetime(metadata["publicdate"]),
        lastBuildDate: utc_datetime(metadata["addeddate"]),
        author: metadata["creator"],
        language: metadata["language"],
        image: %{
          url: extra["image"]["url"],
          title: extra["image"]["title"],
          link: extra["image"]["link"]
        },
        link: extra["link"],
        category: extra["category"],
        explicit: extra["explicit"]
      }
    end

    # Parses an ISO 8601 timestamp without zone information as a UTC DateTime.
    defp utc_datetime(timestamp) do
      timestamp
      |> NaiveDateTime.from_iso8601!()
      |> DateTime.from_naive!("Etc/UTC")
    end

    # Builds one feed item per audio file, preserving the order of `files`.
    defp items_data(files, identifier) do
      files
      |> filter_audio_files()
      |> Enum.map(fn f -> to_feed_item(f, identifier) end)
    end

    # Keeps the files whose "format" mentions MP3 (case-insensitive).
    # FIXME: also accept other audio formats (mp3, ogg, ...).
    defp filter_audio_files(files), do: Enum.filter(files, &audio_file?/1)

    # Entries without a string "format" are treated as non-audio, not an error.
    defp audio_file?(%{"format" => format}) when is_binary(format), do: format =~ ~r/MP3/i
    defp audio_file?(_file), do: false

    # Converts a single file entry into a feed item map.
    defp to_feed_item(file, identifier) do
      %{
        title: file["title"],
        description: "",
        # "mtime" is read as a Unix timestamp in seconds.
        pubDate:
          file |> Map.get("mtime") |> Integer.parse() |> elem(0) |> DateTime.from_unix!(:second),
        link: download_link(identifier, Map.get(file, "name")),
        # NOTE(review): this is the file's "length" value scaled by 100 and
        # truncated, not a byte size -- confirm this is what feed consumers expect.
        length: file |> Map.get("length") |> Float.parse() |> elem(0) |> Kernel.*(100) |> trunc(),
        summary: "",
        # image: "", # FIXME: take the image from other files
        keywords: file |> Map.take(["album", "artist", "genre"]) |> Map.values(),
        explicit: "no"
      }
    end

    # Fills the download URL template for `filename` and percent-encodes it.
    defp download_link(identifier, filename) do
      @download_url
      |> Format.compile(identifier: identifier, filename: filename)
      |> URI.encode()
    end
  end