# parser.ex (5.5 KB)
  defmodule PodcastFeed.Provider.Archive.Parser do
    @moduledoc """
    Fetches the metadata of an archive.org item and converts it into the common
    podcast feed data structure: a map with a `:podcast` entry (channel data) and
    an `:items` entry (one map per audio file).
    """

    alias PodcastFeed.Utility.Format
    alias __MODULE__

    @custom_metadata_url "https://archive.org/download/{identifier}/metadata.json"
    # HTTPS, like every other archive.org endpoint used by this module.
    @archive_metadata_url "https://archive.org/metadata/{identifier}"
    @download_url "https://archive.org/download/{identifier}/{filename}"
    @podcast_link "https://archive.org/details/{identifier}"

    # Used both as the struct default and as the fallback when the item has no
    # readable custom `metadata.json`.
    @custom_metadata_defaults %{
      "link" => nil,
      "image" => %{
        "url" => nil,
        "title" => nil,
        "link" => nil
      },
      "category" => "",
      "explicit" => "no",
      "version" => "1"
    }

    @enforce_keys [:identifier]
    defstruct [
      :identifier,
      :podcast_data,
      :archive_metadata,
      custom_metadata: @custom_metadata_defaults
    ]

    @doc """
    Downloads the archive.org metadata and the optional custom metadata for
    `identifier` and returns the podcast feed data built from them.

    Raises when the archive.org metadata request does not answer with HTTP 200
    or when a response body is not valid JSON.
    """
    @spec by_identifier(String.t()) :: %{podcast: map(), items: [map()]}
    def by_identifier(identifier) do
      %Parser{identifier: identifier}
      |> enrich_with_archive_metadata()
      |> enrich_with_custom_metadata()
      |> to_podcast_feed_data()
    end

    @doc """
    Converts an already enriched parser token into the feed data map
    `%{podcast: ..., items: ...}`.

    The token must carry `:identifier`, `:custom_metadata` and an
    `:archive_metadata` map with the `"metadata"`, `"item_last_updated"` and
    `"files"` keys.
    """
    @spec to_podcast_feed_data(map()) :: %{podcast: map(), items: [map()]}
    def to_podcast_feed_data(token) do
      %{
        podcast: podcast_data(token),
        items: items_data(token)
      }
    end

    # Builds the channel-level data. Values from the custom metadata win over the
    # ones derived from the archive.org metadata.
    defp podcast_data(
           %{
             custom_metadata: custom,
             archive_metadata: %{"metadata" => metadata, "item_last_updated" => last_updated}
           } = token
         ) do
      link = Format.compile(@podcast_link, identifier: token.identifier)

      %{
        title: metadata["title"],
        description: metadata["description"],
        webmaster: metadata["uploader"],
        managingEditor: metadata["uploader"],
        owner: %{
          name: metadata["creator"],
          email: metadata["uploader"]
        },
        keywords: metadata["subject"],
        pubDate:
          metadata["publicdate"]
          |> NaiveDateTime.from_iso8601!()
          |> DateTime.from_naive!("Etc/UTC"),
        lastBuildDate: DateTime.from_unix!(last_updated, :second),
        author: metadata["creator"],
        language: metadata["language"],
        image: %{
          url: get_in(custom, ["image", "url"]) || fetch_cover(token),
          title: get_in(custom, ["image", "title"]) || metadata["title"],
          link: get_in(custom, ["image", "link"]) || link
        },
        link: Map.get(custom, "link") || link,
        category: Map.get(custom, "category", ""),
        explicit: Map.get(custom, "explicit", "no")
      }
    end

    # Builds one feed item per audio file of the archive.org item.
    defp items_data(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
      files
      |> filter_audio_files()
      |> Enum.map(&to_feed_item(&1, identifier, files))
    end

    # Requests the optional custom `metadata.json` of the item.
    defp fetch_custom_metadata(identifier) do
      @custom_metadata_url
      |> Format.compile(identifier: identifier)
      |> :hackney.get([], "", follow_redirect: true)
      |> parse_custom_metadata_response()
    end

    # 200: decode the body. Line breaks are stripped before decoding, exactly as
    # the previous split/join did.
    defp parse_custom_metadata_response({:ok, 200, _headers, client_ref}) do
      {:ok, custom_metadata_json} = :hackney.body(client_ref)

      custom_metadata_json
      |> String.replace("\n", "")
      |> Jason.decode!()
    end

    # Any other status: the custom metadata is optional, so fall back to the
    # defaults. The unread body is skipped so hackney can release the connection
    # instead of leaving it checked out.
    defp parse_custom_metadata_response({:ok, _status, _headers, client_ref}) do
      :hackney.skip_body(client_ref)
      @custom_metadata_defaults
    end

    # Transport errors and any other reply shape.
    defp parse_custom_metadata_response(_), do: @custom_metadata_defaults

    # Requests the archive.org metadata of the item; anything but a 200 with a
    # readable JSON body raises.
    defp fetch_archive_metadata(identifier) do
      metadata_url = Format.compile(@archive_metadata_url, identifier: identifier)

      {:ok, 200, _headers, client_ref} =
        :hackney.get(metadata_url, [], "",
          follow_redirect: true,
          connect_timeout: 30_000,
          recv_timeout: 30_000
        )

      {:ok, metadata_json} = :hackney.body(client_ref)
      Jason.decode!(metadata_json)
    end

    # FIXME: only MP3 is recognised; other audio formats (e.g. Ogg) are not
    # handled yet.
    defp filter_audio_files(files) do
      Enum.filter(files, &format_matches?(&1, ~r/MP3/i))
    end

    # True when the file entry has a textual "format" matching `pattern`.
    # Entries without a format never match (instead of raising on `nil =~ ...`).
    defp format_matches?(%{"format" => format}, pattern) when is_binary(format) do
      format =~ pattern
    end

    defp format_matches?(_file, _pattern), do: false

    # Converts a single archive.org file entry into a feed item. `_files` is kept
    # for the (currently disabled) per-episode image lookup below.
    defp to_feed_item(file, identifier, _files) do
      filename = Map.get(file, "name")

      %{
        title: file["title"],
        description: "",
        pubDate:
          file
          |> Map.get("mtime")
          |> Integer.parse()
          |> elem(0)
          |> DateTime.from_unix!(:second),
        link: download_url(identifier, filename),
        length: duration_in_seconds(file["length"]),
        size: Map.get(file, "size"),
        summary: "",
        # image: download_url(identifier, fetch_image_of_audio(filename, files)),
        image: nil,
        keywords: file |> Map.take(["album", "artist", "genre"]) |> Map.values(),
        explicit: "no"
      }
    end

    # Turns a "length" value into whole seconds. Accepts plain seconds
    # ("265.53" -> 265) as well as colon-separated clock strings
    # ("4:25" -> 265, "1:02:03" -> 3723). A missing or unparsable value yields 0
    # so that one odd file does not break the whole feed.
    defp duration_in_seconds(length) when is_binary(length) do
      length
      |> String.split(":")
      |> Enum.reduce(0, fn part, total -> total * 60 + leading_number(part) end)
      |> trunc()
    end

    defp duration_in_seconds(_length), do: 0

    # Numeric prefix of `text`, or 0 when there is none.
    defp leading_number(text) do
      case Float.parse(text) do
        {number, _rest} -> number
        :error -> 0
      end
    end

    # Download URL of the first original image file, or nil when there is none.
    defp fetch_cover(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
      cover =
        Enum.find(files, fn file ->
          file["source"] == "original" and format_matches?(file, ~r/JPG|JPEG|PNG|GIF/i)
        end)

      download_url(identifier, cover && Map.get(cover, "name"))
    end

    # defp fetch_image_of_audio(audio_file, files) do
    # files
    # |> Enum.filter(fn
    # %{"format" => format, "source" => "derivative", "original" => ^audio_file} ->
    # format =~ ~r/JPG|JPEG|PNG|GIF/i
    # _ -> nil
    # end)
    # |> fetch_image_of_audio()
    # end
    # defp fetch_image_of_audio(image_files) when is_list(image_files), do: fetch_image_of_audio(List.first(image_files))
    # defp fetch_image_of_audio(nil), do: nil
    # defp fetch_image_of_audio(image_file), do: image_file |> Map.get("name", nil)

    # Builds the percent-encoded download URL of `filename`, or nil without one.
    defp download_url(_identifier, nil), do: nil

    defp download_url(identifier, filename) do
      @download_url
      |> Format.compile(identifier: identifier, filename: filename)
      |> URI.encode(&url_char?/1)
    end

    # Same set as URI.encode/1 leaves untouched, minus "#" and "?": inside a file
    # name they would otherwise start a fragment or a query and truncate the path.
    defp url_char?(char), do: URI.char_unescaped?(char) and char not in ~c"#?"

    defp enrich_with_archive_metadata(token) do
      %Parser{token | archive_metadata: fetch_archive_metadata(token.identifier)}
    end

    defp enrich_with_custom_metadata(token) do
      %Parser{token | custom_metadata: fetch_custom_metadata(token.identifier)}
    end
  end