parser.ex 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. defmodule PodcastFeed.Provider.Archive.Parser do
  2. alias PodcastFeed.Utility.Format
  3. alias __MODULE__
  # URL templates. The `{identifier}` / `{filename}` placeholders are filled in
  # through `Format.compile/2` by the functions below.
  @custom_metadata_url "https://archive.org/download/{identifier}/metadata.json"
  # HTTPS like every other endpoint here, so the metadata that drives the feed
  # is not fetched in cleartext (and no http -> https redirect is needed).
  @archive_metadata_url "https://archive.org/metadata/{identifier}"
  @download_url "https://archive.org/download/{identifier}/{filename}"
  @podcast_link "https://archive.org/details/{identifier}"

  # Custom metadata used when the item's metadata.json cannot be fetched.
  # Keys are strings because the fetched variant is decoded JSON.
  @custom_metadata_defaults %{
    "link" => nil,
    "image" => %{
      "url" => nil,
      "title" => nil,
      "link" => nil
    },
    "category" => "",
    "explicit" => "no",
    "version" => "1"
  }

  # The parser "token": starts with only `identifier` set and is progressively
  # enriched with the fetched archive and custom metadata.
  @enforce_keys [:identifier]
  defstruct [
    :identifier,
    :podcast_data,
    :archive_metadata,
    custom_metadata: @custom_metadata_defaults
  ]
  @doc """
  Builds the podcast feed data for the item named by `identifier`.

  Creates a parser token, enriches it with the archive metadata and then with
  the custom metadata, and converts the result with `to_podcast_feed_data/1`.
  """
  def by_identifier(identifier) do
    seed = %Parser{identifier: identifier}
    with_archive = enrich_with_archive_metadata(seed)
    with_custom = enrich_with_custom_metadata(with_archive)
    to_podcast_feed_data(with_custom)
  end
  @doc """
  Converts an enriched parser token into a map with two keys:
  `:podcast` (channel-level data) and `:items` (one entry per audio file).
  """
  def to_podcast_feed_data(token) do
    podcast = podcast_data(token)
    items = items_data(token)
    %{podcast: podcast, items: items}
  end
  # Builds the channel-level map from the archive metadata, letting values from
  # the custom metadata take precedence for the image, link, category and
  # explicit fields.
  defp podcast_data(
         %{
           custom_metadata: custom,
           archive_metadata: %{"metadata" => metadata, "item_last_updated" => last_updated}
         } = token
       ) do
    details_link = Format.compile(@podcast_link, identifier: token.identifier)
    title = metadata["title"]
    creator = metadata["creator"]
    uploader = metadata["uploader"]

    # "publicdate" carries no zone information; it is interpreted as UTC.
    published_at =
      metadata["publicdate"]
      |> NaiveDateTime.from_iso8601!()
      |> DateTime.from_naive!("Etc/UTC")

    last_build_at = DateTime.from_unix!(last_updated, :second)

    # Each image field falls back to a value derived from the archive item
    # when the custom metadata leaves it nil.
    image = %{
      url: get_in(custom, ["image", "url"]) || fetch_cover(token),
      title: get_in(custom, ["image", "title"]) || title,
      link: get_in(custom, ["image", "link"]) || details_link
    }

    %{
      title: title,
      description: metadata["description"],
      webmaster: uploader,
      managingEditor: uploader,
      owner: %{name: creator, email: uploader},
      keywords: metadata["subject"],
      pubDate: published_at,
      lastBuildDate: last_build_at,
      author: creator,
      language: metadata["language"],
      image: image,
      link: Map.get(custom, "link") || details_link,
      category: Map.get(custom, "category", ""),
      explicit: Map.get(custom, "explicit", "no")
    }
  end
  # Produces one feed item per audio file listed in the archive metadata.
  defp items_data(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
    for audio_file <- filter_audio_files(files) do
      to_feed_item(audio_file, identifier, files)
    end
  end
  # Requests the item's metadata.json (following redirects) and hands the raw
  # hackney response to parse_custom_metadata_response/1.
  defp fetch_custom_metadata(identifier) do
    @custom_metadata_url
    |> Format.compile(identifier: identifier)
    |> :hackney.get([], "", follow_redirect: true)
    |> parse_custom_metadata_response()
  end
  # Turns the hackney response for metadata.json into the custom metadata map.
  #
  # * 200 response: the body is read, newlines are removed and the result is
  #   decoded as JSON (raises if the body is not valid JSON).
  # * Any other response that still carries a client reference: the body is
  #   skipped so the connection is released, then the defaults are returned.
  # * Anything else (e.g. `{:error, reason}`): the defaults are returned.
  defp parse_custom_metadata_response({:ok, 200, _headers, client_ref}) do
    {:ok, custom_metadata_json} = :hackney.body(client_ref)

    custom_metadata_json
    |> String.split("\n")
    |> Enum.join()
    |> Poison.decode!()
  end

  defp parse_custom_metadata_response({:ok, _status, _headers, client_ref}) do
    # hackney keeps the connection checked out until the body is read or
    # skipped; without this every non-200 answer (e.g. a 404 for items that
    # have no metadata.json) would leak a connection.
    :hackney.skip_body(client_ref)
    @custom_metadata_defaults
  end

  defp parse_custom_metadata_response(_), do: @custom_metadata_defaults
  # Downloads and JSON-decodes the archive metadata for `identifier`.
  # Raises (MatchError) on anything other than a 200 response with a readable body.
  defp fetch_archive_metadata(identifier) do
    url = Format.compile(@archive_metadata_url, identifier: identifier)
    request_opts = [follow_redirect: true, connect_timeout: 30000, recv_timeout: 30000]

    {:ok, 200, _headers, ref} = :hackney.get(url, [], "", request_opts)
    {:ok, body} = :hackney.body(ref)

    Poison.decode!(body)
  end
  # Keeps only the file entries whose "format" mentions MP3 (case-insensitive).
  # Entries without a string "format" are excluded instead of raising, so one
  # incomplete entry cannot take down the whole feed.
  # FIXME: only MP3 is matched; other audio formats (e.g. ogg) are not handled yet.
  defp filter_audio_files(files) do
    Enum.filter(files, fn
      %{"format" => format} when is_binary(format) -> format =~ ~r/MP3/i
      _ -> false
    end)
  end
  # Converts one archive file entry into a feed item map.
  #
  # `file` is a string-keyed map from the archive metadata "files" list; its
  # "name", "title", "mtime", "length" and "size" keys are read here.
  # The third argument is currently unused (kept for the disabled per-item image).
  defp to_feed_item(file, identifier, _files) do
    filename = Map.get(file, "name")

    %{
      title: file["title"],
      description: "",
      # "mtime" is a string holding Unix seconds.
      pubDate:
        file |> Map.get("mtime") |> Integer.parse() |> elem(0) |> DateTime.from_unix!(:second),
      link: download_url(identifier, filename),
      length: file |> Map.get("length") |> duration_in_seconds(),
      size: file |> Map.get("size"),
      summary: "",
      # image: download_url(identifier, fetch_image_of_audio(filename, files)),
      image: nil,
      keywords: file |> Map.take(["album", "artist", "genre"]) |> Map.values(),
      explicit: "no"
    }
  end

  # Parses a duration string into whole seconds.
  #
  # Accepts plain (possibly fractional) seconds such as "225.67" as well as
  # colon-separated clock values such as "3:45" or "1:02:03". Parsing a clock
  # value with Float.parse/1 alone would stop at the first ":" and report only
  # the leading component.
  # NOTE(review): the clock format is assumed from archive.org items seen in
  # the wild - confirm against the Metadata API.
  # Raises on nil or non-numeric input.
  defp duration_in_seconds(value) do
    value
    |> String.split(":")
    |> Enum.reduce(0, fn part, acc ->
      acc * 60 + (part |> Float.parse() |> elem(0))
    end)
    |> trunc()
  end
  # Returns the download URL of the first "original" file whose format looks
  # like an image (JPG/JPEG/PNG/GIF), or nil when the item has none.
  defp fetch_cover(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
    image_candidates =
      for candidate <- files,
          candidate["source"] == "original",
          candidate["format"] =~ ~r/JPG|JPEG|PNG|GIF/i,
          do: candidate

    cover_name =
      case image_candidates do
        [] -> nil
        [first | _rest] -> Map.get(first, "name")
      end

    download_url(identifier, cover_name)
  end
  112. # defp fetch_image_of_audio(audio_file, files) do
  113. # files
  114. # |> Enum.filter(fn
  115. # %{"format" => format, "source" => "derivative", "original" => ^audio_file} ->
  116. # format =~ ~r/JPG|JPEG|PNG|GIF/i
  117. # _ -> nil
  118. # end)
  119. # |> fetch_image_of_audio()
  120. # end
  121. # defp fetch_image_of_audio(image_files) when is_list(image_files), do: fetch_image_of_audio(List.first(image_files))
  122. # defp fetch_image_of_audio(nil), do: nil
  123. # defp fetch_image_of_audio(image_file), do: image_file |> Map.get("name", nil)
  # Builds the download URL for `filename` inside the item `identifier`.
  # Returns nil when there is no filename.
  #
  # The identifier and filename are percent-encoded individually before being
  # placed into the template. Encoding the finished URL with URI.encode/1 would
  # leave reserved characters such as "#" and "?" untouched, so a file named
  # "Episode #1.mp3" would yield a link cut off at the fragment marker.
  defp download_url(_identifier, nil), do: nil

  defp download_url(identifier, filename) do
    Format.compile(@download_url,
      identifier: encode_path(identifier),
      filename: encode_path(filename)
    )
  end

  # Percent-encodes everything except unreserved characters and "/", which is
  # kept so that file names containing sub-directories stay valid paths.
  defp encode_path(value) do
    value
    |> to_string()
    |> URI.encode(&(&1 == ?/ or URI.char_unreserved?(&1)))
  end
  # Returns the token with `archive_metadata` set to the metadata fetched for
  # its identifier.
  defp enrich_with_archive_metadata(token) do
    archive_metadata = fetch_archive_metadata(token.identifier)
    %Parser{token | archive_metadata: archive_metadata}
  end
  # Returns the token with `custom_metadata` set to the result of fetching the
  # item's metadata.json (or the defaults when that fetch does not succeed).
  defp enrich_with_custom_metadata(token) do
    custom_metadata = fetch_custom_metadata(token.identifier)
    %Parser{token | custom_metadata: custom_metadata}
  end
  134. end