# parser.ex

  defmodule Openpod.Provider.Archive.Parser do
    @moduledoc """
    Public API for fetching item metadata from archive.org and converting it
    into a common podcast data structure.

    The entry point is `by_identifier/1`, which downloads the metadata document
    of an archive.org item and returns a map with two keys:

      * `:podcast` - channel-level data (title, description, cover image, ...)
      * `:items`   - one entry per MP3 file found in the item
    """

    alias Openpod.Utility.Format
    alias __MODULE__

    # HTTPS, like the two URLs below, so the metadata document is not fetched
    # in clear text.
    @archive_metadata_url "https://archive.org/metadata/{identifier}"
    @download_url "https://archive.org/download/{identifier}/{filename}"
    @podcast_link "https://archive.org/details/{identifier}"

    @custom_metadata_defaults %{
      "link" => nil,
      "image" => %{
        "url" => nil,
        "title" => nil,
        "link" => nil
      },
      "category" => "",
      "explicit" => "no",
      "version" => "1"
    }

    # NOTE: `:podcast_data` and `:custom_metadata` are not read by any function
    # in this module; they are kept because the struct is part of its interface.
    @enforce_keys [:identifier]
    defstruct [
      :identifier,
      :podcast_data,
      :archive_metadata,
      custom_metadata: @custom_metadata_defaults
    ]

    @type t :: %__MODULE__{
            identifier: term(),
            podcast_data: term(),
            archive_metadata: map() | nil,
            custom_metadata: map()
          }

    @doc """
    Fetches the archive.org metadata for `identifier` and converts it with
    `to_openpod_data/1`.

    The HTTP call is matched assertively: any response other than a `200`
    raises a `MatchError`, and a body that is not valid JSON raises from
    `Jason.decode!/1`.
    """
    @spec by_identifier(term()) :: %{podcast: map(), items: [map()]}
    def by_identifier(identifier) do
      %Parser{identifier: identifier}
      |> enrich_with_archive_metadata()
      |> to_openpod_data()
    end

    @doc """
    Converts a token that already carries `:archive_metadata` into podcast data.

    `token` must have an `:identifier` and an `:archive_metadata` map containing
    the `"metadata"`, `"item_last_updated"` and `"files"` keys; otherwise no
    function clause matches and the call raises.

    Returns `%{podcast: map, items: [map]}`.
    """
    @spec to_openpod_data(map()) :: %{podcast: map(), items: [map()]}
    def to_openpod_data(token) do
      %{
        podcast: podcast_data(token),
        items: items_data(token)
      }
    end

    # Builds the channel-level map. The "op_*" metadata keys override the
    # defaults derived from the item itself.
    defp podcast_data(
           %{
             archive_metadata: %{"metadata" => metadata, "item_last_updated" => last_updated}
           } = token
         ) do
      details_link = Format.compile(@podcast_link, identifier: token.identifier)
      link = Map.get(metadata, "op_link") || details_link
      title = metadata["title"]
      creator = metadata["creator"]
      uploader = metadata["uploader"]

      # "publicdate" carries no zone information, so it is interpreted as UTC.
      # A missing or malformed value raises here.
      published_at =
        metadata["publicdate"]
        |> NaiveDateTime.from_iso8601!()
        |> DateTime.from_naive!("Etc/UTC")

      %{
        title: title,
        description: metadata["description"],
        webmaster: uploader,
        managingEditor: uploader,
        owner: %{
          name: creator,
          email: uploader
        },
        keywords: parse_subject(metadata["subject"]),
        pubDate: published_at,
        lastBuildDate: DateTime.from_unix!(last_updated, :second),
        author: creator,
        # NOTE(review): "language" is passed through as-is; confirm how
        # ISO639.to_iso639_1/1 behaves when the item has no language set.
        language: ISO639.to_iso639_1(metadata["language"]),
        image: %{
          url: fetch_cover(token),
          title: title,
          link: link
        },
        link: link,
        category: Map.get(metadata, "op_category", ""),
        explicit: Map.get(metadata, "op_explicit", "no")
      }
    end

    # One feed item per audio file of the archive.org item.
    defp items_data(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
      files
      |> filter_audio_files()
      |> Enum.map(&to_feed_item(&1, identifier, files))
    end

    # Downloads and decodes the metadata document. Both matches are assertive on
    # purpose: a non-200 status or a failed body read crashes the caller.
    defp fetch_archive_metadata(identifier) do
      metadata_url = Format.compile(@archive_metadata_url, identifier: identifier)

      {:ok, 200, _headers, client_ref} =
        :hackney.get(metadata_url, [], "",
          follow_redirect: true,
          connect_timeout: 30_000,
          recv_timeout: 30_000
        )

      {:ok, metadata_json} = :hackney.body(client_ref)
      Jason.decode!(metadata_json)
    end

    # FIXME: only files whose format mentions MP3 are kept; OGG and other audio
    # formats are currently ignored.
    defp filter_audio_files(files), do: Enum.filter(files, &audio_file?/1)

    # Files without a binary "format" are skipped instead of crashing on `=~`.
    defp audio_file?(%{"format" => format}) when is_binary(format), do: format =~ ~r/MP3/i
    defp audio_file?(_file), do: false

    # Converts one file entry into a feed item. `_files` is unused; it is kept
    # for the disabled per-episode image lookup below.
    defp to_feed_item(file, identifier, _files) do
      filename = Map.get(file, "name")

      %{
        title: file["title"],
        description: "",
        # nil when "mtime" is missing or not numeric
        pubDate: file |> Map.get("mtime") |> parse_unix_time(),
        link: download_url(identifier, filename),
        # whole seconds, or nil when "length" is missing or not numeric
        length: file |> Map.get("length") |> parse_duration(),
        size: Map.get(file, "size"),
        summary: "",
        # image: download_url(identifier, fetch_image_of_audio(filename, files)),
        image: nil,
        keywords: file |> Map.take(["album", "artist", "genre"]) |> Map.values(),
        explicit: "no"
      }
    end

    # Parses a Unix timestamp (string or integer) into a UTC DateTime.
    defp parse_unix_time(value) when is_integer(value), do: DateTime.from_unix!(value, :second)

    defp parse_unix_time(value) when is_binary(value) do
      case Integer.parse(value) do
        {seconds, _rest} -> DateTime.from_unix!(seconds, :second)
        :error -> nil
      end
    end

    defp parse_unix_time(_value), do: nil

    # Parses a duration into whole seconds. Accepts plain seconds ("272.43") and
    # clock-style values ("4:32", "1:04:32"); the previous Float.parse-only code
    # turned "4:32" into 4.
    # NOTE(review): the clock-style form is an assumption about archive.org
    # data - confirm against real metadata documents.
    defp parse_duration(value) when is_number(value), do: trunc(value)

    defp parse_duration(value) when is_binary(value) do
      total =
        value
        |> String.split(":")
        |> Enum.reduce_while(0.0, fn part, acc ->
          case Float.parse(part) do
            {number, _rest} -> {:cont, acc * 60 + number}
            :error -> {:halt, nil}
          end
        end)

      if total, do: trunc(total), else: nil
    end

    defp parse_duration(_value), do: nil

    # URL of the first original image file of the item, or nil when there is none.
    defp fetch_cover(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
      filename =
        case Enum.find(files, &cover_image?/1) do
          nil -> nil
          file -> Map.get(file, "name")
        end

      download_url(identifier, filename)
    end

    # Entries without a binary "format" never qualify (and no longer crash).
    defp cover_image?(%{"source" => "original", "format" => format}) when is_binary(format),
      do: format =~ ~r/JPG|JPEG|PNG|GIF|Item Image/i

    defp cover_image?(_file), do: false

    # defp fetch_image_of_audio(audio_file, files) do
    # files
    # |> Enum.filter(fn
    # %{"format" => format, "source" => "derivative", "original" => ^audio_file} ->
    # format =~ ~r/JPG|JPEG|PNG|GIF/i
    # _ -> nil
    # end)
    # |> fetch_image_of_audio()
    # end
    # defp fetch_image_of_audio(image_files) when is_list(image_files), do: fetch_image_of_audio(List.first(image_files))
    # defp fetch_image_of_audio(nil), do: nil
    # defp fetch_image_of_audio(image_file), do: image_file |> Map.get("name", nil)

    # Builds the download URL of `filename`; nil when there is no file.
    defp download_url(_identifier, nil), do: nil

    defp download_url(identifier, filename) do
      # Each path component is escaped before substitution. Encoding the finished
      # URL with URI.encode/1 left reserved characters such as "#" and "?" inside
      # a filename untouched, which cut the URL short.
      Format.compile(@download_url,
        identifier: identifier |> to_string() |> encode_path(),
        filename: filename |> to_string() |> encode_path()
      )
    end

    # Percent-encodes everything except unreserved characters and "/", which is
    # preserved as the path separator.
    defp encode_path(path) do
      URI.encode(path, fn char -> char == ?/ or URI.char_unreserved?(char) end)
    end

    defp enrich_with_archive_metadata(token) do
      %Parser{token | archive_metadata: fetch_archive_metadata(token.identifier)}
    end

    # Normalises the "subject" field into a list of keywords. A list is returned
    # unchanged, a string is split on ";" with surrounding whitespace and empty
    # entries removed, and anything else (e.g. a missing subject) yields [].
    defp parse_subject(subject) when is_list(subject), do: subject

    defp parse_subject(subject) when is_binary(subject) do
      subject
      |> String.split(";")
      |> Enum.map(&String.trim/1)
      |> Enum.reject(&(&1 == ""))
    end

    defp parse_subject(_subject), do: []
  end