# parser.ex 4.8 KB
  1. defmodule Openpod.Provider.Archive.Parser do
  2. @moduledoc """
  3. This module provides a public API for fetching data from archive.org and convert them
  4. in a common podcast data structures.
  5. """
  6. alias Openpod.Utility.Format
  7. alias __MODULE__
  8. @archive_metadata_url "http://archive.org/metadata/{identifier}"
  9. @download_url "https://archive.org/download/{identifier}/{filename}"
  10. @podcast_link "https://archive.org/details/{identifier}"
  11. @custom_metadata_defaults %{
  12. "link" => nil,
  13. "image" => %{
  14. "url" => nil,
  15. "title" => nil,
  16. "link" => nil,
  17. },
  18. "category" => "",
  19. "explicit" => "no",
  20. "version" => "1",
  21. }
  22. @enforce_keys [:identifier]
  23. defstruct [:identifier, :podcast_data, :archive_metadata, custom_metadata: @custom_metadata_defaults]
  24. def by_identifier(identifier) do
  25. %Parser{identifier: identifier}
  26. |> enrich_with_archive_metadata()
  27. |> to_openpod_data()
  28. end
  29. def to_openpod_data(token) do
  30. %{
  31. podcast: podcast_data(token),
  32. items: items_data(token)
  33. }
  34. end
  35. defp podcast_data(token = %{archive_metadata: %{"metadata" => metadata, "item_last_updated" => last_updated}}) do
  36. link = Format.compile(@podcast_link, identifier: token.identifier)
  37. %{
  38. title: metadata["title"],
  39. description: metadata["description"],
  40. webmaster: metadata["uploader"],
  41. managingEditor: metadata["uploader"],
  42. owner: %{
  43. name: metadata["creator"],
  44. email: metadata["uploader"],
  45. },
  46. keywords: parse_subject(metadata["subject"]),
  47. pubDate: metadata["publicdate"] |> NaiveDateTime.from_iso8601!() |> DateTime.from_naive!("Etc/UTC"),
  48. lastBuildDate: last_updated |> DateTime.from_unix!(:second),
  49. author: metadata["creator"],
  50. language: metadata["language"],
  51. image: %{
  52. url: fetch_cover(token),
  53. title: metadata["title"],
  54. link: Map.get(metadata, "op_link") || link,
  55. },
  56. link: Map.get(metadata, "op_link") || link,
  57. category: Map.get(metadata, "op_category", ""),
  58. explicit: Map.get(metadata, "op_explicit", "no"),
  59. }
  60. end
  61. defp items_data(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
  62. files
  63. |> filter_audio_files()
  64. |> Enum.map(fn f -> to_feed_item(f, identifier, files) end)
  65. end
  66. defp fetch_archive_metadata(identifier) do
  67. metadata_url = Format.compile(@archive_metadata_url, identifier: identifier)
  68. {:ok, 200, _headers, client_ref} = :hackney.get(metadata_url, [], "", [follow_redirect: true, connect_timeout: 30_000, recv_timeout: 30_000])
  69. {:ok, metadata_json} = :hackney.body(client_ref)
  70. metadata_json |> Jason.decode!()
  71. end
  72. defp filter_audio_files(files) do
  73. files |> Enum.filter(fn f -> Map.get(f, "format") =~ ~r/MP3/i end) #FIXME:! mp3, ogg, boh
  74. end
  75. defp to_feed_item(file, identifier, _files) do
  76. filename = Map.get(file, "name")
  77. %{
  78. title: file["title"],
  79. description: "",
  80. pubDate: file |> Map.get("mtime") |> Integer.parse() |> elem(0) |> DateTime.from_unix!(:second),
  81. link: download_url(identifier, filename),
  82. length: (file |> Map.get("length") |> Float.parse() |> elem(0)) |> trunc(),
  83. size: file |> Map.get("size"),
  84. summary: "",
  85. # image: download_url(identifier, fetch_image_of_audio(filename, files)),
  86. image: nil,
  87. keywords: file |> Map.take(["album", "artist", "genre"]) |> Map.values(),
  88. explicit: "no",
  89. }
  90. end
  91. defp fetch_cover(%{identifier: identifier, archive_metadata: %{"files" => files}}) do
  92. filename = files
  93. |> Enum.filter(fn f -> f["source"] == "original" end)
  94. |> Enum.filter(fn f -> f["format"] =~ ~r/JPG|JPEG|PNG|GIF|Item Image/i end)
  95. |> List.first()
  96. |> case do
  97. nil -> nil
  98. file -> Map.get(file, "name")
  99. end
  100. download_url(identifier, filename)
  101. end
  102. # defp fetch_image_of_audio(audio_file, files) do
  103. # files
  104. # |> Enum.filter(fn
  105. # %{"format" => format, "source" => "derivative", "original" => ^audio_file} ->
  106. # format =~ ~r/JPG|JPEG|PNG|GIF/i
  107. # _ -> nil
  108. # end)
  109. # |> fetch_image_of_audio()
  110. # end
  111. # defp fetch_image_of_audio(image_files) when is_list(image_files), do: fetch_image_of_audio(List.first(image_files))
  112. # defp fetch_image_of_audio(nil), do: nil
  113. # defp fetch_image_of_audio(image_file), do: image_file |> Map.get("name", nil)
  114. defp download_url(_identifier, nil), do: nil
  115. defp download_url(identifier, filename) do
  116. Format.compile(@download_url, identifier: identifier, filename: filename) |> URI.encode()
  117. end
  118. defp enrich_with_archive_metadata(token) do
  119. %Parser{token | archive_metadata: fetch_archive_metadata(token.identifier)}
  120. end
  121. defp parse_subject(subject) when is_list(subject), do: subject
  122. defp parse_subject(subject) when is_binary(subject), do: subject |> String.split(";")
  123. end