# parser.ex
  defmodule PodcastFeed.Provider.Archive.Parser do
    @moduledoc """
    Builds podcast feed data from an archive.org item.

    Two documents are fetched for an item identifier:

      * the item metadata (`/metadata/{identifier}`), which is required and
        must contain the `"metadata"` and `"files"` keys;
      * an optional `metadata.json` stored among the item's files, which
        supplies the feed link, image, category and explicit flag. When it
        cannot be downloaded, empty strings are used for those fields.

    The result is a map of the form `%{podcast: map, items: [map]}`.
    """

    alias PodcastFeed.Utility.Format

    @extra_metadata_url "https://archive.org/download/{identifier}/metadata.json"
    # Uses https like the other two endpoints.
    @metadata_url "https://archive.org/metadata/{identifier}"
    @download_url "https://archive.org/download/{identifier}/{filename}"

    # Values used when the optional metadata.json is not available.
    @default_extra_metadata %{
      "link" => "",
      "image" => %{
        "url" => "",
        "title" => "",
        "link" => ""
      },
      "category" => "",
      "explicit" => ""
    }

    @doc """
    Fetches both metadata documents for `identifier` and parses them.

    Raises if the item metadata request does not answer with status 200 or
    if a response body is not valid JSON.
    """
    @spec by_identifier(String.t()) :: %{podcast: map(), items: [map()]}
    def by_identifier(identifier) do
      extra_metadata_json = fetch_extra_metadata(identifier)
      metadata_json = fetch_metadata(identifier)
      parse(identifier, metadata_json, extra_metadata_json)
    end

    # Downloads the optional metadata.json. Any outcome other than a 200
    # response yields @default_extra_metadata.
    defp fetch_extra_metadata(identifier) do
      extra_metadata_url = Format.compile(@extra_metadata_url, identifier: identifier)

      case :hackney.get(extra_metadata_url, [], "", follow_redirect: true) do
        {:ok, 200, _headers, client_ref} ->
          {:ok, extra_metadata_json} = :hackney.body(client_ref)

          # Newlines are stripped before decoding, as the file may span
          # several lines.
          extra_metadata_json
          |> String.replace("\n", "")
          |> Poison.decode!()

        {:ok, _status, _headers, client_ref} ->
          # The body is not needed, but it has to be skipped so the
          # connection is released instead of being left checked out.
          :hackney.skip_body(client_ref)
          @default_extra_metadata

        _ ->
          @default_extra_metadata
      end
    end

    # Downloads the required item metadata. A failed request is treated as
    # fatal: the assertive matches raise MatchError.
    defp fetch_metadata(identifier) do
      metadata_url = Format.compile(@metadata_url, identifier: identifier)
      options = [follow_redirect: true, connect_timeout: 30000, recv_timeout: 30000]

      {:ok, 200, _headers, client_ref} = :hackney.get(metadata_url, [], "", options)
      {:ok, metadata_json} = :hackney.body(client_ref)
      Poison.decode!(metadata_json)
    end

    @doc """
    Converts decoded archive.org metadata into feed data.

    `identifier` is used to build download links, the second argument is the
    decoded item metadata (it must have `"metadata"` and `"files"` keys) and
    `extra` is the decoded optional `metadata.json`.

    File entries without a `"format"` string are ignored rather than
    raising. Only files whose format matches MP3 or OGG become items.
    """
    @spec parse(String.t(), map(), map()) :: %{podcast: map(), items: [map()]}
    def parse(identifier, %{"metadata" => metadata, "files" => files}, extra) do
      # cover = files |> fetch_cover(identifier)
      %{podcast: podcast_data(metadata, extra), items: items_data(files, identifier)}
    end

    # Builds the download URL of the item's original JPEG cover, or nil when
    # there is none. Currently only referenced from the commented-out line
    # in parse/3.
    # FIXME:! jpg, png, gif
    defp fetch_cover(files, identifier) do
      cover = Enum.find(files, fn f -> f["source"] == "original" and f["format"] == "JPEG" end)
      download_url(identifier, cover && Map.get(cover, "name"))
    end

    # Channel-level data: item metadata plus the optional extra document.
    defp podcast_data(metadata, extra) do
      %{
        title: metadata["title"],
        description: metadata["description"],
        webmaster: metadata["uploader"],
        managingEditor: metadata["uploader"],
        owner: %{
          name: metadata["creator"],
          email: metadata["uploader"]
        },
        keywords: metadata["subject"],
        pubDate: to_utc_datetime(metadata["publicdate"]),
        lastBuildDate: to_utc_datetime(metadata["addeddate"]),
        author: metadata["creator"],
        language: metadata["language"],
        image: %{
          url: extra["image"]["url"],
          title: extra["image"]["title"],
          link: extra["image"]["link"]
        },
        link: extra["link"],
        category: extra["category"],
        explicit: extra["explicit"]
      }
    end

    # Parses an ISO 8601 timestamp without offset and tags it as UTC.
    # Raises when the value is missing or malformed.
    defp to_utc_datetime(timestamp) do
      timestamp
      |> NaiveDateTime.from_iso8601!()
      |> DateTime.from_naive!("Etc/UTC")
    end

    # One feed item per audio file; the full file list is passed along so
    # each item can look up its derived image.
    defp items_data(files, identifier) do
      files
      |> filter_audio_files()
      |> Enum.map(fn f -> to_feed_item(f, identifier, files) end)
    end

    # FIXME:! mp3, ogg, boh
    defp filter_audio_files(files), do: Enum.filter(files, &audio_file?/1)

    # Entries lacking a "format" string fall through to the second clause
    # instead of raising on `nil =~ regex`.
    defp audio_file?(%{"format" => format}) when is_binary(format), do: format =~ ~r/MP3|OGG/i
    defp audio_file?(_file), do: false

    # Maps a single audio file entry to a feed item. "mtime" and "length"
    # are expected to be numeric strings; a missing or unparsable value
    # raises.
    defp to_feed_item(file, identifier, files) do
      filename = Map.get(file, "name")

      %{
        title: file["title"],
        description: "",
        pubDate:
          file |> Map.get("mtime") |> Integer.parse() |> elem(0) |> DateTime.from_unix!(:second),
        link: download_url(identifier, filename),
        length: file |> Map.get("length") |> Float.parse() |> elem(0) |> trunc(),
        size: Map.get(file, "size"),
        summary: "",
        image: download_url(identifier, fetch_image_of_audio(filename, files)),
        keywords: file |> Map.take(["album", "artist", "genre"]) |> Map.values(),
        explicit: "no"
      }
    end

    # Name of the first derivative image generated from `audio_file`, or nil.
    defp fetch_image_of_audio(audio_file, files) do
      case Enum.find(files, &image_of?(&1, audio_file)) do
        %{"name" => name} -> name
        _ -> nil
      end
    end

    defp image_of?(%{"format" => format, "source" => "derivative", "original" => original}, audio_file)
         when is_binary(format) and original == audio_file,
         do: format =~ ~r/JPG|JPEG|PNG|GIF/i

    defp image_of?(_file, _audio_file), do: false

    # No filename means no URL.
    defp download_url(_identifier, nil), do: nil

    defp download_url(identifier, filename) do
      @download_url
      |> Format.compile(identifier: identifier, filename: filename)
      |> URI.encode()
    end
  end