feed 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. #!/usr/bin/env python3
  2. # pip install lxml requests
  3. import os
  4. from argparse import ArgumentParser
  5. from subprocess import check_output
  6. from collections import OrderedDict
  7. import re
  8. import urllib.request
  9. from urllib.parse import urlparse, unquote
  10. import posixpath
  11. from lxml import html
  12. import requests
  13. class Audio(object):
  14. def __init__(self, url, durata=None):
  15. self.url = url
  16. if durata is None:
  17. durata = get_duration(url.encode('utf-8'))
  18. self.durata = durata
  19. def __str__(self):
  20. return self.url
  21. def __repr__(self):
  22. return '<Audio {} ({})>'.format(self.url, self.durata)
  23. @property
  24. def urls(self):
  25. return [self.url]
  26. class AudioGroup(list):
  27. def __init__(self, description=None):
  28. self.description = description or ''
  29. self.audios = []
  30. def __len__(self):
  31. return len(self.audios)
  32. def append(self, arg):
  33. self.audios.append(arg)
  34. def __str__(self):
  35. return '\n'.join(str(a) for a in self.audios)
  36. def __repr__(self):
  37. return '<AudioGroup "{}" ({})\n{}>'.format(self.description,
  38. self.durata,
  39. '\n'.join(repr(a) for a in self.audios))
  40. @property
  41. def durata(self):
  42. return sum(a.durata for a in self.audios if a.durata is not None)
  43. @property
  44. def urls(self):
  45. return [a.url for a in self.audios]
  46. def get_tree(feed_url):
  47. if feed_url.startswith('http:') or feed_url.startswith('https:'):
  48. tree = html.fromstring(requests.get(feed_url).content)
  49. else:
  50. if not os.path.exists(feed_url):
  51. raise ValueError("file not found: {}".format(feed_url))
  52. tree = html.parse(open(feed_url))
  53. return tree
  54. def get_audio_from_description(text):
  55. # non-empty lines
  56. lines = [line.strip()
  57. for line in text.split('\n')
  58. if line.strip()]
  59. url = lines[0]
  60. durata = None
  61. if len(lines) > 1:
  62. durata = int(re.findall(r'\d+', lines[1].split('=')[1].strip())[0])
  63. return Audio(unquote(url), durata)
  64. def get_urls(tree):
  65. urls = tree.xpath('//item/description')
  66. for url_elem in urls:
  67. yield get_audio_from_description(url_elem.text)
  68. def get_grouped_urls(tree):
  69. groups = OrderedDict()
  70. items = tree.xpath('//item')
  71. for item in items:
  72. guid = item.xpath('guid')[0].text.strip()
  73. if guid not in groups:
  74. groups[guid] = AudioGroup(guid)
  75. groups[guid].append(get_audio_from_description(
  76. item.xpath('description')[0].text))
  77. return groups
  78. def get_duration(url):
  79. lineout = check_output(['ffprobe', '-v', 'error',
  80. '-show_entries', 'format=duration',
  81. '-i', url]).split(b'\n')
  82. duration = next(l for l in lineout if l.startswith(b'duration='))
  83. value = duration.split(b'=')[1]
  84. return int(float(value))
  85. def get_parser():
  86. p = ArgumentParser('Get music from a (well-specified) xml feed')
  87. p.add_argument('--start', default=0, type=int,
  88. help='0-indexed start number. '
  89. 'By default, play from most recent')
  90. p.add_argument('--max-len', default=0, type=int,
  91. help='Exclude any audio that is longer than MAXLEN seconds')
  92. p.add_argument('--howmany', default=1, type=int,
  93. help='If not specified, only 1 will be played')
  94. p.add_argument('--slotsize', help='Seconds between each audio', type=int)
  95. p.add_argument('--group', help='Group articles', default=False,
  96. action='store_true')
  97. p.add_argument('--copy', help='Copy files to $TMPDIR', default=False,
  98. action='store_true')
  99. p.add_argument('--debug', help='Debug messages', default=False,
  100. action='store_true')
  101. p.add_argument('url')
  102. return p
  103. def put(audio, copy=False):
  104. if not copy:
  105. for url in audio.urls:
  106. print(url)
  107. else:
  108. for url in audio.urls:
  109. destdir = (os.environ.get('TMPDIR', '.'))
  110. fname = posixpath.basename(urlparse(url).path)
  111. # sanitize
  112. fname = "".join(c for c in fname
  113. if c.isalnum() or c in list('._-')).rstrip()
  114. dest = os.path.join(destdir, fname)
  115. os.makedirs(destdir, exist_ok=True)
  116. fname, headers = urllib.request.urlretrieve(url, dest)
  117. print('file://%s' % os.path.realpath(fname))
  118. def main():
  119. args = get_parser().parse_args()
  120. # download the feed
  121. tree = get_tree(args.url)
  122. if not args.group:
  123. # get audio urls, removing those that are too long
  124. audios = [audio for audio in get_urls(tree)
  125. if args.max_len == 0 or
  126. audio.durata <= args.max_len]
  127. audios = audios[args.start:args.start+args.howmany]
  128. else:
  129. groups = get_grouped_urls(tree)
  130. audios = [groups[g] for g in groups.keys()
  131. if args.max_len == 0 or
  132. groups[g].durata <= args.max_len
  133. ][args.start:args.start+args.howmany]
  134. # the for loop excludes the last one
  135. # this is to support the --slotsize option
  136. if not audios:
  137. return
  138. for audio in audios[:-1]:
  139. if args.debug:
  140. print(repr(audio))
  141. else:
  142. put(audio, args.copy)
  143. if args.slotsize is not None:
  144. duration = audio.durata
  145. if duration < args.slotsize:
  146. print('## musica per {} secondi'
  147. .format(args.slotsize - duration))
  148. # finally, the last one
  149. if args.debug:
  150. print(repr(audios[-1]))
  151. else:
  152. put(audios[-1], args.copy)
  153. # else: # grouping; TODO: support slotsize
  154. # for item in groups:
  155. # if args.debug:
  156. # print('#', item, groups[item].durata)
  157. # print(groups[item])
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()