feed 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. #!/usr/bin/env python3
  2. # pip install lxml requests
  3. import os
  4. from argparse import ArgumentParser
  5. from subprocess import check_output
  6. from collections import OrderedDict
  7. import re
  8. import urllib.request
  9. from urllib.parse import urlparse, unquote
  10. import posixpath
  11. import random
  12. from lxml import html
  13. import requests
  14. class Audio(object):
  15. def __init__(self, url, durata=None):
  16. self.url = url
  17. if durata is None:
  18. durata = get_duration(url.encode('utf-8'))
  19. self.durata = durata
  20. def __str__(self):
  21. return self.url
  22. def __repr__(self):
  23. return '<Audio {} ({})>'.format(self.url, self.durata)
  24. @property
  25. def urls(self):
  26. return [self.url]
  27. class AudioGroup(list):
  28. def __init__(self, description=None):
  29. self.description = description or ''
  30. self.audios = []
  31. def __len__(self):
  32. return len(self.audios)
  33. def append(self, arg):
  34. self.audios.append(arg)
  35. def __str__(self):
  36. return '\n'.join(str(a) for a in self.audios)
  37. def __repr__(self):
  38. return '<AudioGroup "{}" ({})\n{}>'.format(self.description,
  39. self.durata,
  40. '\n'.join(repr(a) for a in self.audios))
  41. @property
  42. def durata(self):
  43. return sum(a.durata for a in self.audios if a.durata is not None)
  44. @property
  45. def urls(self):
  46. return [a.url for a in self.audios]
  47. def get_tree(feed_url):
  48. if feed_url.startswith('http:') or feed_url.startswith('https:'):
  49. tree = html.fromstring(requests.get(feed_url).content)
  50. else:
  51. if not os.path.exists(feed_url):
  52. raise ValueError("file not found: {}".format(feed_url))
  53. tree = html.parse(open(feed_url))
  54. return tree
  55. def get_audio_from_description(text):
  56. # non-empty lines
  57. lines = [line.strip()
  58. for line in text.split('\n')
  59. if line.strip()]
  60. url = lines[0]
  61. durata = None
  62. if len(lines) > 1:
  63. durata = int(re.findall(r'\d+', lines[1].split('=')[1].strip())[0])
  64. return Audio(unquote(url), durata)
  65. def get_urls(tree):
  66. urls = tree.xpath('//item/description')
  67. for url_elem in urls:
  68. yield get_audio_from_description(url_elem.text)
  69. def get_grouped_urls(tree):
  70. groups = OrderedDict()
  71. items = tree.xpath('//item')
  72. for item in items:
  73. guid = item.xpath('guid')[0].text.strip()
  74. if guid not in groups:
  75. groups[guid] = AudioGroup(guid)
  76. groups[guid].append(get_audio_from_description(
  77. item.xpath('description')[0].text))
  78. return groups
  79. def get_duration(url):
  80. lineout = check_output(['ffprobe', '-v', 'error',
  81. '-show_entries', 'format=duration',
  82. '-i', url]).split(b'\n')
  83. duration = next(l for l in lineout if l.startswith(b'duration='))
  84. value = duration.split(b'=')[1]
  85. return int(float(value))
  86. def get_parser():
  87. p = ArgumentParser('Get music from a (well-specified) xml feed')
  88. p.add_argument('--start', default=0, type=int,
  89. help='0-indexed start number. '
  90. 'By default, play from most recent')
  91. p.add_argument('--max-len', default=0, type=int,
  92. help='Exclude any audio that is longer than MAXLEN seconds')
  93. p.add_argument('--random', default=False,
  94. action='store_true', help='Pick randomly')
  95. p.add_argument('--howmany', default=1, type=int,
  96. help='If not specified, only 1 will be played')
  97. p.add_argument('--slotsize', help='Seconds between each audio', type=int)
  98. p.add_argument('--group', help='Group articles', default=False,
  99. action='store_true')
  100. p.add_argument('--copy', help='Copy files to $TMPDIR', default=False,
  101. action='store_true')
  102. p.add_argument('--debug', help='Debug messages', default=False,
  103. action='store_true')
  104. p.add_argument('url')
  105. return p
  106. def put(audio, copy=False):
  107. if not copy:
  108. for url in audio.urls:
  109. print(url)
  110. else:
  111. for url in audio.urls:
  112. destdir = (os.environ.get('TMPDIR', '.'))
  113. fname = posixpath.basename(urlparse(url).path)
  114. # sanitize
  115. fname = "".join(c for c in fname
  116. if c.isalnum() or c in list('._-')).rstrip()
  117. dest = os.path.join(destdir, fname)
  118. os.makedirs(destdir, exist_ok=True)
  119. fname, headers = urllib.request.urlretrieve(url, dest)
  120. print('file://%s' % os.path.realpath(fname))
  121. def main():
  122. args = get_parser().parse_args()
  123. # download the feed
  124. tree = get_tree(args.url)
  125. if not args.group:
  126. # get audio urls, removing those that are too long
  127. audios = [audio for audio in get_urls(tree)
  128. if args.max_len == 0 or
  129. audio.durata <= args.max_len]
  130. else:
  131. groups = get_grouped_urls(tree)
  132. audios = [groups[g] for g in groups.keys()
  133. if args.max_len == 0 or
  134. groups[g].durata <= args.max_len
  135. ]
  136. audios = audios[args.start:]
  137. if args.random:
  138. random.shuffle(audios)
  139. audios = audios[:args.howmany]
  140. # the for loop excludes the last one
  141. # this is to support the --slotsize option
  142. if not audios:
  143. return
  144. for audio in audios[:-1]:
  145. if args.debug:
  146. print(repr(audio))
  147. else:
  148. put(audio, args.copy)
  149. if args.slotsize is not None:
  150. duration = audio.durata
  151. if duration < args.slotsize:
  152. print('## musica per {} secondi'
  153. .format(args.slotsize - duration))
  154. # finally, the last one
  155. if args.debug:
  156. print(repr(audios[-1]))
  157. else:
  158. put(audios[-1], args.copy)
  159. # else: # grouping; TODO: support slotsize
  160. # for item in groups:
  161. # if args.debug:
  162. # print('#', item, groups[item].durata)
  163. # print(groups[item])
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()