feed 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. #!/usr/bin/env python3
  2. import os
  3. from argparse import ArgumentParser
  4. from subprocess import check_output
  5. from collections import OrderedDict
  6. import re
  7. from lxml import html
  8. import requests
  9. class Audio(object):
  10. def __init__(self, url, durata=None):
  11. self.url = url
  12. if durata is None:
  13. durata = get_duration(url.encode('utf-8'))
  14. self.durata = durata
  15. def __str__(self):
  16. return self.url
  17. def __repr__(self):
  18. return '<Audio {} ({})>'.format(self.url, self.durata)
  19. class AudioGroup(list):
  20. def __init__(self, description=None):
  21. self.description = description or ''
  22. self.audios = []
  23. def __len__(self):
  24. return len(self.audios)
  25. def append(self, arg):
  26. self.audios.append(arg)
  27. def __str__(self):
  28. return '\n'.join(str(a) for a in self.audios)
  29. def __repr__(self):
  30. return '<AudioGroup "{}" ({})\n{}>'.format(self.description,
  31. self.durata,
  32. '\n'.join(repr(a) for a in self.audios))
  33. @property
  34. def durata(self):
  35. return sum(a.durata for a in self.audios if a.durata is not None)
  36. def get_tree(feed_url):
  37. if feed_url.startswith('http:') or feed_url.startswith('https:'):
  38. tree = html.fromstring(requests.get(feed_url).content)
  39. else:
  40. if not os.path.exists(feed_url):
  41. raise ValueError("file not found: {}".format(feed_url))
  42. tree = html.parse(open(feed_url))
  43. return tree
  44. def get_audio_from_description(text):
  45. # non-empty lines
  46. lines = [line.strip()
  47. for line in text.split('\n')
  48. if line.strip()]
  49. url = lines[0]
  50. durata = None
  51. if len(lines) > 1:
  52. durata = int(re.findall(r'\d+', lines[1].split('=')[1].strip())[0])
  53. return Audio(url, durata)
  54. def get_urls(tree):
  55. urls = tree.xpath('//item/description')
  56. for url_elem in urls:
  57. yield get_audio_from_description(url_elem.text)
  58. def get_grouped_urls(tree):
  59. groups = OrderedDict()
  60. items = tree.xpath('//item')
  61. for item in items:
  62. guid = item.xpath('guid')[0].text.strip()
  63. if guid not in groups:
  64. groups[guid] = AudioGroup(guid)
  65. groups[guid].append(get_audio_from_description(
  66. item.xpath('description')[0].text))
  67. return groups
  68. def get_duration(url):
  69. lineout = check_output(['ffprobe', '-v', 'error',
  70. '-show_entries', 'format=duration',
  71. '-i', url]).split(b'\n')
  72. duration = next(l for l in lineout if l.startswith(b'duration='))
  73. value = duration.split(b'=')[1]
  74. return int(float(value))
  75. def get_parser():
  76. p = ArgumentParser('Get music from a (well-specified) xml feed')
  77. p.add_argument('--start', default=0, type=int,
  78. help='0-indexed start number. '
  79. 'By default, play from most recent')
  80. p.add_argument('--max-len', default=0, type=int,
  81. help='Exclude any audio that is longer than MAXLEN seconds')
  82. p.add_argument('--howmany', default=1, type=int,
  83. help='If not specified, only 1 will be played')
  84. p.add_argument('--slotsize', help='Seconds between each audio', type=int)
  85. p.add_argument('--group', help='Group articles', default=False,
  86. action='store_true')
  87. p.add_argument('--debug', help='Debug messages', default=False,
  88. action='store_true')
  89. p.add_argument('url')
  90. return p
  91. def main():
  92. args = get_parser().parse_args()
  93. # download the feed
  94. tree = get_tree(args.url)
  95. if not args.group:
  96. # get audio urls, removing those that are too long
  97. audios = [audio for audio in get_urls(tree)
  98. if args.max_len == 0 or
  99. audio.durata <= args.max_len]
  100. audios = audios[args.start:args.start+args.howmany]
  101. else:
  102. groups = get_grouped_urls(tree)
  103. audios = [groups[g] for g in groups.keys()
  104. if args.max_len == 0 or
  105. groups[g].durata <= args.max_len
  106. ][args.start:args.start+args.howmany]
  107. # the for loop excludes the last one
  108. # this is to support the --slotsize option
  109. if not audios:
  110. return
  111. for audio in audios[:-1]:
  112. if args.debug:
  113. print(repr(audio))
  114. else:
  115. print(audio)
  116. if args.slotsize is not None:
  117. duration = audio.durata
  118. if duration < args.slotsize:
  119. print('## musica per {} secondi'
  120. .format(args.slotsize - duration))
  121. # finally, the last one
  122. if args.debug:
  123. print(repr(audios[-1]))
  124. else:
  125. print(audios[-1])
  126. # else: # grouping; TODO: support slotsize
  127. # for item in groups:
  128. # if args.debug:
  129. # print('#', item, groups[item].durata)
  130. # print(groups[item])
  131. if __name__ == '__main__':
  132. main()