onthisday.py 3.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. #!/usr/bin/env python3
  2. """onthisday.py - what REALLY happened in the world, on this day."""
  3. import os
  4. import re
  5. import time
  6. import json
  7. import random
  8. import argparse
  9. import urllib.error
  10. import urllib.request
  11. import markovify
  12. EVENTS_URL = 'https://%s.wikipedia.org/api/rest_v1/feed/onthisday/events'
  13. invalidChars = re.compile('[\[\]\'"\(\)]')
  14. def getEvents(url=None, date=None):
  15. """Return a list of {'year': '1999', 'text': 'an event'} objects."""
  16. if date is None:
  17. date = time.strftime('%m/%d')
  18. if url is None:
  19. url = EVENTS_URL % 'en'
  20. url += '/' + date
  21. req = urllib.request.Request(url)
  22. req.add_header('User-Agent', 'onthisday 1.0')
  23. try:
  24. res = urllib.request.urlopen(req, timeout=60).read().decode('utf-8')
  25. except urllib.error.HTTPError:
  26. print('Error fetching data: connection problems, invalid date or your language may be unsupported, yet')
  27. return []
  28. events = []
  29. jsonRes = json.loads(res)
  30. for event in jsonRes['events']:
  31. try:
  32. year = event.get('year')
  33. if not year:
  34. continue
  35. year = str(year)
  36. text = event.get('text', '').strip()
  37. if not text:
  38. continue
  39. text = invalidChars.sub('', text)
  40. if not text.endswith('.'):
  41. text += '.'
  42. events.append({'year': year, 'text': text})
  43. except Exception:
  44. continue
  45. return events
  46. def markovShuffle(events, howMany=5, chars=200):
  47. models = []
  48. for event in events:
  49. try:
  50. model = markovify.Text(event['text'])
  51. except Exception:
  52. pass
  53. models.append(model)
  54. if not models:
  55. return []
  56. combo = markovify.combine(models)
  57. tries = howMany * 5
  58. onThisDay = []
  59. count = 0
  60. while len(onThisDay) < howMany and count < tries:
  61. count += 1
  62. event = combo.make_short_sentence(chars)
  63. if not event or event in onThisDay:
  64. continue
  65. onThisDay.append(event)
  66. years = random.choices([e.get('year') for e in events], k=len(onThisDay))
  67. try:
  68. years = sorted(years, key=int)
  69. except ValueError:
  70. years = sorted(years)
  71. onThisDay = ['%s - %s' % (years[i], onThisDay[i]) for i in range(len(onThisDay))]
  72. return onThisDay
  73. if __name__ == '__main__':
  74. today = time.strftime('%m/%d')
  75. parser = argparse.ArgumentParser(description='What REALLY happened in the world, on this day.')
  76. parser.add_argument('--lang', default='en', help='language (default: en; not all are supported)')
  77. parser.add_argument('--url', help='complete URL for the query')
  78. parser.add_argument('--date', default=today, help='month/day to use (default: today)')
  79. parser.add_argument('--how-many', type=int, default=5, help='how many events to generate (default: 5)')
  80. parser.add_argument('--chars', type=int, default=200, help='maximum length of each event, excluding the date (default: 200)')
  81. parser.add_argument('--sep', default=os.linesep + os.linesep, help='lines separator (default: \\n\\n)')
  82. args = parser.parse_args()
  83. url = args.url
  84. if not url:
  85. url = EVENTS_URL % args.lang
  86. events = getEvents(url=url, date=args.date)
  87. onThisDay = markovShuffle(events, howMany=args.how_many, chars=args.chars)
  88. print(args.sep.join(onThisDay))