onthisday.py 3.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. #!/usr/bin/env python3
  2. """onthisday.py - what REALLY happened in the world, on this day."""
  3. import re
  4. import time
  5. import json
  6. import argparse
  7. import urllib.error
  8. import urllib.request
  9. import random
  10. import markovify
  11. EVENTS_URL = 'https://%s.wikipedia.org/api/rest_v1/feed/onthisday/events'
  12. invalidChars = re.compile('[\[\]\'"\(\)]')
  13. def getEvents(url=None, date=None):
  14. """Return a list of {'year': '1999', 'text': 'an event'} objects."""
  15. print(url)
  16. if date is None:
  17. date = time.strftime('%m/%d')
  18. if url is None:
  19. url = EVENTS_URL % 'en'
  20. url += '/' + date
  21. req = urllib.request.Request(url)
  22. req.add_header('User-Agent', 'onthisday 1.0')
  23. try:
  24. res = urllib.request.urlopen(req, timeout=60).read().decode('utf-8')
  25. except urllib.error.HTTPError:
  26. print('Error fetching data: connection problems, invalid date or your language may be unsupported, yet')
  27. return []
  28. events = []
  29. jsonRes = json.loads(res)
  30. for event in jsonRes['events']:
  31. try:
  32. year = event.get('year')
  33. if not year:
  34. continue
  35. year = str(year)
  36. text = event.get('text', '').strip()
  37. if not text:
  38. continue
  39. text = invalidChars.sub('', text)
  40. if not text.endswith('.'):
  41. text += '.'
  42. events.append({'year': year, 'text': text})
  43. except Exception:
  44. continue
  45. return events
  46. def markovShuffle(events, howMany=5, chars=200):
  47. models = []
  48. for event in events:
  49. try:
  50. model = markovify.Text(event['text'])
  51. except Exception:
  52. pass
  53. models.append(model)
  54. if not models:
  55. return []
  56. combo = markovify.combine(models)
  57. tries = howMany * 5
  58. onThisDay = []
  59. count = 0
  60. while len(onThisDay) < howMany and count < tries:
  61. count += 1
  62. event = combo.make_short_sentence(chars)
  63. if not event or event in onThisDay:
  64. continue
  65. onThisDay.append(event)
  66. years = random.choices([e.get('year') for e in events], k=len(onThisDay))
  67. try:
  68. years = sorted(years, key=int)
  69. except ValueError:
  70. years = sorted(years)
  71. onThisDay = ['%s - %s' % (years[i], onThisDay[i]) for i in range(len(onThisDay))]
  72. return onThisDay
  73. if __name__ == '__main__':
  74. today = time.strftime('%m/%d')
  75. parser = argparse.ArgumentParser(description='What REALLY happened in the world, on this day.')
  76. parser.add_argument('--lang', default='en', help='language (default: en; not all are supported)')
  77. parser.add_argument('--url', help='complete URL for the query')
  78. parser.add_argument('--date', default=today, help='month/day to use (default: today)')
  79. parser.add_argument('--how-many', type=int, default=5, help='how many events to generate (default: 5)')
  80. parser.add_argument('--chars', type=int, default=200, help='maximum length of each event, excluding the date (default: 200)')
  81. args = parser.parse_args()
  82. url = args.url
  83. if not url:
  84. url = EVENTS_URL % args.lang
  85. events = getEvents(url=url, date=args.date)
  86. onThisDay = markovShuffle(events, howMany=args.how_many, chars=args.chars)
  87. for event in onThisDay:
  88. print(event)