CollegeDeFranceBridge.php 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. <?php
  /**
   * Bridge returning the latest audio and video entries listed by the
   * Collège de France search component.
   *
   * Each feed item carries a title, a content string ("lecturer - title"),
   * an absolute URI and, whenever a date could be parsed, a Unix timestamp.
   */
  class CollegeDeFranceBridge extends BridgeAbstract {

      const MAINTAINER = 'pit-fgfjiudghdf';
      const NAME = 'CollegeDeFrance';
      const URI = 'http://www.college-de-france.fr/';
      const CACHE_TIMEOUT = 10800; // 3h
      const DESCRIPTION = 'Returns the latest audio and video from CollegeDeFrance';

      /**
       * Fetches the audio/video listing and appends one entry per link to $this->items.
       *
       * Entries whose date cannot be determined are still emitted, only without
       * a 'timestamp' key, so one odd entry does not abort the whole feed.
       */
      public function collectData(){
          // French month abbreviations as printed by the site, keyed by month number.
          $months = array(
              '01' => 'janv.',
              '02' => 'févr.',
              '03' => 'mars',
              '04' => 'avr.',
              '05' => 'mai',
              '06' => 'juin',
              '07' => 'juil.',
              '08' => 'août',
              '09' => 'sept.',
              '10' => 'oct.',
              '11' => 'nov.',
              '12' => 'déc.'
          );

          // The "API" used by the site returns a list of partial HTML in this form:
          //
          // <li>
          //   <a href="/site/thomas-romer/guestlecturer-2016-04-15-14h30.htm" data-target="after">
          //     <span class="date"><span class="list-icon list-icon-video"></span>
          //       <span class="list-icon list-icon-audio"></span>15 avr. 2016</span>
          //     <span class="lecturer">Christopher Hays</span>
          //     <span class='title'>Imagery of Divine Suckling in the Hebrew Bible and the Ancient Near East</span>
          //   </a>
          // </li>
          $html = getSimpleHTMLDOM(self::URI
              . 'components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all')
              or returnServerError('Could not request CollegeDeFrance.');

          // Loop-invariant, so build it once instead of once per entry.
          $timezone = new DateTimeZone('Europe/Paris');

          foreach($html->find('a[data-target]') as $element) {
              $titleNode = $element->find('.title', 0);
              $lecturerNode = $element->find('.lecturer', 0);
              $titleHtml = $titleNode ? $titleNode->innertext : '';

              $item = array();
              $item['title'] = $titleNode ? $titleNode->plaintext : '';

              $date = $this->parseElementDate($element, $months, $timezone);
              if($date) {
                  // Only format a real DateTime; calling format() on false is a fatal error.
                  $item['timestamp'] = $date->format('U');
              }

              $item['content'] = $lecturerNode
                  ? $lecturerNode->innertext . ' - ' . $titleHtml
                  : $titleHtml;

              // self::URI ends with '/' and the hrefs start with '/', so join them
              // with exactly one slash instead of producing "//site/...".
              $item['uri'] = rtrim(self::URI, '/') . '/' . ltrim($element->href, '/');

              $this->items[] = $item;
          }
      }

      /**
       * Works out the date of one listing entry.
       *
       * Most relative URLs contain an hour in addition to the date, so that is
       * tried first:
       *   /site/yann-lecun/course-2016-04-08-11h00.htm
       * Sometimes there is a trailing "__1" (perhaps signifying an update):
       *   /site/patrick-boucheron/seminar-2016-05-03-18h00__1.htm
       * Some URLs carry no date at all:
       *   /site/institut-physique/The-Mysteries-of-Decoherence-Sebastien-Gleyzes-[Video-3-35].htm
       * in which case the text of the ".date" span (e.g. "15 avr. 2016") is used.
       *
       * @param object       $element  anchor node as yielded by $html->find('a[data-target]')
       * @param array        $months   month number => French abbreviation
       * @param DateTimeZone $timezone timezone the parsed values are interpreted in
       * @return DateTime|false the parsed date, or false when neither source is usable
       */
      private function parseElementDate($element, array $months, DateTimeZone $timezone){
          // Match the complete "YYYY-MM-DD-HHhMM" stamp rather than searching for a
          // hard-coded year prefix, so that any year is recognised.
          if(preg_match('/\d{4}-\d{2}-\d{2}-\d{2}h\d{2}/', (string)$element->href, $match)) {
              $fromUrl = DateTime::createFromFormat('!Y-m-d-H\hi', $match[0], $timezone);
              if($fromUrl) {
                  return $fromUrl;
              }
          }

          $dateNode = $element->find('.date', 0);
          if(!$dateNode) {
              return false;
          }

          // Turn "15 avr. 2016" into "15 04 2016", collapsing any whitespace runs
          // left over from the nested icon spans.
          $numeric = str_replace(
              array_values($months),
              array_keys($months),
              $dateNode->plaintext
          );
          $numeric = trim(preg_replace('/\s+/', ' ', $numeric));

          return DateTime::createFromFormat('!d m Y', $numeric, $timezone);
      }
  }