1
0

CollegeDeFranceBridge.php 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. <?php
  2. class CollegeDeFranceBridge extends BridgeAbstract{
  3. public function loadMetadatas() {
  4. $this->maintainer = "pit-fgfjiudghdf";
  5. $this->name = "CollegeDeFrance";
  6. $this->uri = "http://www.college-de-france.fr/";
  7. $this->description = "Returns the latest audio and video from CollegeDeFrance";
  8. $this->update = "2016-05-01";
  9. }
  10. public function collectData(array $param) {
  11. $months = array(
  12. '01' => 'janv.',
  13. '02' => 'févr.',
  14. '03' => 'mars',
  15. '04' => 'avr.',
  16. '05' => 'mai',
  17. '06' => 'juin',
  18. '07' => 'juil.',
  19. '08' => 'août',
  20. '09' => 'sept.',
  21. '10' => 'oct.',
  22. '11' => 'nov.',
  23. '12' => 'déc.'
  24. );
  25. // The "API" used by the site returns a list of partial HTML in this form
  26. /* <li>
  27. * <a href="/site/thomas-romer/guestlecturer-2016-04-15-14h30.htm" data-target="after">
  28. * <span class="date"><span class="list-icon list-icon-video"></span><span class="list-icon list-icon-audio"></span>15 avr. 2016</span>
  29. * <span class="lecturer">Christopher Hays</span>
  30. * <span class='title'>Imagery of Divine Suckling in the Hebrew Bible and the Ancient Near East</span>
  31. * </a>
  32. * </li>
  33. */
  34. $html = file_get_html('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnError('Could not request CollegeDeFrance.', 404);
  35. foreach($html->find('a[data-target]') as $element) {
  36. $item = new \Item();
  37. $item->title = $element->find('.title', 0)->plaintext;
  38. // Most relative URLs contains an hour in addition to the date, so let's use it
  39. // <a href="/site/yann-lecun/course-2016-04-08-11h00.htm" data-target="after">
  40. //
  41. // Sometimes there's an __1, perhaps it signifies an update "/site/patrick-boucheron/seminar-2016-05-03-18h00__1.htm"
  42. //
  43. // But unfortunately some don't have any hours info
  44. // <a href="/site/institut-physique/The-Mysteries-of-Decoherence-Sebastien-Gleyzes-[Video-3-35].htm" data-target="after">
  45. $timezone = new DateTimeZone('Europe/Paris');
  46. // strpos($element->href, '201') will break in 2020 but it'll probably break prior to then due to site changes anyway
  47. $d = DateTime::createFromFormat('!Y-m-d-H\hi', substr($element->href, strpos($element->href, '201'), 16), $timezone) ?: DateTime::createFromFormat('!d m Y', trim(str_replace(array_values($months), array_keys($months), $element->find('.date', 0)->plaintext)), $timezone);
  48. $item->timestamp = $d->format('U');
  49. $item->content = $element->find('.lecturer', 0)->innertext . ' - ' . $element->find('.title', 0)->innertext;
  50. $item->uri = 'http://www.college-de-france.fr' . $element->href;
  51. $this->items[] = $item;
  52. }
  53. }
  54. public function getName(){
  55. return 'CollegeDeFrance';
  56. }
  57. public function getURI(){
  58. return 'http://www.college-de-france.fr/';
  59. }
  60. public function getCacheDuration(){
  61. return 3600*3; // 3 hours
  62. }
  63. }