maintainer = "pit-fgfjiudghdf";
$this->name = "CollegeDeFrance";
$this->uri = "http://www.college-de-france.fr/";
$this->description = "Returns the latest audio and video from CollegeDeFrance";
}
/**
 * Fetches the Collège de France audio/video search listing and appends one
 * entry to $this->items per link found in it.
 *
 * Every item carries 'title', 'content' and 'uri'. A 'timestamp' (Unix time,
 * with the source date interpreted in the Europe/Paris timezone) is added
 * whenever a date can be parsed from either the link URL or the displayed date.
 */
public function collectData(){
    // French abbreviated month names as displayed in the listing, keyed by
    // their two-digit month number so they can be swapped for parsing.
    $months = array(
        '01' => 'janv.',
        '02' => 'févr.',
        '03' => 'mars',
        '04' => 'avr.',
        '05' => 'mai',
        '06' => 'juin',
        '07' => 'juil.',
        '08' => 'août',
        '09' => 'sept.',
        '10' => 'oct.',
        '11' => 'nov.',
        '12' => 'déc.'
    );

    // The "API" used by the site returns a list of partial HTML: links carrying
    // a data-target attribute, each containing elements with the classes
    //   .date     (e.g. "15 avr. 2016")
    //   .lecturer (e.g. "Christopher Hays")
    //   .title    (e.g. "Imagery of Divine Suckling in the Hebrew Bible and the Ancient Near East")
    $html = $this->getSimpleHTMLDOM('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnServerError('Could not request CollegeDeFrance.');

    // Loop-invariant, so build it once instead of once per link.
    $timezone = new DateTimeZone('Europe/Paris');

    foreach($html->find('a[data-target]') as $element) {
        // Any of these may be missing from a malformed entry; guard before use.
        $titleNode = $element->find('.title', 0);
        $lecturerNode = $element->find('.lecturer', 0);
        $dateNode = $element->find('.date', 0);
        $href = (string)$element->href;

        $item = array();
        $item['title'] = $titleNode ? $titleNode->plaintext : '';

        // Most relative URLs contain an hour in addition to the date, e.g.
        // "/site/patrick-boucheron/seminar-2016-05-03-18h00__1.htm" (the "__1"
        // suffix is sometimes present, perhaps signifying an update), so
        // prefer that. Matching the full "YYYY-MM-DD-HHhMM" pattern keeps this
        // working for any year and avoids false hits on other digits in the path.
        $d = false;
        if (preg_match('/\d{4}-\d{2}-\d{2}-\d{2}h\d{2}/', $href, $matches)) {
            $d = DateTime::createFromFormat('!Y-m-d-H\hi', $matches[0], $timezone);
        }

        // Unfortunately some URLs carry no hour information: fall back to the
        // displayed French date, after replacing the month name by its number.
        if (!$d && $dateNode) {
            $numericDate = trim(str_replace(array_values($months), array_keys($months), $dateNode->plaintext));
            $d = DateTime::createFromFormat('!d m Y', $numericDate, $timezone);
        }

        // Leave the timestamp out rather than crash when no date could be parsed.
        if ($d) {
            $item['timestamp'] = $d->format('U');
        }

        $item['content'] = ($lecturerNode ? $lecturerNode->innertext : '') . ' - ' . ($titleNode ? $titleNode->innertext : '');
        $item['uri'] = 'http://www.college-de-france.fr' . $href;
        $this->items[] = $item;
    }
}
/**
 * Returns the cache duration for this bridge: three hours, expressed in seconds.
 *
 * @return int 10800
 */
public function getCacheDuration(){
    $secondsPerHour = 3600;
    $hours = 3;

    return $hours * $secondsPerHour;
}
}