Merge pull request #130 from lagaisse/CpasbienBridge
Cpasbien : Ajout de la date de publication en utilisant la date du cache
This commit is contained in:
commit
75585e7b52
2 changed files with 39 additions and 6 deletions
|
@ -10,7 +10,23 @@
|
|||
* @maintainer lagaisse
|
||||
* @use1(q="keywords like this")
|
||||
*/
|
||||
class CpasbienBridge extends BridgeAbstract{
|
||||
|
||||
// simple_html_dom function to get the DOM from contents instead of from a file
|
||||
/**
 * Build a simple_html_dom tree from an HTML string that is already in memory.
 *
 * @param string $contents        HTML markup to parse.
 * @param int    $maxLen          Unused by this function; kept so the signature stays unchanged for callers.
 * @param bool   $lowercase       Passed to the parser; can force the selectors to all be lowercase.
 * @param bool   $forceTagsClosed Passed to the parser; forces the tags to be terminated.
 * @param string $target_charset  Passed through to the simple_html_dom constructor.
 * @param bool   $stripRN         Passed through to the simple_html_dom constructor and to load().
 * @param string $defaultBRText   Passed through to the simple_html_dom constructor.
 * @param string $defaultSpanText Passed through to the simple_html_dom constructor.
 *
 * @return simple_html_dom|false The loaded DOM, or false when $contents is empty
 *                               or longer than MAX_FILE_SIZE bytes.
 */
function content_get_html($contents, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
{
    // Validate the input first so that rejected contents never cost a parser allocation.
    if (empty($contents) || strlen($contents) > MAX_FILE_SIZE) {
        return false;
    }

    // We DO force the tags to be terminated.
    $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);

    // The second parameter can force the selectors to all be lowercase.
    $dom->load($contents, $lowercase, $stripRN);

    return $dom;
}
|
||||
|
||||
class CpasbienBridge extends HttpCachingBridgeAbstract{
|
||||
|
||||
private $request;
|
||||
|
||||
|
@ -27,14 +43,16 @@ class CpasbienBridge extends BridgeAbstract{
|
|||
foreach ($html->find('#gauche',0)->find('div') as $episode) {
|
||||
if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1')
|
||||
{
|
||||
$htmlepisode=file_get_html($episode->find('a', 0)->getAttribute('href'));
|
||||
|
||||
$htmlepisode=content_get_html($this->get_cached($episode->find('a', 0)->getAttribute('href')));
|
||||
|
||||
$item = new \Item();
|
||||
$item->name = $episode->find('a', 0)->text();
|
||||
$item->title = $episode->find('a', 0)->text();
|
||||
$element=$htmlepisode->find('#textefiche', 0)->find('p',1);
|
||||
if (isset($element)) {
|
||||
$item->content = $element->text();
|
||||
$item->timestamp = $this->get_cached_time($episode->find('a', 0)->getAttribute('href'));
|
||||
$textefiche=$htmlepisode->find('#textefiche', 0)->find('p',1);
|
||||
if (isset($textefiche)) {
|
||||
$item->content = $textefiche->text();
|
||||
}
|
||||
else {
|
||||
$item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text();
|
||||
|
@ -42,6 +60,7 @@ class CpasbienBridge extends BridgeAbstract{
|
|||
|
||||
$item->id = $episode->find('a', 0)->getAttribute('href');
|
||||
$item->uri = $this->getURI() . $htmlepisode->find('#telecharger',0)->getAttribute('href');
|
||||
$item->thumbnailUri = $htmlepisode->find('#bigcover', 0)->find('img',0)->getAttribute('src');
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -124,6 +124,20 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract {
|
|||
return file_get_contents($filename);
|
||||
}
|
||||
|
||||
/**
 * Return the filectime() of the cache file that corresponds to a URL.
 *
 * The URL is mapped to a path below the page cache directory. When no file
 * exists at that path, get_cached() is called for the URL first (presumably
 * this creates the cache file -- confirm against get_cached()).
 *
 * @param string $url Address of the page.
 *
 * @return int|false Whatever filectime() reports for the cache file.
 */
public function get_cached_time($url) {
    // TODO build this from the variable given to Cache
    $cacheRoot = __DIR__ . '/../cache/'."pages/";

    // Drop the scheme and turn the query-string delimiters into path separators.
    $search = ["http://", "https://", "?", "&", "="];
    $replacement = ["", "", "/", "/", "/"];
    $cacheFile = $cacheRoot . str_replace($search, $replacement, $url);

    // A path ending in a slash gets a file name appended.
    if (substr($cacheFile, -1) === '/') {
        $cacheFile .= "index.html";
    }

    if (!file_exists($cacheFile)) {
        $this->get_cached($url);
    }

    return filectime($cacheFile);
}
|
||||
|
||||
private function refresh_in_cache($pageCacheDir, $filename) {
|
||||
$currentPath = $filename;
|
||||
while(!$pageCacheDir==$currentPath) {
|
||||
|
|
Loading…
Reference in a new issue