forked from blallo/rss-bridge
Merge pull request #130 from lagaisse/CpasbienBridge
Cpasbien : Ajout de la date de publication en utilisant la date du cache
This commit is contained in:
commit
75585e7b52
2 changed files with 39 additions and 6 deletions
|
@ -10,7 +10,23 @@
|
||||||
* @maintainer lagaisse
|
* @maintainer lagaisse
|
||||||
* @use1(q="keywords like this")
|
* @use1(q="keywords like this")
|
||||||
*/
|
*/
|
||||||
class CpasbienBridge extends BridgeAbstract{
|
|
||||||
|
// simple_html_dom function to get the DOM from a contents string instead of from a file
|
||||||
|
/**
 * Build a simple_html_dom document from an HTML string that is already in memory.
 *
 * The parser object is created first; the input is then validated and, if it
 * passes, loaded into the parser.
 *
 * @param string $contents         HTML markup to parse.
 * @param int    $maxLen           Accepted for signature compatibility; not read by this function.
 * @param bool   $lowercase        Forwarded to the simple_html_dom constructor and to load().
 * @param bool   $forceTagsClosed  Forwarded to the simple_html_dom constructor.
 * @param string $target_charset   Forwarded to the simple_html_dom constructor.
 * @param bool   $stripRN          Forwarded to the simple_html_dom constructor and to load().
 * @param string $defaultBRText    Forwarded to the simple_html_dom constructor.
 * @param string $defaultSpanText  Forwarded to the simple_html_dom constructor.
 *
 * @return simple_html_dom|false The loaded document, or false when $contents is
 *                               empty or its byte length exceeds MAX_FILE_SIZE.
 */
function content_get_html(
    $contents,
    $maxLen = -1,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT
) {
    // Tag closing is forced by default ($forceTagsClosed = true).
    $parser = new simple_html_dom(
        null,
        $lowercase,
        $forceTagsClosed,
        $target_charset,
        $stripRN,
        $defaultBRText,
        $defaultSpanText
    );

    // MAX_FILE_SIZE is defined outside this function (presumably by the
    // simple_html_dom library -- TODO confirm).
    $isParsable = !empty($contents) && strlen($contents) <= MAX_FILE_SIZE;
    if (!$isParsable) {
        return false;
    }

    // $lowercase can force all selectors to lowercase.
    $parser->load($contents, $lowercase, $stripRN);

    return $parser;
}
|
||||||
|
|
||||||
|
class CpasbienBridge extends HttpCachingBridgeAbstract{
|
||||||
|
|
||||||
private $request;
|
private $request;
|
||||||
|
|
||||||
|
@ -27,14 +43,16 @@ class CpasbienBridge extends BridgeAbstract{
|
||||||
foreach ($html->find('#gauche',0)->find('div') as $episode) {
|
foreach ($html->find('#gauche',0)->find('div') as $episode) {
|
||||||
if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1')
|
if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1')
|
||||||
{
|
{
|
||||||
$htmlepisode=file_get_html($episode->find('a', 0)->getAttribute('href'));
|
|
||||||
|
$htmlepisode=content_get_html($this->get_cached($episode->find('a', 0)->getAttribute('href')));
|
||||||
|
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
$item->name = $episode->find('a', 0)->text();
|
$item->name = $episode->find('a', 0)->text();
|
||||||
$item->title = $episode->find('a', 0)->text();
|
$item->title = $episode->find('a', 0)->text();
|
||||||
$element=$htmlepisode->find('#textefiche', 0)->find('p',1);
|
$item->timestamp = $this->get_cached_time($episode->find('a', 0)->getAttribute('href'));
|
||||||
if (isset($element)) {
|
$textefiche=$htmlepisode->find('#textefiche', 0)->find('p',1);
|
||||||
$item->content = $element->text();
|
if (isset($textefiche)) {
|
||||||
|
$item->content = $textefiche->text();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text();
|
$item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text();
|
||||||
|
@ -42,6 +60,7 @@ class CpasbienBridge extends BridgeAbstract{
|
||||||
|
|
||||||
$item->id = $episode->find('a', 0)->getAttribute('href');
|
$item->id = $episode->find('a', 0)->getAttribute('href');
|
||||||
$item->uri = $this->getURI() . $htmlepisode->find('#telecharger',0)->getAttribute('href');
|
$item->uri = $this->getURI() . $htmlepisode->find('#telecharger',0)->getAttribute('href');
|
||||||
|
$item->thumbnailUri = $htmlepisode->find('#bigcover', 0)->find('img',0)->getAttribute('src');
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -124,6 +124,20 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract {
|
||||||
return file_get_contents($filename);
|
return file_get_contents($filename);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Return the timestamp of the cache file that corresponds to a URL.
 *
 * The URL is turned into a relative path by dropping the scheme prefix and
 * replacing "?", "&" and "=" with "/". A path ending in "/" is completed with
 * "index.html". When no such file exists yet, get_cached() is invoked for the
 * URL (presumably this fetches the page and writes the cache file -- verify
 * against get_cached()).
 *
 * @param string $url Address of the page whose cache entry is inspected.
 *
 * @return int|false Result of filectime() on the cache file: a Unix timestamp,
 *                   or false if filectime() fails.
 */
public function get_cached_time($url) {
    $needles = ["http://", "https://", "?", "&", "="];
    $substitutes = ["", "", "/", "/", "/"];
    $relativePath = str_replace($needles, $substitutes, $url);

    // TODO build this from the variable given to Cache
    $cacheRoot = __DIR__ . '/../cache/' . "pages/";

    $cacheFile = $cacheRoot . $relativePath;
    if (substr($cacheFile, -1) === '/') {
        // Directory-like path: store the page under a conventional file name.
        $cacheFile .= "index.html";
    }

    $alreadyCached = file_exists($cacheFile);
    if (!$alreadyCached) {
        $this->get_cached($url);
    }

    return filectime($cacheFile);
}
|
||||||
|
|
||||||
private function refresh_in_cache($pageCacheDir, $filename) {
|
private function refresh_in_cache($pageCacheDir, $filename) {
|
||||||
$currentPath = $filename;
|
$currentPath = $filename;
|
||||||
while(!$pageCacheDir==$currentPath) {
|
while(!$pageCacheDir==$currentPath) {
|
||||||
|
|
Loading…
Reference in a new issue