From b707a29b7e743e1a0668abd50d170f8e0797737e Mon Sep 17 00:00:00 2001 From: ORelio Date: Sat, 25 Jun 2016 12:12:33 +0200 Subject: [PATCH] [JapanExpo] Allow fetching full content Current bridge only allows titles. Add a setting for fetching full content. --- bridges/JapanExpoBridge.php | 119 +++++++++++++++++++++++++++++------- 1 file changed, 97 insertions(+), 22 deletions(-) diff --git a/bridges/JapanExpoBridge.php b/bridges/JapanExpoBridge.php index c9ebbba..c8227ae 100644 --- a/bridges/JapanExpoBridge.php +++ b/bridges/JapanExpoBridge.php @@ -2,30 +2,105 @@ class JapanExpoBridge extends BridgeAbstract{ public function loadMetadatas() { - - $this->maintainer = "Ginko"; - $this->name = "JapanExpo"; - $this->uri = "http://www.japan-expo-paris.com/fr/actualites"; - $this->description = "Returns most recent results from Japan Expo actualités."; - $this->update = "2016-01-19"; - + $this->maintainer = 'Ginko'; + $this->name = 'JapanExpo'; + $this->uri = 'http://www.japan-expo-paris.com/fr/actualites'; + $this->description = 'Returns most recent entries from Japan Expo actualités.'; + $this->update = '2016-06-12'; + $this->parameters[] = + '[ + { + "name" : "Mode", + "type" : "list", + "identifier" : "mode", + "values" : + [ + { + "name" : "Titles only", + "value" : "light" + }, + { + "name" : "Full Contents", + "value" : "full" + } + ] + } + ]'; } - public function collectData(array $param){ - $link = 'http://www.japan-expo-paris.com/fr/actualites'; - - $html = file_get_html($link) or $this->returnError('Could not request JapanExpo. for : ' . $link , 404); - - foreach($html->find('a._tile2') as $element) { - $item = new Item(); - $item->uri = $element->href; - $item->title = $element->find('span._title', 0)->plaintext; - $style = $element->find('img.rspvimgset', 0)->style; - preg_match('/url\(([^)]+)\)/', $style, $match); - $item->content = "
".$element->find('span.date', 0)->plaintext; - $this->items[] = $item; + public function collectData(array $param) { + + function french_pubdate_to_timestamp($date_to_parse) { + return strtotime( + strtr( + strtolower(str_replace('Publié le ', '', $date_to_parse)), + array( + 'janvier' => 'jan', + 'février' => 'feb', + 'mars' => 'march', + 'avril' => 'apr', + 'mai' => 'may', + 'juin' => 'jun', + 'juillet' => 'jul', + 'août' => 'aug', + 'septembre' => 'sep', + 'octobre' => 'oct', + 'novembre' => 'nov', + 'décembre' => 'dec' + ) + ) + ); + } + + $convert_article_images = function ($matches) { + if (is_array($matches) && count($matches) > 1) { + return ''; + } + }; + + $link = 'http://www.japan-expo-paris.com/fr/actualites'; + $html = file_get_html($link) or $this->returnError('Could not request JapanExpo: '.$link , 500); + $fullcontent = (!empty($param['mode']) && $param['mode'] == 'full'); + $count = 0; + + foreach ($html->find('a._tile2') as $element) { + + $url = $element->href; + $thumbnail = 'http://s.japan-expo.com/katana/images/JES049/paris.png'; + preg_match('/url\(([^)]+)\)/', $element->find('img.rspvimgset', 0)->style, $img_search_result); + if (count($img_search_result) >= 2) + $thumbnail = trim($img_search_result[1], "'"); + + if ($fullcontent) { + if ($count < 5) { + $article_html = file_get_html($url) or $this->returnError('Could not request JapanExpo: '.$url , 500); + $header = $article_html->find('header.pageHeadBox', 0); + $timestamp = strtotime($header->find('time', 0)->datetime); + $title_html = $header->find('div.section', 0)->next_sibling(); + $title = $title_html->plaintext; + $headings = $title_html->next_sibling()->outertext; + $article = $article_html->find('div.content', 0)->innertext; + $article = preg_replace_callback('/]+ style="[^\(]+\(\'([^\']+)\'[^>]+>/i', $convert_article_images, $article); + $content = $headings.$article; + } else { + break; + } + } else { + $date_text = $element->find('span.date', 0)->plaintext; + $timestamp = french_pubdate_to_timestamp($date_text); + $title = trim($element->find('span._title', 0)->plaintext); + $content = '
'.$date_text.'
Lire l\'article'; + } + + $item = new \Item(); + $item->uri = $url; + $item->title = $title; + $item->timestamp = $timestamp; + $item->thumbnailUri = $thumbnail; + $item->content = $content; + $this->items[] = $item; + $count++; } - } public function getName(){ @@ -37,6 +112,6 @@ class JapanExpoBridge extends BridgeAbstract{ } public function getCacheDuration(){ - return 86400; // 1 day + return 14400; // 4 hours } }