/**
 * Bridge that turns the NiceMatin "derniere-minute" RSS feed into items
 * carrying the article text fetched from each linked page.
 *
 * file_get_html() and returnError() are called on $this but are not defined
 * in this class (presumably inherited from BridgeAbstract); str_get_html()
 * and \Item are likewise defined elsewhere.
 */
class NiceMatinBridge extends BridgeAbstract{

    /**
     * Sets the static descriptive fields of the bridge.
     */
    public function loadMetadatas() {
        $this->maintainer = "pit-fgfjiudghdf";
        $this->name = "NiceMatin";
        $this->uri = "http://www.nicematin.com/";
        $this->description = "Returns the 10 newest posts from NiceMatin (full text)";
        $this->update = "2016-08-03";
    }

    /**
     * Downloads an article page and returns the text of its first <article> element.
     *
     * @param string $url Address of the article page.
     * @return string Article text with markup removed.
     */
    public function NiceMatinExtractContent($url) {
        $html = $this->file_get_html($url);
        // NOTE(review): the code below assumes returnError() aborts execution
        // (it is defined outside this class) - confirm.
        if(!$html)
            $this->returnError('Could not acquire content from url: ' . $url . '!', 404);

        $content = $html->find('article', 0);
        if(!$content)
            $this->returnError('Could not find \'article\'!', 404);

        // strip_tags() drops the tags but keeps the text between them, so
        // script blocks are removed together with their body first.
        // NOTE(review): the tag name in this pattern was missing from the
        // reviewed source (the pattern only matched empty strings); 'script'
        // is assumed - confirm.
        $text = preg_replace('#<script(.*?)>(.*?)</script>#is', '', $content->innertext);

        // NOTE(review): the allow-list literal in the reviewed source held only
        // whitespace, which allows no tag at all. That behaviour is kept here;
        // restore the intended allow-list if tags were meant to survive.
        $text = strip_tags($text);

        return $text;
    }

    /**
     * Reads the RSS feed and appends up to 10 entries to $this->items.
     *
     * @param array $param Request parameters (not used by this bridge).
     */
    public function collectData(array $param){
        $feedUrl = 'http://www.nicematin.com/derniere-minute/rss';
        $maxItems = 10;

        $html = $this->file_get_html($feedUrl);
        if(!$html)
            $this->returnError('Could not request NiceMatin.', 404);

        $count = 0;
        foreach($html->find('item') as $element) {
            // Stop as soon as the limit is reached instead of walking the rest of the feed.
            if($count >= $maxItems)
                break;

            // We need to fix the 'link' tag as simplehtmldom cannot parse it
            // (just rename it to 'url' and load the fragment back as dom).
            $element_text = str_replace(
                array('<link>', '</link>'),
                array('<url>', '</url>'),
                $element->outertext
            );
            $entry = str_get_html($element_text);
            if(!$entry)
                continue;

            $title = $entry->find('title', 0);
            $url = $entry->find('url', 0);
            $date = $entry->find('pubDate', 0);

            // An entry without a title or a link cannot become a usable item.
            if(!$title || !$url)
                continue;

            $item = new \Item();
            $item->title = $title->innertext;
            $item->uri = $url->innertext;
            if($date)
                $item->timestamp = strtotime($date->plaintext);
            $item->content = $this->NiceMatinExtractContent($item->uri);

            $this->items[] = $item;
            $count++;
        }
    }

    /**
     * @return string Display name of the bridge.
     */
    public function getName(){
        return 'NiceMatin';
    }

    /**
     * @return string Home page of the source site.
     */
    public function getURI(){
        return 'http://www.nicematin.com/';
    }

    /**
     * @return int Cache lifetime in seconds.
     */
    public function getCacheDuration(){
        return 3600; // 1 hour
    }
}