Merge pull request #321 from LogMANOriginal/NiceMatinBridge

Nice matin bridge
2016-08-03 22:58:03 +02:00 · 2016-08-03 22:58:03 +02:00 · 9495e830e7
commit 9495e830e7
parent 0737833c88 ec5cb657aa
1 changed files with 43 additions and 45 deletions
--- a/bridges/NiceMatinBridge.php
+++ b/bridges/NiceMatinBridge.php
@@ -2,61 +2,59 @@
 class NiceMatinBridge extends BridgeAbstract{

 	public function loadMetadatas() {
-
 		$this->maintainer = "pit-fgfjiudghdf";
 		$this->name = "NiceMatin";
 		$this->uri = "http://www.nicematin.com/";
 		$this->description = "Returns the 10 newest posts from NiceMatin (full text)";
-		$this->update = "2014-05-26";
-
+		$this->update = "2016-08-03";
 	}

-    public function collectData(array $param){
+	function NiceMatinExtractContent($url) {
+		$html = $this->file_get_html($url);
+		if(!$html)
+			$this->returnError('Could not acquire content from url: ' . $url . '!', 404);
+		
+		$content = $html->find('article', 0);
+		if(!$content)
+			$this->returnError('Could not find \'section\'!', 404);
+		
+		$text = preg_replace('#<script(.*?)>(.*?)</script>#is', '', $content->innertext);
+		$text = strip_tags($text, '<p><a><img>');
+		return $text;
+	}

-    function NiceMatinUrl($string) {
-        $string = str_replace('</link>', '', $string);
-        //$string = str_replace('.+', '', $string);
-        $string = preg_replace('/html.*http.*/i','html',$string);
-        $string = preg_replace('/.*http/i','http',$string);
-        return $string;
-    }
+	public function collectData(array $param){
+		$html = $this->file_get_html('http://www.nicematin.com/derniere-minute/rss') or $this->returnError('Could not request NiceMatin.', 404);
+		$limit = 0;

-    function NiceMatinExtractContent($url) {
-        $html2 = $this->file_get_html($url);
-        $text = $html2->find('figure[itemprop=associatedMedia]', 0)->innertext;
-        $text .= $html2->find('div[id=content-article]', 0)->innertext;
-        return $text;
-    }
+		foreach($html->find('item') as $element) {
+			if($limit < 10) {
+				// We need to fix the 'link' tag as simplehtmldom cannot parse it (just rename it and load back as dom)
+				$element_text = $element->outertext;
+				$element_text = str_replace('<link>', '<url>', $element_text);
+				$element_text = str_replace('</link>', '</url>', $element_text);
+				$element = str_get_html($element_text);

-        $html = $this->file_get_html('http://www.nicematin.com/derniere-minute/rss') or $this->returnError('Could not request NiceMatin.', 404);
-        $limit = 0;
+				$item = new \Item();
+				$item->title = $element->find('title', 0)->innertext;
+				$item->uri = $element->find('url', 0)->innertext;
+				$item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
+				$item->content = $this->NiceMatinExtractContent($item->uri);
+				$this->items[] = $item;
+				$limit++;
+			}
+		}
+	}

-        foreach($html->find('item') as $element) {
-         if($limit < 10) {
-         $item = new \Item();
-         //$item->title = NiceMatinStripCDATA($element->find('title', 0)->innertext);
-         $item->title = $element->find('title', 0)->innertext;
-         $item->uri = NiceMatinUrl($element->plaintext);
+	public function getName(){
+		return 'NiceMatin';
+	}

-         $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
-         $item->content = NiceMatinExtractContent($item->uri);
-         $this->items[] = $item;
-         $limit++;
-         }
-        }
+	public function getURI(){
+		return 'http://www.nicematin.com/';
+	}

-    }
-
-    public function getName(){
-        return 'NiceMatin';
-    }
-
-    public function getURI(){
-        return 'http://www.nicematin.com/';
-    }
-
-    public function getCacheDuration(){
-        return 3600; // 1 hour
-    }
+	public function getCacheDuration(){
+		return 3600; // 1 hour
+	}
 }
-