From e7966dd1023cb9945037e0d0f226704554128084 Mon Sep 17 00:00:00 2001 From: ORelio Date: Thu, 22 Oct 2015 14:52:15 +0200 Subject: [PATCH] Add Le Monde Informatique bridge --- bridges/LeMondeInformatiqueBridge.php | 79 +++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 bridges/LeMondeInformatiqueBridge.php diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php new file mode 100644 index 0000000..e7617b4 --- /dev/null +++ b/bridges/LeMondeInformatiqueBridge.php @@ -0,0 +1,79 @@ +', '', $string); + return $string; + } + + function StripWithDelimiters($string, $start, $end) { /* While $start still occurs in $string: take the text running from the first $start up to and including the first $end found after it, and remove every occurrence of that exact text from $string. Returns the resulting string. */ + while (strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); /* if $end is not found, strpos() yields false, which counts as 0 in the addition, so only strlen($end) bytes starting at $start are removed on that pass */ + $string = str_replace($section_to_remove, '', $string); + } return $string; + } + + function CleanArticle($article_html) { /* Passes the article HTML through StripWithDelimiters twice and returns the result. NOTE(review): the delimiter literals in this copy look damaged (the first call has only two arguments, the second passes whitespace-only strings) - confirm against the original file. */ + $article_html = StripWithDelimiters($article_html, ''); + $article_html = StripWithDelimiters($article_html, '

', '

'); + return $article_html; + } + + $feedUrl = 'http://www.lemondeinformatique.fr/rss/rss.xml'; + $html = file_get_html($feedUrl) or $this->returnError('Could not request LeMondeInformatique: '.$feedUrl, 404); + $limit = 0; /* number of items added to $this->items so far */ + + foreach($html->find('item') as $element) { + if($limit < 5) { /* only the first 5 feed items are turned into articles; later items are still iterated but skipped */ + + //Retrieve article details + $article_uri = $element->innertext; + $article_uri = substr($article_uri, strpos($article_uri, '') + 6); + $article_uri = substr($article_uri, 0, strpos($article_uri, '')); /* slices the article URI out of the item's inner text by offsets; the literal 6 is presumably the length of the opening marker. NOTE(review): both marker literals are empty in this copy - confirm against the original file. */ + $article_html = file_get_html($article_uri) or $this->returnError('Could not request LeMondeInformatique: '.$article_uri, 404); /* one extra request per item: thumbnail, content and title are read from the article page, while author and date are read from the feed item */ + $thumbnailUri = $article_html->find('div#article', 0)->find('img#illustration', 0)->src; + $article_content = CleanArticle($article_html->find('div#article', 0)->innertext); + $article_title = $article_html->find('h1.cleanprint-title', 0)->plaintext; + + //Build and add final item + $item = new \Item(); + $item->uri = $article_uri; + $item->thumbnailUri = $thumbnailUri; + $item->title = $article_title; + $item->author = StripCDATA($element->find('dc:creator', 0)->innertext); + $item->timestamp = strtotime($element->find('dc:date', 0)->plaintext); + $item->content = $article_content; + $this->items[] = $item; + $limit++; + } + } + } + + public function getName() { + return 'Le Monde Informatique'; + } + + public function getURI() { + return 'http://www.lemondeinformatique.fr/'; + } + + public function getCacheDuration() { + return 1800; // 30 minutes + // return 0; + } +}