From 0631e9d835abe62f367083ea705f1bdd69605956 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Tue, 2 Aug 2016 15:31:55 +0200 Subject: [PATCH 1/3] Make ExtractContent a member function This fixes error "Using $this when not in object context" Nested functions are not part of the object and therefore don't have access to the object instance $this! --- bridges/DauphineLibereBridge.php | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/bridges/DauphineLibereBridge.php b/bridges/DauphineLibereBridge.php index f33c21c..27f84d7 100644 --- a/bridges/DauphineLibereBridge.php +++ b/bridges/DauphineLibereBridge.php @@ -7,7 +7,7 @@ class DauphineLibereBridge extends BridgeAbstract{ $this->name = "DauphineLibereBridge Bridge"; $this->uri = "http://www.ledauphine.com/"; $this->description = "Returns the newest articles."; - $this->update = "05/11/2015"; + $this->update = "2016-08-02"; $this->parameters[] = @@ -82,16 +82,15 @@ class DauphineLibereBridge extends BridgeAbstract{ ]'; } - - public function collectData(array $param){ - - function ExtractContent($url) { $html2 = $this->file_get_html($url); $text = $html2->find('div.column', 0)->innertext; $text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text); return $text; } + + public function collectData(array $param){ + if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; $html = $this->file_get_html('http://www.ledauphine.com/'.$this->request.'/rss') or $this->returnError('Could not request DauphineLibere.', 404); @@ -107,7 +106,7 @@ class DauphineLibereBridge extends BridgeAbstract{ $item->title = $element->find('title', 0)->innertext; $item->uri = $element->find('guid', 0)->plaintext; $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); - $item->content = ExtractContent($item->uri); + $item->content = $this->ExtractContent($item->uri); $this->items[] = $item; $limit++; } From 4420906a7a06d4283a16e3778e4a053a2874dbb5 Mon Sep 17 00:00:00 2001 From: logmanoriginal 
Date: Tue, 2 Aug 2016 15:40:07 +0200 Subject: [PATCH 2/3] Fix indentation and remove unnecessary lines --- bridges/DauphineLibereBridge.php | 204 +++++++++++++++---------------- 1 file changed, 101 insertions(+), 103 deletions(-) diff --git a/bridges/DauphineLibereBridge.php b/bridges/DauphineLibereBridge.php index 27f84d7..03c35c6 100644 --- a/bridges/DauphineLibereBridge.php +++ b/bridges/DauphineLibereBridge.php @@ -1,95 +1,94 @@ maintainer = "qwertygc"; - $this->name = "DauphineLibereBridge Bridge"; - $this->uri = "http://www.ledauphine.com/"; - $this->description = "Returns the newest articles."; - $this->update = "2016-08-02"; + $this->maintainer = "qwertygc"; + $this->name = "DauphineLibereBridge Bridge"; + $this->uri = "http://www.ledauphine.com/"; + $this->description = "Returns the newest articles."; + $this->update = "2016-08-02"; + $this->parameters[] = + '[ + { + "name" : "Catégorie de l\'article", + "identifier" : "u", + "type" : "list", + "values" : [ + { + "name" : "À la une", + "value" : "" + }, + { + "name" : "France Monde", + "value" : "france-monde" + }, + { + "name" : "Faits Divers", + "value" : "faits-divers" + }, + { + "name" : "Économie et Finance", + "value" : "economie-et-finance" + }, + { + "name" : "Politique", + "value" : "politique" + }, + { + "name" : "Sport", + "value" : "sport" + }, + { + "name" : "Ain", + "value" : "ain" + }, + { + "name" : "Alpes-de-Haute-Provence", + "value" : "haute-provence" + }, + { + "name" : "Hautes-Alpes", + "value" : "hautes-alpes" + }, + { + "name" : "Ardèche", + "value" : "ardeche" + }, + { + "name" : "Drôme", + "value" : "drome" + }, + { + "name" : "Isère Sud", + "value" : "isere-sud" + }, + { + "name" : "Savoie", + "value" : "savoie" + }, + { + "name" : "Haute-Savoie", + "value" : "haute-savoie" + }, + { + "name" : "Vaucluse", + "value" : "vaucluse" + } + ] + } + ]'; + } - $this->parameters[] = - '[ - { - "name" : "Catégorie de l\'article", - "identifier" : "u", - "type" : "list", - "values" : [ - { - 
"name" : "À la une", - "value" : "" - }, - { - "name" : "France Monde", - "value" : "france-monde" - }, - { - "name" : "Faits Divers", - "value" : "faits-divers" - }, - { - "name" : "Économie et Finance", - "value" : "economie-et-finance" - }, - { - "name" : "Politique", - "value" : "politique" - }, - { - "name" : "Sport", - "value" : "sport" - }, - { - "name" : "Ain", - "value" : "ain" - }, - { - "name" : "Alpes-de-Haute-Provence", - "value" : "haute-provence" - }, - { - "name" : "Hautes-Alpes", - "value" : "hautes-alpes" - }, - { - "name" : "Ardèche", - "value" : "ardeche" - }, - { - "name" : "Drôme", - "value" : "drome" - }, - { - "name" : "Isère Sud", - "value" : "isere-sud" - }, - { - "name" : "Savoie", - "value" : "savoie" - }, - { - "name" : "Haute-Savoie", - "value" : "haute-savoie" - }, - { - "name" : "Vaucluse", - "value" : "vaucluse" - } - ] - } - ]'; - } - - function ExtractContent($url) { + function ExtractContent($url) { $html2 = $this->file_get_html($url); $text = $html2->find('div.column', 0)->innertext; $text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text); return $text; - } + } - public function collectData(array $param){ + public function collectData(array $param){ if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; @@ -101,29 +100,28 @@ class DauphineLibereBridge extends BridgeAbstract{ $limit = 0; foreach($html->find('item') as $element) { - if($limit < 10) { - $item = new \Item(); - $item->title = $element->find('title', 0)->innertext; - $item->uri = $element->find('guid', 0)->plaintext; - $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); - $item->content = $this->ExtractContent($item->uri); - $this->items[] = $item; - $limit++; - } + if($limit < 10) { + $item = new \Item(); + $item->title = $element->find('title', 0)->innertext; + $item->uri = $element->find('guid', 0)->plaintext; + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = 
$this->ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } } - - } + } - public function getName(){ - return 'Dauphine Bridge'; - } + public function getName(){ + return 'Dauphine Bridge'; + } - public function getURI(){ - return 'http://ledauphine.com/'; - } + public function getURI(){ + return 'http://ledauphine.com/'; + } - public function getCacheDuration(){ - return 3600*2; // 2 hours - // return 0; // 2 hours - } + public function getCacheDuration(){ + return 3600*2; // 2 hours + } } +?> \ No newline at end of file From c8c3e9ef65296c436e8fbc7e11c9e7428da90e74 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Tue, 2 Aug 2016 15:57:01 +0200 Subject: [PATCH 3/3] Add http-context to simulate Mozilla user-agent This fixes "Warning: file_get_contents(http://www.ledauphine.com/rss): failed to open stream: HTTP request failed! HTTP/1.1 403 Forbidden" --- bridges/DauphineLibereBridge.php | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/bridges/DauphineLibereBridge.php b/bridges/DauphineLibereBridge.php index 03c35c6..d2fd1d1 100644 --- a/bridges/DauphineLibereBridge.php +++ b/bridges/DauphineLibereBridge.php @@ -81,8 +81,8 @@ class DauphineLibereBridge extends BridgeAbstract { ]'; } - function ExtractContent($url) { - $html2 = $this->file_get_html($url); + function ExtractContent($url, $context) { + $html2 = $this->file_get_html($url, false, $context); $text = $html2->find('div.column', 0)->innertext; $text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text); return $text; @@ -90,24 +90,34 @@ class DauphineLibereBridge extends BridgeAbstract { public function collectData(array $param){ + // Simulate Mozilla user-agent to fix error 403 (Forbidden) + $opts = array('http' => + array( + 'method' => 'GET', + 'header' => 'User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36' + ) + ); + + $context = stream_context_create($opts); + if 
(isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = $this->file_get_html('http://www.ledauphine.com/'.$this->request.'/rss') or $this->returnError('Could not request DauphineLibere.', 404); + $html = $this->file_get_html('http://www.ledauphine.com/'.$this->request.'/rss', false, $context) or $this->returnError('Could not request DauphineLibere.', 404); } else { - $html = $this->file_get_html('http://www.ledauphine.com/rss') or $this->returnError('Could not request DauphineLibere.', 404); + $html = $this->file_get_html('http://www.ledauphine.com/rss', false, $context) or $this->returnError('Could not request DauphineLibere.', 404); } $limit = 0; foreach($html->find('item') as $element) { if($limit < 10) { - $item = new \Item(); - $item->title = $element->find('title', 0)->innertext; - $item->uri = $element->find('guid', 0)->plaintext; - $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); - $item->content = $this->ExtractContent($item->uri); - $this->items[] = $item; - $limit++; + $item = new \Item(); + $item->title = $element->find('title', 0)->innertext; + $item->uri = $element->find('guid', 0)->plaintext; + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = $this->ExtractContent($item->uri, $context); + $this->items[] = $item; + $limit++; } } }