From fec864e0104ffef9f0cd95a1bc2963c92a5776b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sun, 4 Sep 2016 14:07:58 +0200 Subject: [PATCH 1/5] [FourchanBridge] use 'category' and 'thread' parameters instead of full URI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This breaks compatibility with previous versions of FourChanBridge. Bridges should never use full URIs as inputs as their validation will always be more complicated, hence prone to security issues, than rebuilding a clean URI from simple validated inputs. Signed-off-by: Pierre Mazière --- bridges/FourchanBridge.php | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/bridges/FourchanBridge.php b/bridges/FourchanBridge.php index f17f38a..90a746b 100644 --- a/bridges/FourchanBridge.php +++ b/bridges/FourchanBridge.php @@ -3,35 +3,35 @@ class FourchanBridge extends BridgeAbstract{ const MAINTAINER = "mitsukarenai"; const NAME = "4chan"; - const URI = "https://www.4chan.org/"; + const URI = "https://boards.4chan.org/"; const DESCRIPTION = "Returns posts from the specified thread"; const PARAMETERS = array( array( - 't'=>array( - 'name'=>'Thread URL', - 'pattern'=>'(https:\/\/)?boards\.4chan\.org\/.*thread\/.*', + 'c'=>array( + 'name'=>'Thread category', 'required'=>true - ) + ), + 't'=>array( + 'name'=>'Thread number', + 'type'=>'number', + 'required'=>true + ) )); + public function getURI(){ + return static::URI.$this->getInput('c').'/thread/'.$this->getInput('t'); + + } + public function collectData(){ - $thread = parse_url($this->getInput('t')) - or $this->returnClientError('This URL seems malformed, please check it.'); - if($thread['host'] !== 'boards.4chan.org') - $this->returnClientError('4chan thread URL only.'); - - if(strpos($thread['path'], 'thread/') === FALSE) - $this->returnClientError('You must specify the thread URL.'); - - $url = 'https://boards.4chan.org'.$thread['path']; - 
$html = $this->getSimpleHTMLDOM($url) - or $this->returnServerError("Could not request 4chan, thread not found"); + $html = $this->getSimpleHTMLDOM($this->getURI()) + or $this->returnServerError("Could not request 4chan, thread not found"); foreach($html->find('div.postContainer') as $element) { $item = array(); $item['id'] = $element->find('.post', 0)->getAttribute('id'); - $item['uri'] = $url.'#'.$item['id']; + $item['uri'] = $this->getURI().'#'.$item['id']; $item['timestamp'] = $element->find('span.dateTime', 0)->getAttribute('data-utc'); $item['author'] = $element->find('span.name', 0)->plaintext; From d60220769eaf9dd04baaa9c09d040f0fe966a5f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sun, 4 Sep 2016 14:08:39 +0200 Subject: [PATCH 2/5] [FourChanBridge] fix relative links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- bridges/FourchanBridge.php | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/bridges/FourchanBridge.php b/bridges/FourchanBridge.php index 90a746b..3b6d105 100644 --- a/bridges/FourchanBridge.php +++ b/bridges/FourchanBridge.php @@ -45,10 +45,21 @@ class FourchanBridge extends BridgeAbstract{ if(!empty($element->find('span.subject', 0)->innertext )) { $item['subject'] = $element->find('span.subject', 0)->innertext; } - $item['title'] = (isset($item['subject']) ? $item['subject'].' - ' : '' ) . 'reply '.$item['id'].' | '.$item['author']; + $item['title'] = 'reply '.$item['id'].' | '.$item['author']; + if(isset($item['subject'])){ + $item['title'] = $item['subject'].' - '.$item['title']; + } - $item['content'] = (isset($item['image']) ? ''.$item['id'].'
' : '') . ''.$element->find('.postMessage', 0)->innertext.''; + $content = $element->find('.postMessage', 0)->innertext; + $content = str_replace('href="#p','href="'.$this->getURI().'#p',$content); + $item['content'] = ''.$content.''; + if(isset($item['image'])){ + $item['content'] = '' + .''.$item['id'].'' + .'
' + .$item['content']; + } $this->items[] = $item; } $this->items = array_reverse($this->items); From 0c139941d9cebce89a27c446abe35f5f815e6375 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sun, 4 Sep 2016 14:26:11 +0200 Subject: [PATCH 3/5] [NovelUpdatesBridge] use 'name' parameter instead of full URI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This breaks compatibility with previous versions of NovelUpdatesBridge. Bridges should never use full URIs as inputs since their validation will always be more complicated, hence prone to security issues, than rebuilding a clean URI from simple validated inputs. Signed-off-by: Pierre Mazière --- bridges/NovelUpdatesBridge.php | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/bridges/NovelUpdatesBridge.php b/bridges/NovelUpdatesBridge.php index b865737..95dfc7f 100644 --- a/bridges/NovelUpdatesBridge.php +++ b/bridges/NovelUpdatesBridge.php @@ -7,23 +7,22 @@ class NovelUpdatesBridge extends BridgeAbstract{ const DESCRIPTION = "Returns releases from Novel Updates"; const PARAMETERS = array( array( 'n'=>array( - 'name'=>'Novel URL', - 'patterns'=>'http:\/\/www.novelupdates.com\/.*', - 'required'=>true + 'name'=>'Novel name as found in the url', + 'exampleValue'=>'spirit-realm', + 'required'=>true ) )); private $seriesTitle=''; + public function getURI(){ + return static::URI.'/series/'.$this->getInput('n').'/'; + } + public function collectData(){ - $thread = parse_url($this->getInput('n')) - or $this->returnClientError('This URL seems malformed, please check it.'); - if($thread['host'] !== 'www.novelupdates.com') - $this->returnClientError('NovelUpdates URL only.'); - if(strpos($thread['path'], 'series/') === FALSE) - $this->returnClientError('You must specify the novel URL.'); - $url = self::URI.$thread['path'].''; - $fullhtml = $this->getSimpleHTMLDOM($url) or $this->returnServerError("Could not request 
NovelUpdates, novel not found"); + $fullhtml = $this->getSimpleHTMLDOM($this->getURI()) + or $this->returnServerError('Could not request NovelUpdates, novel "'.$this->getInput('n').'" not found'); + $this->seriesTitle = $fullhtml->find('h4.seriestitle', 0)->plaintext; // dirty fix for nasty simpledom bug: https://github.com/sebsauvage/rss-bridge/issues/259 // forcefully removes tbody @@ -37,13 +36,17 @@ class NovelUpdatesBridge extends BridgeAbstract{ $item['title'] = $element->find('td', 2)->find('a', 0)->plaintext; $item['team'] = $element->find('td', 1)->innertext; $item['timestamp'] = strtotime($element->find('td', 0)->plaintext); - $item['content'] = ''.$this->seriesTitle.' - '.$item['title'].' by '.$item['team'].'
'.$fullhtml->find('div.seriesimg', 0)->innertext.''; + $item['content'] = + '' + .$this->seriesTitle.' - '.$item['title'] + .' by '.$item['team'].'
' + .''.$fullhtml->find('div.seriesimg', 0)->innertext.''; $this->items[] = $item; } } public function getName(){ - return (!empty($this->seriesTitle) ? $this->seriesTitle.' - ' : '') .'Novel Updates'; + return $this->seriesTitle. ' - ' . static::NAME; } public function getCacheDuration(){ From 3daeacc2089386e651d4523d4187d6b688d9c513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sun, 4 Sep 2016 14:40:38 +0200 Subject: [PATCH 4/5] [VkBridge] use 'Group or user name' parameter instead of full URI This breaks compatibility with previous versions of VkBridge (which seems broken anyway). Bridges should never use full URIs as inputs since their validation will always be more complicated, hence prone to security issues, than rebuilding a clean URI from simple validated inputs. --- bridges/VkBridge.php | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php index feaafa4..5489599 100644 --- a/bridges/VkBridge.php +++ b/bridges/VkBridge.php @@ -6,18 +6,21 @@ class VkBridge extends BridgeAbstract { const NAME = "VK.com"; const URI = "http://vk.com/"; const DESCRIPTION = "Working with open pages"; - const PARAMETERS=array( - 'Url on page group or user' => array( + const PARAMETERS=array( array( 'u'=>array( - 'name'=>'Url', + 'name'=>'Group or user name', 'required'=>true ) ) ); + public function getURI(){ + return static::URI.urlencode($this->getInput('u')); + } public function collectData(){ - $text_html = $this->getContents(urldecode($this->getInput('u'))) - or $this->returnServerError('No results for this query.'); + $text_html = $this->getContents($this->getURI()) + or $this->returnServerError('No results for group or user name "'.$this->getInput('u').'".'); + $text_html = iconv('windows-1251', 'utf-8', $text_html); $html = str_get_html($text_html); foreach ($html->find('div.post_table') as $post) { From a0dfbe6efd946b289116e5ccc436398dc385b5c4 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sun, 4 Sep 2016 14:58:25 +0200 Subject: [PATCH 5/5] [AcrimedBridge] reapply code simplification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- bridges/AcrimedBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index 54c6f87..39ba87b 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -7,7 +7,7 @@ class AcrimedBridge extends FeedExpander { const DESCRIPTION = "Returns the newest articles."; public function collectData(){ - $this->collectExpandableDatas("http://www.acrimed.org/spip.php?page=backend"); + $this->collectExpandableDatas(static::URI.'spip.php?page=backend'); } protected function parseItem($newsItem){ @@ -16,7 +16,7 @@ class AcrimedBridge extends FeedExpander { $hs = new HTMLSanitizer(); $articlePage = $this->getSimpleHTMLDOM($newsItem->link); $article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext); - $article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/"); + $article = HTMLSanitizer::defaultImageSrcTo($article, static::URI); $item['content'] = $article; return $item;