From 47dd6c3b03abc3d02a8465152da0356f507609e4 Mon Sep 17 00:00:00 2001 From: ORelio Date: Wed, 3 Feb 2016 20:36:16 +0100 Subject: [PATCH 1/6] [LeMondeInformatique] fix title stripping Title wasn't properly stripped due to changes on their website. --- bridges/LeMondeInformatiqueBridge.php | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php index a0c16b2..e2ba463 100644 --- a/bridges/LeMondeInformatiqueBridge.php +++ b/bridges/LeMondeInformatiqueBridge.php @@ -1,15 +1,15 @@ maintainer = "ORelio"; - $this->name = "Le Monde Informatique"; - $this->uri = "http://www.lemondeinformatique.fr/"; - $this->description = "Returns the newest articles."; - $this->update = "2015-09-08"; + $this->maintainer = "ORelio"; + $this->name = "Le Monde Informatique"; + $this->uri = "http://www.lemondeinformatique.fr/"; + $this->description = "Returns the newest articles."; + $this->update = "2016-01-28"; - } + } public function collectData(array $param) { @@ -29,7 +29,7 @@ class LeMondeInformatiqueBridge extends BridgeAbstract { function CleanArticle($article_html) { $article_html = StripWithDelimiters($article_html, ''); - $article_html = StripWithDelimiters($article_html, '

', '

'); + $article_html = StripWithDelimiters($article_html, '

'); return $article_html; } @@ -73,6 +73,5 @@ class LeMondeInformatiqueBridge extends BridgeAbstract { public function getCacheDuration() { return 1800; // 30 minutes - // return 0; } } From e1b7ffdc81146b84669df997d293688f2d1731dc Mon Sep 17 00:00:00 2001 From: ORelio Date: Wed, 3 Feb 2016 20:37:09 +0100 Subject: [PATCH 2/6] [T411] Change encoding to UTF-8 - Stop encoding to UTF-8, their website is now natively in UTF-8 - More error handling --- bridges/T411Bridge.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bridges/T411Bridge.php b/bridges/T411Bridge.php index e1e7a95..da2e9d0 100644 --- a/bridges/T411Bridge.php +++ b/bridges/T411Bridge.php @@ -37,7 +37,9 @@ class T411Bridge extends BridgeAbstract { //Retrieve torrent listing as truncated rss, which does not contain torrent description $url = 'http://www.t411.in/torrents/search/?'.$param['search'].'&order=added&type=desc'; $html = file_get_html($url) or $this->returnError('Could not request t411: '.$url, 500); - $results = $html->find('table.results')[0] or $this->returnError('No results from t411: '.$url, 500); + $results = $html->find('table.results', 0); + if (is_null($results)) + $this->returnError('No results from t411: '.$url, 500); $limit = 0; //Process each item individually @@ -77,7 +79,7 @@ class T411Bridge extends BridgeAbstract { $item->author = $item_author; $item->timestamp = $item_date; $item->thumbnailUri = $item_image; - $item->content = utf8_encode($item_desc->innertext); + $item->content = $item_desc->innertext; $this->items[] = $item; $limit++; } From a9c40e98c470414b3c82d34929fed0deaebd0f0c Mon Sep 17 00:00:00 2001 From: ORelio Date: Sat, 13 Feb 2016 13:52:07 +0100 Subject: [PATCH 3/6] [CNET] Improve script removal --- bridges/CNETBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/CNETBridge.php b/bridges/CNETBridge.php index 7b4e1b3..ec7decf 100644 --- a/bridges/CNETBridge.php +++ b/bridges/CNETBridge.php @@ -9,7 +9,7 @@ class CNETBridge extends BridgeAbstract { $this->name = 'CNET News'; $this->uri = 'http://www.cnet.com/'; $this->description = 'Returns the newest articles.
You may specify a topic found in some section URLs, else all topics are selected.'; - $this->update = '2016-01-23'; + $this->update = '2016-02-06'; $this->parameters[] = '[ @@ -41,7 +41,7 @@ class CNETBridge extends BridgeAbstract { function CleanArticle($article_html) { $article_html = '

'.substr($article_html, strpos($article_html, '

') + 3); $article_html = StripWithDelimiters($article_html, '', ''); - $article_html = StripWithDelimiters($article_html, ''); + $article_html = StripWithDelimiters($article_html, ''); $article_html = StripWithDelimiters($article_html, '