From 94289511e151dd7d1421635807f977cb9a165449 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sun, 3 Jul 2016 19:04:32 +0200 Subject: [PATCH 001/149] [GithubIssueBridge] fix parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- bridges/GithubIssueBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php index eaf4d9d..4c56cf0 100644 --- a/bridges/GithubIssueBridge.php +++ b/bridges/GithubIssueBridge.php @@ -14,7 +14,7 @@ class GithubIssueBridge extends BridgeAbstract{ $this->description = 'Returns the comments of a github project issue'; $this->update = '2016-06-25'; - $this->parameters= + $this->parameters[]= '[ { "name" : "User name", From 80651273b3cc841d5a3776e14a82884f1971a93b Mon Sep 17 00:00:00 2001 From: ORelio Date: Tue, 26 Jul 2016 21:19:39 +0200 Subject: [PATCH 002/149] [ZDNet] Auto-insert image --- bridges/ZDNetBridge.php | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/bridges/ZDNetBridge.php b/bridges/ZDNetBridge.php index 4267b98..79924aa 100644 --- a/bridges/ZDNetBridge.php +++ b/bridges/ZDNetBridge.php @@ -6,8 +6,8 @@ class ZDNetBridge extends BridgeAbstract { $this->maintainer = 'ORelio'; $this->name = $this->getName(); $this->uri = $this->getURI(); - $this->description = 'Returns the newest articles.'; - $this->update = '2016-07-18'; + $this->description = 'Technology News, Analysis, Comments and Product Reviews for IT Professionals.'; + $this->update = '2016-07-20'; $this->parameters[] = // http://www.zdnet.com/zdnet.opml @@ -261,7 +261,7 @@ class ZDNetBridge extends BridgeAbstract { $thumbnail = $article->find('meta[itemprop=image]', 0); if (is_object($thumbnail)) $thumbnail = $thumbnail->content; - else $thumbnail = 'http://zdnet1.cbsistatic.com/fly/bundles/zdnetcss/images/logos/logo-192x192.png'; + else $thumbnail = ''; $contents = $article->find('article', 0)->innertext; foreach (array( @@ -277,8 +277,17 @@ class ZDNetBridge extends BridgeAbstract { } $contents = StripWithDelimiters($contents, ''); $contents = StripWithDelimiters($contents, ''); - $contents = StripWithDelimiters($contents, '
')); + $content_img = strpos($contents, '

'; //Include thumbnail + $contents = $content_img + .'

'.$article_subtitle.'

' + .$contents; + + if ($thumbnail == '') + $thumbnail = 'http://zdnet1.cbsistatic.com/fly/bundles/zdnetcss/images/logos/logo-192x192.png'; $item = new \Item(); $item->author = $author; @@ -303,6 +312,6 @@ class ZDNetBridge extends BridgeAbstract { } public function getCacheDuration() { - return 3600; + return 3600; //1 hour } -} \ No newline at end of file +} From dfc4e8aaac78278dadc1198d142628a995491f96 Mon Sep 17 00:00:00 2001 From: ORelio Date: Tue, 26 Jul 2016 21:20:13 +0200 Subject: [PATCH 003/149] [Nextgov] New bridge. USA tech news. --- bridges/NextgovBridge.php | 110 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 bridges/NextgovBridge.php diff --git a/bridges/NextgovBridge.php b/bridges/NextgovBridge.php new file mode 100644 index 0000000..eac7aa1 --- /dev/null +++ b/bridges/NextgovBridge.php @@ -0,0 +1,110 @@ +maintainer = 'ORelio'; + $this->name = $this->getName(); + $this->uri = $this->getURI(); + $this->description = 'USA Federal technology news, best practices, and web 2.0 tools.'; + $this->update = '2016-07-20'; + + $this->parameters[] = + '[ + { + "name" : "Category", + "type" : "list", + "identifier" : "category", + "values" : + [ + { "name" : "All", "value" : "all" }, + { "name" : "Technology News", "value" : "technology-news" }, + { "name" : "CIO Briefing", "value" : "cio-briefing" }, + { "name" : "Emerging Tech", "value" : "emerging-tech" }, + { "name" : "Cloud", "value" : "cloud-computing" }, + { "name" : "Cybersecurity", "value" : "cybersecurity" }, + { "name" : "Mobile", "value" : "mobile" }, + { "name" : "Health", "value" : "health" }, + { "name" : "Defense", "value" : "defense" }, + { "name" : "Big Data", "value" : "big-data" } + ] + } + ]'; + + } + + public function collectData(array $param) { + + function ExtractFromDelimiters($string, $start, $end) { + if (strpos($string, $start) !== false) { + $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); + $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); + return $section_retrieved; + } return false; + } + + function StripWithDelimiters($string, $start, $end) { + while (strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } return $string; + } + + $category = $param['category']; + if (empty($category)) + $category = 'all'; + if ($category !== preg_replace('/[^a-z-]+/', '', $category) || strlen($category > 32)) + $this->returnError('Invalid "category" parameter.', 400); + $url = $this->getURI().'rss/'.$category.'/'; + $html = $this->file_get_html($url) or $this->returnError('Could not request Nextgov: '.$url, 500); + $limit = 0; + + foreach ($html->find('item') as $element) { + if ($limit < 10) { + + $article_url = ExtractFromDelimiters($element->innertext, '', ''); + $article_author = ExtractFromDelimiters($element->innertext, 'dc/elements/1.1/">', ''); + $article_title = $element->find('title', 0)->plaintext; + $article_subtitle = $element->find('description', 0)->plaintext; + $article_timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $article_thumbnail = ExtractFromDelimiters($element->innertext, 'file_get_html($article_url) or $this->returnError('Could not request Nextgov: '.$article_url, 500); + + $contents = $article->find('div.wysiwyg', 0)->innertext; + $contents = StripWithDelimiters($contents, '
', '
'); + $contents = StripWithDelimiters($contents, ''); //ad outer div + $contents = StripWithDelimiters($contents, ''); + $contents = ($article_thumbnail == '' ? '' : '

') + .'

'.$article_subtitle.'

' + .trim($contents); + + if ($article_thumbnail == '') + $article_thumbnail = 'http://cdn.nextgov.com/nextgov/images/logo.png'; + + $item = new \Item(); + $item->uri = $article_url; + $item->title = $article_title; + $item->author = $article_author; + $item->thumbnailUri = $article_thumbnail; + $item->timestamp = $article_timestamp; + $item->content = $contents; + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName() { + return 'Nextgov Bridge'; + } + + public function getURI() { + return 'https://www.nextgov.com/'; + } + + public function getCacheDuration() { + return 3600; //1 hour + } +} \ No newline at end of file From 48883a9cc4b339a8a97f2e7c97aae78e9c1ac55a Mon Sep 17 00:00:00 2001 From: ORelio Date: Tue, 26 Jul 2016 21:20:43 +0200 Subject: [PATCH 004/149] [TheHackerNews] New bridge. Security news. --- bridges/TheHackerNewsBridge.php | 90 +++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 bridges/TheHackerNewsBridge.php diff --git a/bridges/TheHackerNewsBridge.php b/bridges/TheHackerNewsBridge.php new file mode 100644 index 0000000..4081b93 --- /dev/null +++ b/bridges/TheHackerNewsBridge.php @@ -0,0 +1,90 @@ +maintainer = 'ORelio'; + $this->name = $this->getName(); + $this->uri = $this->getURI(); + $this->description = 'Cyber Security, Hacking, Technology News.'; + $this->update = '2016-07-22'; + + } + + public function collectData(array $param) { + + function StripWithDelimiters($string, $start, $end) { + while (strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } return $string; + } + + function StripRecursiveHTMLSection($string, $tag_name, $tag_start) { + $open_tag = '<'.$tag_name; + $close_tag = ''; + $close_tag_length = strlen($close_tag); + if (strpos($tag_start, $open_tag) === 0) { + while (strpos($string, $tag_start) !== false) { + $max_recursion = 100; + $section_to_remove = null; + $section_start = strpos($string, $tag_start); + $search_offset = $section_start; + do { + $max_recursion--; + $section_end = strpos($string, $close_tag, $search_offset); + $search_offset = $section_end + $close_tag_length; + $section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length); + $open_tag_count = substr_count($section_to_remove, $open_tag); + $close_tag_count = substr_count($section_to_remove, $close_tag); + } while ($open_tag_count > $close_tag_count && $max_recursion > 0); + $string = str_replace($section_to_remove, '', $string); + } + } + return $string; + } + + $html = $this->file_get_html($this->getURI()) or $this->returnError('Could not request TheHackerNews: '.$this->getURI(), 500); + $limit = 0; + + foreach ($html->find('article') as $element) { + if ($limit < 5) { + + $article_url = $element->find('a.entry-title', 0)->href; + $article_author = trim($element->find('span.vcard', 0)->plaintext); + $article_title = $element->find('a.entry-title', 0)->plaintext; + $article_timestamp = strtotime($element->find('span.updated', 0)->plaintext); + $article_thumbnail = $element->find('img', 0)->src; + $article = $this->file_get_html($article_url) or $this->returnError('Could not request TheHackerNews: '.$article_url, 500); + + $contents = $article->find('div.articlebodyonly', 0)->innertext; + $contents = StripRecursiveHTMLSection($contents, 'div', '
'); + + $item = new \Item(); + $item->uri = $article_url; + $item->title = $article_title; + $item->author = $article_author; + $item->thumbnailUri = $article_thumbnail; + $item->timestamp = $article_timestamp; + $item->content = trim($contents); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName() { + return 'The Hacker News Bridge'; + } + + public function getURI() { + return 'https://thehackernews.com/'; + } + + public function getCacheDuration() { + return 3600; //1 hour + } +} \ No newline at end of file From f72cde7fa12095270b8c18f3d0c19ed5367004f1 Mon Sep 17 00:00:00 2001 From: Frans de Jonge Date: Fri, 29 Jul 2016 08:23:03 +0200 Subject: [PATCH 005/149] =?UTF-8?q?RTBFBridge:=20exclude=20"=C3=80=20voir?= =?UTF-8?q?=20aussi"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bridges/RTBFBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/RTBFBridge.php b/bridges/RTBFBridge.php index c82041b..3b2a890 100644 --- a/bridges/RTBFBridge.php +++ b/bridges/RTBFBridge.php @@ -26,7 +26,7 @@ class RTBFBridge extends BridgeAbstract { if (isset($param['c'])) { $html = $this->file_get_html('http://www.rtbf.be/auvio/emissions/detail?id='.$param['c']) or $this->returnError('Could not request RTBF.', 404); - foreach($html->find('.rtbf-media-grid article') as $element) { + foreach($html->find('section[id!=widget-ml-avoiraussi-] .rtbf-media-grid article') as $element) { if($count < $limit) { $item = new \Item(); $item->id = $element->getAttribute('data-id'); From 74b6dd37f93facb2a8a83e13cf0fdf28ecbba870 Mon Sep 17 00:00:00 2001 From: Damien Calesse Date: Sat, 30 Jul 2016 16:16:02 +0200 Subject: [PATCH 006/149] Add Sens Critique Bridge Get the movies, series, video games, books and musics news. --- bridges/SensCritiqueBridge.php | 146 +++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 bridges/SensCritiqueBridge.php diff --git a/bridges/SensCritiqueBridge.php b/bridges/SensCritiqueBridge.php new file mode 100644 index 0000000..c1aaa93 --- /dev/null +++ b/bridges/SensCritiqueBridge.php @@ -0,0 +1,146 @@ +maintainer = "kranack"; + $this->name = "Sens Critique"; + $this->uri = "http://www.senscritique.com"; + $this->description = "Sens Critique news"; + $this->update = "2016-07-29"; + + $this->parameters[] = + '[ + { + "name" : "Movies", + "identifier" : "m", + "type": "checkbox" + }, + { + "name" : "Series", + "identifier" : "s", + "type": "checkbox" + }, + { + "name" : "Video Games", + "identifier" : "g", + "type": "checkbox" + }, + { + "name" : "Books", + "identifier" : "b", + "type": "checkbox" + }, + { + "name" : "BD", + "identifier" : "bd", + "type": "checkbox" + }, + { + "name" : "Music", + "identifier" : "mu", + "type": "checkbox" + } + ]'; + } + + public function collectData(array $param) { + if ((isset($param['m']) && $param['m'])) { + $this->collectMoviesData(); + } else if ((isset($param['s']) && $param['s'])) { + $this->collectSeriesData(); + } else if ((isset($param['g']) && $param['g'])) { + $this->collectGamesData(); + } else if ((isset($param['b']) && $param['b'])) { + $this->collectBooksData(); + } else if ((isset($param['bd']) && $param['bd'])) { + $this->collectBDsData(); + } else if ((isset($param['mu']) && $param['mu'])) { + $this->collectMusicsData(); + } else { + $this->returnError('You must choose a category', 400); + } + } + + public function collectMoviesData() { + $html = ''; + $html = $this->file_get_html('http://www.senscritique.com/films/cette-semaine') or $this->returnError('No results for this query.', 404); + $list = $html->find('ul.elpr-list', 0); + + $this->extractDataFromList($list); + } + + public function collectSeriesData() { + $html = ''; + $html = $this->file_get_html('http://www.senscritique.com/series/actualite') or $this->returnError('No results for this query.', 404); + $list = $html->find('ul.elpr-list', 0); + + $this->extractDataFromList($list); + } + + public function collectGamesData() { + $html = ''; + $html = $this->file_get_html('http://www.senscritique.com/jeuxvideo/actualite') or $this->returnError('No results for this query.', 404); + $list = $html->find('ul.elpr-list', 0); + + $this->extractDataFromList($list); + } + + public function collectBooksData() { + $html = ''; + $html = $this->file_get_html('http://www.senscritique.com/livres/actualite') or $this->returnError('No results for this query.', 404); + $list = $html->find('ul.elpr-list', 0); + + $this->extractDataFromList($list); + } + + public function collectBDsData() { + $html = ''; + $html = $this->file_get_html('http://www.senscritique.com/bd/actualite') or $this->returnError('No results for this query.', 404); + $list = $html->find('ul.elpr-list', 0); + + $this->extractDataFromList($list); + } + + public function collectMusicsData() { + $html = ''; + $html = $this->file_get_html('http://www.senscritique.com/musique/actualite') or $this->returnError('No results for this query.', 404); + $list = $html->find('ul.elpr-list', 0); + + $this->extractDataFromList($list); + } + + public function extractDataFromList($list) { + if ($list === null) { + $this->returnError('Cannot extract data from list', 400); + } + + foreach ($list->find('li') as $movie) { + $item = new \Item(); + $item->name = htmlspecialchars_decode($movie->find('.elco-title a', 0)->plaintext, ENT_QUOTES) . ' ' . $movie->find('.elco-date', 0)->plaintext; + $item->title = $movie->find('.elco-title a', 0)->plaintext . ' ' . $movie->find('.elco-date', 0)->plaintext; + $item->content = '' . $movie->find('.elco-original-title', 0)->plaintext . '

' . + $movie->find('.elco-baseline', 0)->plaintext . '
' . + $movie->find('.elco-baseline', 1)->plaintext . '

' . + $movie->find('.elco-description', 0)->plaintext . '

' . + trim($movie->find('.erra-ratings .erra-global', 0)->plaintext) . ' / 10'; + $item->id = $this->getURI() . $movie->find('.elco-title a', 0)->href; + $item->uri = $this->getURI() . $movie->find('.elco-title a', 0)->href; + $this->items[] = $item; + } + } + + public function getName() { + return $this->name; + } + + public function getURI() { + return (trim($this->uri) === "") ? "http://www.senscritique.com" : $this->uri; + } + + public function getCacheDuration(){ + return 0; // 1 hour + } + +} From 9b579c7cd444d7248c9cffb749d3a3eec8e3e06c Mon Sep 17 00:00:00 2001 From: Damien Calesse Date: Sat, 30 Jul 2016 18:19:36 +0200 Subject: [PATCH 007/149] Change Cache Duration Up to 6 hours --- bridges/SensCritiqueBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/SensCritiqueBridge.php b/bridges/SensCritiqueBridge.php index c1aaa93..cfd9e1c 100644 --- a/bridges/SensCritiqueBridge.php +++ b/bridges/SensCritiqueBridge.php @@ -140,7 +140,7 @@ class SensCritiqueBridge extends BridgeAbstract { } public function getCacheDuration(){ - return 0; // 1 hour + return 21600; // 6 hours } } From d60a822bc21ea45e3567e2028a6d2b53dea7c0c4 Mon Sep 17 00:00:00 2001 From: Damien Calesse Date: Sat, 30 Jul 2016 19:24:06 +0200 Subject: [PATCH 008/149] Update Flickr Bridge Fix the bridge (Flickr update?) I get the pictures titles with the API (slowing down the bridge). If you have any suggestions to replace them, feel free to change it. --- bridges/FlickrExploreBridge.php | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/bridges/FlickrExploreBridge.php b/bridges/FlickrExploreBridge.php index 9d83f27..524d9d9 100644 --- a/bridges/FlickrExploreBridge.php +++ b/bridges/FlickrExploreBridge.php @@ -5,21 +5,34 @@ class FlickrExploreBridge extends BridgeAbstract{ $this->maintainer = "sebsauvage"; $this->name = "Flickr Explore"; - $this->uri = "http://www.flickr.com/explore"; + $this->uri = "https://www.flickr.com/explore"; $this->description = "Returns the latest interesting images from Flickr"; - $this->update = "2014-05-25"; + $this->update = "2016-07-30"; } public function collectData(array $param){ - $html = $this->file_get_html('http://www.flickr.com/explore') or $this->returnError('Could not request Flickr.', 404); - - foreach($html->find('span.photo_container') as $element) { + $html = $this->file_get_html('https://www.flickr.com/explore') or $this->returnError('Could not request Flickr.', 404); + + foreach($html->find('.photo-list-photo-view') as $element) { + // Get the styles + $style = explode(';', $element->style); + // Get the background-image style + $backgroundImage = explode(':', end($style)); + // URI type : url(//cX.staticflickr.com/X/XXXXX/XXXXXXXXX.jpg) + $imageURI = trim(str_replace(['url(', ')'], '', end($backgroundImage))); + // Get the image ID + $imageURIs = explode('_', basename($imageURI)); + $imageID = reset($imageURIs); + + // Get the image JSON via Flickr API + $imageJSON = json_decode(file_get_contents('https://api.flickr.com/services/rest/?method=flickr.photos.getInfo&api_key=103b574d49bd51f0e18bfe907da44a0f&photo_id='.$imageID.'&format=json&nojsoncallback=1')) or $this->returnError('Could not request Flickr.', 404); // FIXME: Request time too long... + $item = new \Item(); - $item->uri = 'http://flickr.com'.$element->find('a',0)->href; - $item->thumbnailUri = $element->find('img',0)->getAttribute('data-defer-src'); + $item->uri = 'https://flickr.com/photo.gne?id='.$imageID; + $item->thumbnailUri = $imageURI; $item->content = ''; // FIXME: Filter javascript ? - $item->title = $element->find('a',0)->title; + $item->title = $imageJSON->photo->title->_content; $this->items[] = $item; } } @@ -29,7 +42,7 @@ class FlickrExploreBridge extends BridgeAbstract{ } public function getURI(){ - return 'http://www.flickr.com/explore'; + return 'https://www.flickr.com/explore'; } public function getCacheDuration(){ From fb595484b5e646726b7458774b47fe4ac43d6398 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Mon, 1 Aug 2016 02:06:59 +0200 Subject: [PATCH 009/149] Remove duplicate: FuturaSciencesw --- bridges/FSBridge.php | 56 -------------------------------------------- 1 file changed, 56 deletions(-) delete mode 100644 bridges/FSBridge.php diff --git a/bridges/FSBridge.php b/bridges/FSBridge.php deleted file mode 100644 index 0071a74..0000000 --- a/bridges/FSBridge.php +++ /dev/null @@ -1,56 +0,0 @@ -maintainer = "qwertygc"; - $this->name = "Futurasciences"; - $this->uri = "http://www.futura-sciences.com"; - $this->description = "Returns the 5 newest posts from FS (full text)"; - $this->update = "03/11/2015"; - - } - - public function collectData(array $param){ - - function FS_StripCDATA($string) { - $string = str_replace('', '', $string); - return $string; - } - function FS_ExtractContent($url) { - $html2 = $this->file_get_html($url); - $text = $html2->find('div.fiche-actualite', 0)->innertext; - $text = preg_replace('@]*?>.*?@si', '', $text); - return $text; - } - $html = $this->file_get_html('http://www.futura-sciences.com/rss/actualites.xml') or $this->returnError('Could not request Futura Sciences.', 404); - $limit = 0; - - foreach($html->find('item') as $element) { - if($limit < 5) { - $item = new \Item(); - $item->title = FS_StripCDATA($element->find('title', 0)->innertext); - $item->uri = FS_StripCDATA($element->find('guid', 0)->plaintext); - $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); - $item->content = FS_ExtractContent($item->uri); - $this->items[] = $item; - $limit++; - } - } - - } - - public function getName(){ - return 'Futura Sciences'; - } - - public function getURI(){ - return 'http://www.futura-sciences.com/'; - } - - public function getCacheDuration(){ - return 3600; // 1 hour - // return 0; // 1 hour - } -} From b37877bc746fc2bd793585600413692cf0412faa Mon Sep 17 00:00:00 2001 From: prysme01 Date: Mon, 1 Aug 2016 15:18:32 +0200 Subject: [PATCH 010/149] very basic support of ArsTechnica --- bridges/ArstechnicaBridge.php | 81 +++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 bridges/ArstechnicaBridge.php diff --git a/bridges/ArstechnicaBridge.php b/bridges/ArstechnicaBridge.php new file mode 100644 index 0000000..80c8086 --- /dev/null +++ b/bridges/ArstechnicaBridge.php @@ -0,0 +1,81 @@ +maintainer = "prysme"; + $this->name = "ArstechnicaBridge"; + $this->uri = "http://arstechnica.com"; + $this->description = "The PC enthusiast's resource. Power users and the tools they love, without computing religion"; + $this->update = "01/08/2016"; + + } + + public function collectData(array $param) { + function StripWithDelimiters($string, $start, $end) { + while (strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } return $string; + } + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + + function ExtractContent($url) { + #echo $url; + $html2 = file_get_html($url); + + $text = $html2->find("section[id='article-guts']", 0); + $text = StripWithDelimiters($text->innertext,''); + $text = StripWithDelimiters($text,'
','
'); + $text = StripWithDelimiters($text,''); + $text = StripWithDelimiters($text,'
','
'); + $text = StripWithDelimiters($text,'
'); + $text = StripWithDelimiters($text,'
  • '); + //$text = strip_tags($text->innertext, '

    '); + #print_r("ICI"); + #print_r($text); + #print_r("FIN"); + return $text; + } + + $html = $this->file_get_html('http://feeds.arstechnica.com/arstechnica/index') or $this->returnError('Could not request NextInpact.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->thumbnailUri = StripCDATA($element->find('enclosure', 0)->url); + $item->author = StripCDATA($element->find('author', 0)->innertext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + //$item->content = $item->uri; + $this->items[] = $item; + $limit++; + } + } + +} + + + public function getName() { + return 'ArsTechnica'; + } + + public function getCacheDuration() { + return 0; // 2h + } + + public function getURI() { + return "http://arstechnica.com"; + } + +} From e3cf486ac56452efc989d475ab299499e94ac80c Mon Sep 17 00:00:00 2001 From: prysme01 Date: Mon, 1 Aug 2016 16:16:18 +0200 Subject: [PATCH 011/149] Better tag handling --- bridges/ArstechnicaBridge.php | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/bridges/ArstechnicaBridge.php b/bridges/ArstechnicaBridge.php index 80c8086..f9a7398 100644 --- a/bridges/ArstechnicaBridge.php +++ b/bridges/ArstechnicaBridge.php @@ -32,16 +32,14 @@ class ArstechnicaBridge extends BridgeAbstract { $html2 = file_get_html($url); $text = $html2->find("section[id='article-guts']", 0); + /*foreach ($text->find('

  • '; return $card; From 8f76eebddb3d18cd70033a72c42b5cf146d1f353 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Tue, 2 Aug 2016 20:29:40 +0200 Subject: [PATCH 034/149] Fix parameters list Fixes warning: "array_key_exists() expects parameter 2 to be array, string given in /volume1/web/rss-bridge_dev/lib/HTMLUtils.php on line 59 Warning: Invalid argument supplied for foreach() in /volume1/web/rss-bridge_dev/lib/HTMLUtils.php on line 64 --- bridges/ElsevierBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/ElsevierBridge.php b/bridges/ElsevierBridge.php index bb73477..9265a0c 100644 --- a/bridges/ElsevierBridge.php +++ b/bridges/ElsevierBridge.php @@ -12,9 +12,9 @@ class ElsevierBridge extends BridgeAbstract{ $this->name = 'Elsevier journals recent articles'; $this->uri = 'http://www.journals.elsevier.com'; $this->description = 'Returns the recent articles published in Elsevier journals'; - $this->update = '2016-06-26'; + $this->update = '2016-08-02'; - $this->parameters= + $this->parameters[] = '[ { "name" : "Journal name", From 399fce06ce5f5eb95ec2f91914d970ba9db32836 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Tue, 2 Aug 2016 20:35:27 +0200 Subject: [PATCH 035/149] Require input field, add example value and title Previously the bridge could be requested without any journal causing error 403 due to broken URL. --- bridges/ElsevierBridge.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bridges/ElsevierBridge.php b/bridges/ElsevierBridge.php index 9265a0c..408fc45 100644 --- a/bridges/ElsevierBridge.php +++ b/bridges/ElsevierBridge.php @@ -18,7 +18,10 @@ class ElsevierBridge extends BridgeAbstract{ '[ { "name" : "Journal name", - "identifier" : "j" + "identifier" : "j", + "required" : "true", + "exampleValue" : "academic-pediatrics", + "title" : "Insert html-part of your journal" } ]'; } From f7839697213d9a2c93b7d52519444ffd963f2c65 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Tue, 2 Aug 2016 21:35:13 +0200 Subject: [PATCH 036/149] Create member functions to extract information from articles The extractor function will handle many situations more specifically in order to provide better results. --- bridges/ElsevierBridge.php | 47 +++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/bridges/ElsevierBridge.php b/bridges/ElsevierBridge.php index 408fc45..7911feb 100644 --- a/bridges/ElsevierBridge.php +++ b/bridges/ElsevierBridge.php @@ -26,6 +26,47 @@ class ElsevierBridge extends BridgeAbstract{ ]'; } + // Extracts the list of names from an article as string + function ExtractArticleName ($article){ + $names = $article->find('small', 0); + if($names) + return trim($names->plaintext); + return ''; + } + + // Extracts the timestamp from an article + function ExtractArticleTimestamp ($article){ + $time = $article->find('.article-info', 0); + if($time){ + $timestring = trim($time->plaintext); + /* + The format depends on the age of an article: + - Available online 29 July 2016 + - July 2016 + - May–June 2016 + */ + if(preg_match('/\S*(\d+\s\S+\s\d{4})/ims', $timestring, $matches)){ + return strtotime($matches[0]); + } elseif (preg_match('/([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){ + return strtotime($matches[0]); + } elseif (preg_match('/[A-Za-z]+\-([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){ + return strtotime($matches[0]); + } else { + return 0; + } + } + return 0; + } + + // Extracts the content from an article + function ExtractArticleContent ($article){ + $content = $article->find('.article-content', 0); + if($content){ + return trim($content->plaintext); + } + return ''; + } + public function collectData(array $param){ $uri = 'http://www.journals.elsevier.com/'.$param['j'].'/recent-articles/'; $html = file_get_html($uri) @@ -36,9 +77,9 @@ class ElsevierBridge extends BridgeAbstract{ $item = new \Item(); $item->uri=$article->find('.pod-listing-header>a',0)->getAttribute('href').'?np=y'; $item->title=$article->find('.pod-listing-header>a',0)->plaintext; - $item->name=trim($article->find('small',0)->plaintext); - $item->timestamp=strtotime($article->find('.article-info',0)->plaintext); - $item->content=trim($article->find('.article-content',0)->plaintext); + $item->name=$this->ExtractArticleName($article); + $item->timestamp=$this->ExtractArticleTimestamp($article); + $item->content=$this->ExtractArticleContent($article); $this->items[]=$item; } From a1c680f8e8dac7cb8086cf469d4ff05b469d4b90 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Tue, 2 Aug 2016 21:40:22 +0200 Subject: [PATCH 037/149] Fix indentation and improve code style - Use tab instead of spaces - Remove obsolete bridge description at start of the file - Add spaces at the assignment operator ('=' -> ' = ') - Remove unnecessary empty lines --- bridges/ElsevierBridge.php | 164 ++++++++++++++++++------------------- 1 file changed, 78 insertions(+), 86 deletions(-) diff --git a/bridges/ElsevierBridge.php b/bridges/ElsevierBridge.php index 7911feb..4f4cd57 100644 --- a/bridges/ElsevierBridge.php +++ b/bridges/ElsevierBridge.php @@ -1,99 +1,91 @@ maintainer = 'Pierre Mazière'; - $this->name = 'Elsevier journals recent articles'; - $this->uri = 'http://www.journals.elsevier.com'; - $this->description = 'Returns the recent articles published in Elsevier journals'; - $this->update = '2016-08-02'; + $this->maintainer = 'Pierre Mazière'; + $this->name = 'Elsevier journals recent articles'; + $this->uri = 'http://www.journals.elsevier.com'; + $this->description = 'Returns the recent articles published in Elsevier journals'; + $this->update = '2016-08-02'; - $this->parameters[] = - '[ - { - "name" : "Journal name", - "identifier" : "j", - "required" : "true", - "exampleValue" : "academic-pediatrics", - "title" : "Insert html-part of your journal" - } - ]'; - } + $this->parameters[] = + '[ + { + "name" : "Journal name", + "identifier" : "j", + "required" : "true", + "exampleValue" : "academic-pediatrics", + "title" : "Insert html-part of your journal" + } + ]'; + } - // Extracts the list of names from an article as string - function ExtractArticleName ($article){ - $names = $article->find('small', 0); - if($names) - return trim($names->plaintext); - return ''; - } + // Extracts the list of names from an article as string + function ExtractArticleName ($article){ + $names = $article->find('small', 0); + if($names) + return trim($names->plaintext); + return ''; + } - // Extracts the timestamp from an article - function ExtractArticleTimestamp ($article){ - $time = $article->find('.article-info', 0); - if($time){ - $timestring = trim($time->plaintext); - /* - The format depends on the age of an article: - - Available online 29 July 2016 - - July 2016 - - May–June 2016 - */ - if(preg_match('/\S*(\d+\s\S+\s\d{4})/ims', $timestring, $matches)){ - return strtotime($matches[0]); - } elseif (preg_match('/([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){ - return strtotime($matches[0]); - } elseif (preg_match('/[A-Za-z]+\-([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){ - return strtotime($matches[0]); - } else { - return 0; - } - } - return 0; - } + // Extracts the timestamp from an article + function ExtractArticleTimestamp ($article){ + $time = $article->find('.article-info', 0); + if($time){ + $timestring = trim($time->plaintext); + /* + The format depends on the age of an article: + - Available online 29 July 2016 + - July 2016 + - May–June 2016 + */ + if(preg_match('/\S*(\d+\s\S+\s\d{4})/ims', $timestring, $matches)){ + return strtotime($matches[0]); + } elseif (preg_match('/([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){ + return strtotime($matches[0]); + } elseif (preg_match('/[A-Za-z]+\-([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){ + return strtotime($matches[0]); + } else { + return 0; + } + } + return 0; + } - // Extracts the content from an article - function ExtractArticleContent ($article){ - $content = $article->find('.article-content', 0); - if($content){ - return trim($content->plaintext); - } - return ''; - } + // Extracts the content from an article + function ExtractArticleContent ($article){ + $content = $article->find('.article-content', 0); + if($content){ + return trim($content->plaintext); + } + return ''; + } - public function collectData(array $param){ - $uri = 'http://www.journals.elsevier.com/'.$param['j'].'/recent-articles/'; - $html = file_get_html($uri) - or $this->returnError('No results for Elsevier journal '.$param['j'], 404); + public function collectData(array $param){ + $uri = 'http://www.journals.elsevier.com/' . $param['j'] . '/recent-articles/'; + $html = file_get_html($uri) or $this->returnError('No results for Elsevier journal '.$param['j'], 404); - foreach($html->find('.pod-listing') as $article){ + foreach($html->find('.pod-listing') as $article){ + $item = new \Item(); + $item->uri = $article->find('.pod-listing-header>a',0)->getAttribute('href').'?np=y'; + $item->title = $article->find('.pod-listing-header>a',0)->plaintext; + $item->name = $this->ExtractArticleName($article); + $item->timestamp = $this->ExtractArticleTimestamp($article); + $item->content = $this->ExtractArticleContent($article); + $this->items[] = $item; + } + } - $item = new \Item(); - $item->uri=$article->find('.pod-listing-header>a',0)->getAttribute('href').'?np=y'; - $item->title=$article->find('.pod-listing-header>a',0)->plaintext; - $item->name=$this->ExtractArticleName($article); - $item->timestamp=$this->ExtractArticleTimestamp($article); - $item->content=$this->ExtractArticleContent($article); + public function getName(){ + return 'Elsevier journals recent articles'; + } - $this->items[]=$item; - } - } + public function getURI(){ + return 'http://www.journals.elsevier.com'; + } - public function getName(){ - return 'Elsevier journals recent articles'; - } - - public function getURI(){ - return 'http://www.journals.elsevier.com'; - } - - public function getCacheDuration(){ - return 43200; // 12h - } + public function getCacheDuration(){ + return 43200; // 12h + } } +?> \ No newline at end of file From 6b7b38db54c4108ed7ba798093ae89e89ab96638 Mon Sep 17 00:00:00 2001 From: Teromene Date: Tue, 2 Aug 2016 22:16:43 +0200 Subject: [PATCH 038/149] Improved new style. --- css/style.css | 65 +++++++++++++++++++++++++++++++++++++++++++++++ index.php | 11 ++++---- lib/HTMLUtils.php | 3 +++ 3 files changed, 74 insertions(+), 5 deletions(-) diff --git a/css/style.css b/css/style.css index e378f57..4ad2a3e 100644 --- a/css/style.css +++ b/css/style.css @@ -58,6 +58,20 @@ section { } + +section:last-of-type { + + opacity: 0.5; + +} + +section:last-of-type:hover { + + opacity: 1; + +} + + section > h2 { font-size: 200%; @@ -90,6 +104,13 @@ button { } +button.small { + + width: auto; + line-height: 1.2em; + +} + .description { margin: 10px; @@ -128,3 +149,47 @@ input[type="text"] { padding-left: 4px; } + +form { + + display: none; + +} + +h5 { + + display: none; + +} + +.showmore-box { + + display: none; + +} + +.showmore, .showless { + + color: #888888; + +} + +.showmore-box:checked ~ .showmore { + + display: none; + +} + +.showmore-box:not(:checked) ~ .showless { + + display: none; + +} + + + +.showmore-box:checked ~ form, .showmore-box:checked ~ h5 { + + display: block; + +} diff --git a/index.php b/index.php index d81b8c5..57cb12f 100644 --- a/index.php +++ b/index.php @@ -182,11 +182,12 @@ $formats = Format::searchInformation(); $inactiveBridges .= HTMLUtils::displayBridgeCard($bridgeName, $formats, false) . PHP_EOL; } } - echo '
    ' . $inactiveBridges; + echo $inactiveBridges; ?> -
    +
    + RSS-Bridge alpha 0.2 ~ Public Domain
    + / active bridges.
    +
    +
    diff --git a/lib/HTMLUtils.php b/lib/HTMLUtils.php index ce08efa..349da14 100644 --- a/lib/HTMLUtils.php +++ b/lib/HTMLUtils.php @@ -35,6 +35,8 @@ class HTMLUtils {

    {$description}

    + + CARD; // If we don't have any parameter for the bridge, we print a generic form to load it. @@ -141,6 +143,7 @@ CARD; } + $card .= ''; $card .= '

    '.$bridgeElement->maintainer.'

    '; $card .= ''; From 71d868fd904d31abb15b2ab8a69c9d9b64d0ef69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Michon?= Date: Tue, 2 Aug 2016 23:26:00 +0200 Subject: [PATCH 039/149] Add scalingo.json --- scalingo.json | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 scalingo.json diff --git a/scalingo.json b/scalingo.json new file mode 100644 index 0000000..f4c4ffd --- /dev/null +++ b/scalingo.json @@ -0,0 +1,6 @@ +{ + "name": "RSS Bridge", + "description": "rss-bridge is a PHP project capable of generating ATOM feeds for websites which don't have one.", + "repository": "https://github.com/sebsauvage/rss-bridge", + "website": "https://github.com/sebsauvage/rss-bridge", +} From 173afb8314de0bb34181f0716a3046cf1b6f2e0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Michon?= Date: Tue, 2 Aug 2016 23:32:30 +0200 Subject: [PATCH 040/149] Update readme: Add scalingo one click deploy --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 4bc918d..07ba235 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,8 @@ By default, the script creates `whitelist.txt` and adds the main bridges (see ab * to disable main bridges (remove the line) New bridges are disabled by default, so make sure to check regularly what's new and whitelist what you want ! + +[![Deploy on Scalingo](https://cdn.scalingo.com/deploy/button.svg)](https://my.scalingo.com/deploy?source=https://github.com/sebsauvage/rss-bridge) Author === From e7e2101889fbdf31ef0e204937a2b6ce1b04721e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Michon?= Date: Tue, 2 Aug 2016 23:37:01 +0200 Subject: [PATCH 041/149] Typo --- README.md | 2 +- scalingo.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 07ba235..3dccf45 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ By default, the script creates `whitelist.txt` and adds the main bridges (see ab * to enable extra bridges (one bridge per line) * to disable main bridges (remove the line) -New bridges are disabled by default, so make sure to check regularly what's new and whitelist what you want ! +New bridges are disabled by default, so make sure to check regularly what's new and whitelist what you want! [![Deploy on Scalingo](https://cdn.scalingo.com/deploy/button.svg)](https://my.scalingo.com/deploy?source=https://github.com/sebsauvage/rss-bridge) diff --git a/scalingo.json b/scalingo.json index f4c4ffd..9b1d51e 100644 --- a/scalingo.json +++ b/scalingo.json @@ -2,5 +2,5 @@ "name": "RSS Bridge", "description": "rss-bridge is a PHP project capable of generating ATOM feeds for websites which don't have one.", "repository": "https://github.com/sebsauvage/rss-bridge", - "website": "https://github.com/sebsauvage/rss-bridge", + "website": "https://github.com/sebsauvage/rss-bridge" } From 7ff901de08b767afedfc991e77107cc74aafe302 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 3 Aug 2016 12:37:56 +0200 Subject: [PATCH 042/149] Change all nested functions to member functions This fixes error "Using $this when not in object context" Nested functions are not part of the object and therefore don't have access to the object instance $this! --- bridges/DeveloppezDotComBridge.php | 66 +++++++++++++++--------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php index 9c147a8..57cd2cd 100644 --- a/bridges/DeveloppezDotComBridge.php +++ b/bridges/DeveloppezDotComBridge.php @@ -11,48 +11,48 @@ class DeveloppezDotComBridge extends BridgeAbstract{ } + function DeveloppezDotComStripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + + function convert_smart_quotes($string)//F***ing quotes from Microsoft Word badly encoded, here was the trick: http://stackoverflow.com/questions/1262038/how-to-replace-microsoft-encoded-quotes-in-php + { + $search = array(chr(145), + chr(146), + chr(147), + chr(148), + chr(151)); + + $replace = array("'", + "'", + '"', + '"', + '-'); + + return str_replace($search, $replace, $string); + } + + function DeveloppezDotComExtractContent($url) { + $articleHTMLContent = $this->file_get_html($url); + $text = $this->convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext); + $text = utf8_encode($text); + return trim($text); + } + public function collectData(array $param){ - function DeveloppezDotComStripCDATA($string) { - $string = str_replace('', '', $string); - return $string; - } - - function convert_smart_quotes($string)//F***ing quotes from Microsoft Word badly encoded, here was the trick: http://stackoverflow.com/questions/1262038/how-to-replace-microsoft-encoded-quotes-in-php - { - $search = array(chr(145), - chr(146), - chr(147), - chr(148), - chr(151)); - - $replace = array("'", - "'", - '"', - '"', - '-'); - - return str_replace($search, $replace, $string); - } - - function DeveloppezDotComExtractContent($url) { - $articleHTMLContent = $this->file_get_html($url); - $text = convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext); - $text = utf8_encode($text); - return trim($text); - } - $rssFeed = $this->file_get_html('http://www.developpez.com/index/rss') or $this->returnError('Could not request http://www.developpez.com/index/rss', 404); $limit = 0; foreach($rssFeed->find('item') as $element) { if($limit < 10) { $item = new \Item(); - $item->title = DeveloppezDotComStripCDATA($element->find('title', 0)->innertext); - $item->uri = DeveloppezDotComStripCDATA($element->find('guid', 0)->plaintext); + $item->title = $this->DeveloppezDotComStripCDATA($element->find('title', 0)->innertext); + $item->uri = $this->DeveloppezDotComStripCDATA($element->find('guid', 0)->plaintext); $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); - $content = DeveloppezDotComExtractContent($item->uri); + $content = $this->DeveloppezDotComExtractContent($item->uri); $item->content = strlen($content) ? $content : $element->description;//In case of it is a tutorial, we just keep the original description $this->items[] = $item; $limit++; From 58b3261fbbdbd7c3b18eb0e0d42e42bbbb70362e Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 3 Aug 2016 12:42:57 +0200 Subject: [PATCH 043/149] Fix indentation and remove empty lines --- bridges/DeveloppezDotComBridge.php | 60 +++++++++++++++--------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php index 57cd2cd..51dc013 100644 --- a/bridges/DeveloppezDotComBridge.php +++ b/bridges/DeveloppezDotComBridge.php @@ -2,13 +2,11 @@ class DeveloppezDotComBridge extends BridgeAbstract{ public function loadMetadatas() { - $this->maintainer = "polopollo"; $this->name = "Developpez.com Actus (FR)"; $this->uri = "http://www.developpez.com/"; $this->description = "Returns the 15 newest posts from DeveloppezDotCom (full text)."; - $this->update = "2014-07-14"; - + $this->update = "2016-08-03"; } function DeveloppezDotComStripCDATA($string) { @@ -17,7 +15,9 @@ class DeveloppezDotComBridge extends BridgeAbstract{ return $string; } - function convert_smart_quotes($string)//F***ing quotes from Microsoft Word badly encoded, here was the trick: http://stackoverflow.com/questions/1262038/how-to-replace-microsoft-encoded-quotes-in-php + // F***ing quotes from Microsoft Word badly encoded, here was the trick: + // http://stackoverflow.com/questions/1262038/how-to-replace-microsoft-encoded-quotes-in-php + function convert_smart_quotes($string) { $search = array(chr(145), chr(146), @@ -41,35 +41,33 @@ class DeveloppezDotComBridge extends BridgeAbstract{ return trim($text); } - public function collectData(array $param){ + public function collectData(array $param){ + $rssFeed = $this->file_get_html('http://www.developpez.com/index/rss') or $this->returnError('Could not request http://www.developpez.com/index/rss', 404); + $limit = 0; - $rssFeed = $this->file_get_html('http://www.developpez.com/index/rss') or $this->returnError('Could not request http://www.developpez.com/index/rss', 404); - $limit = 0; + foreach($rssFeed->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = $this->DeveloppezDotComStripCDATA($element->find('title', 0)->innertext); + $item->uri = $this->DeveloppezDotComStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $content = $this->DeveloppezDotComExtractContent($item->uri); + $item->content = strlen($content) ? $content : $element->description; //In case of it is a tutorial, we just keep the original description + $this->items[] = $item; + $limit++; + } + } + } - foreach($rssFeed->find('item') as $element) { - if($limit < 10) { - $item = new \Item(); - $item->title = $this->DeveloppezDotComStripCDATA($element->find('title', 0)->innertext); - $item->uri = $this->DeveloppezDotComStripCDATA($element->find('guid', 0)->plaintext); - $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); - $content = $this->DeveloppezDotComExtractContent($item->uri); - $item->content = strlen($content) ? $content : $element->description;//In case of it is a tutorial, we just keep the original description - $this->items[] = $item; - $limit++; - } - } + public function getName(){ + return 'DeveloppezDotCom'; + } - } + public function getURI(){ + return 'http://www.developpez.com/'; + } - public function getName(){ - return 'DeveloppezDotCom'; - } - - public function getURI(){ - return 'http://www.developpez.com/'; - } - - public function getCacheDuration(){ - return 1800; // 30min - } + public function getCacheDuration(){ + return 1800; // 30min + } } From 7740dbfcc95e9a2f3537066c927e3da48d96bf49 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 3 Aug 2016 12:50:24 +0200 Subject: [PATCH 044/149] [Frandroid] remove bridge: website supported by WordPress bridge --- bridges/FrandroidBridge.php | 69 ------------------------------------- 1 file changed, 69 deletions(-) delete mode 100644 bridges/FrandroidBridge.php diff --git a/bridges/FrandroidBridge.php b/bridges/FrandroidBridge.php deleted file mode 100644 index 8b1625c..0000000 --- a/bridges/FrandroidBridge.php +++ /dev/null @@ -1,69 +0,0 @@ -maintainer = "Daiyousei"; - $this->name = "Frandroid"; - $this->uri = "http://www.frandroid.com/"; - $this->description = "Returns the RSS feed from Frandroid (full text articles)"; - $this->update = "2015-03-05"; - - } - - public function collectData(array $param) - { - - function FrandroidStripCDATA($string) - { - $string = str_replace('', '', $string); - return $string; - } - function FrandroidExtractContent($url) - { - $html2 = $this->file_get_html($url); - $html3 = $html2->find('div.post-content', 0); - $html3->find('div.no-sidebar-ad-top', 0)->outertext = ''; - $ret = $html3->find('div.shortcode-container'); - foreach ($ret as $value) { - $value->outertext = ''; - } - - $html3->find('div#hrr-link', 0)->outertext = ''; - $text = $html3->innertext; - $text = strip_tags($text, '