From 0e27a6ebabf7c2d39ea0849bad5b8adf573f4297 Mon Sep 17 00:00:00 2001
From: ORelio
Date: Tue, 19 Jul 2016 19:35:43 +0200
Subject: [PATCH 1/4] [Numerama] Coding style: reindent code
---
bridges/NumeramaBridge.php | 88 +++++++++++++++++++-------------------
1 file changed, 44 insertions(+), 44 deletions(-)
diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php
index 4099178..f5b24ea 100644
--- a/bridges/NumeramaBridge.php
+++ b/bridges/NumeramaBridge.php
@@ -1,60 +1,60 @@
<?php
class NumeramaBridge extends BridgeAbstract{
public function loadMetadatas() {
- $this->maintainer = "mitsukarenai";
- $this->name = "Numerama";
- $this->uri = "http://www.numerama.com/";
- $this->description = "Returns the 5 newest posts from Numerama (full text)";
- $this->update = "2015-10-12";
-
- }
-
- public function collectData(array $param){
-
- function NumeramaStripCDATA($string) {
- $string = str_replace('<![CDATA[', '', $string);
- $string = str_replace(']]>', '', $string);
- return $string;
- }
-
- function NumeramaExtractContent($url)
- {
- $html2 = file_get_html($url);
- $text = $html2->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
- $text = ''; // add post picture
- $text = $text.$html2->find('article[class=post-content]', 0)->innertext; // extract the post
- return $text;
- }
-
- $html = $this->file_get_html('http://www.numerama.com/feed/') or $this->returnError('Could not request Numerama.', 404);
- $limit = 0;
-
- foreach($html->find('item') as $element) {
- if($limit < 5) {
- $item = new \Item();
- $item->title = html_entity_decode(NumeramaStripCDATA($element->find('title', 0)->innertext));
- $item->author = NumeramaStripCDATA($element->find('dc:creator', 0)->innertext);
- $item->uri = NumeramaStripCDATA($element->find('guid', 0)->plaintext);
- $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
- $item->content = NumeramaExtractContent($item->uri);
- $this->items[] = $item;
- $limit++;
- }
- }
+ $this->maintainer = 'mitsukarenai';
+ $this->name = 'Numerama';
+ $this->uri = 'http://www.numerama.com/';
+ $this->description = 'Returns the 5 newest posts from Numerama (full text)';
+ $this->update = '2015-10-12';
}
- public function getName(){
+ public function collectData(array $param) {
+
+ function NumeramaStripCDATA($string) {
+ $string = str_replace('<![CDATA[', '', $string);
+ $string = str_replace(']]>', '', $string);
+ return $string;
+ }
+
+ function NumeramaExtractContent($url)
+ {
+ $html2 = file_get_html($url);
+ $text = $html2->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
+ $text = ''; // add post picture
+ $text = $text.$html2->find('article[class=post-content]', 0)->innertext; // extract the post
+ return $text;
+ }
+
+ $html = $this->file_get_html('http://www.numerama.com/feed/') or $this->returnError('Could not request Numerama.', 404);
+ $limit = 0;
+
+ foreach($html->find('item') as $element) {
+ if($limit < 5) {
+ $item = new \Item();
+ $item->title = html_entity_decode(NumeramaStripCDATA($element->find('title', 0)->innertext));
+ $item->author = NumeramaStripCDATA($element->find('dc:creator', 0)->innertext);
+ $item->uri = NumeramaStripCDATA($element->find('guid', 0)->plaintext);
+ $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
+ $item->content = NumeramaExtractContent($item->uri);
+ $this->items[] = $item;
+ $limit++;
+ }
+ }
+
+ }
+
+ public function getName() {
return 'Numerama';
}
- public function getURI(){
+ public function getURI() {
return 'http://www.numerama.com/';
}
- public function getCacheDuration(){
+ public function getCacheDuration() {
return 1800; // 30min
}
}
From 2f3bddb2915012de132a1b512dcf16105bff8180 Mon Sep 17 00:00:00 2001
From: ORelio
Date: Tue, 19 Jul 2016 19:37:06 +0200
Subject: [PATCH 2/4] [Numerama] Use proxied file_get_html
---
bridges/NumeramaBridge.php | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php
index f5b24ea..e1829c3 100644
--- a/bridges/NumeramaBridge.php
+++ b/bridges/NumeramaBridge.php
@@ -7,7 +7,7 @@ class NumeramaBridge extends BridgeAbstract{
$this->name = 'Numerama';
$this->uri = 'http://www.numerama.com/';
$this->description = 'Returns the 5 newest posts from Numerama (full text)';
- $this->update = '2015-10-12';
+ $this->update = '2016-07-19';
}
@@ -19,9 +19,9 @@ class NumeramaBridge extends BridgeAbstract{
return $string;
}
- function NumeramaExtractContent($url)
+ function NumeramaExtractContent($bridge, $url)
{
- $html2 = file_get_html($url);
+ $html2 = $bridge->file_get_html($url);
$text = $html2->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
$text = ''; // add post picture
$text = $text.$html2->find('article[class=post-content]', 0)->innertext; // extract the post
@@ -38,7 +38,7 @@ class NumeramaBridge extends BridgeAbstract{
$item->author = NumeramaStripCDATA($element->find('dc:creator', 0)->innertext);
$item->uri = NumeramaStripCDATA($element->find('guid', 0)->plaintext);
$item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
- $item->content = NumeramaExtractContent($item->uri);
+ $item->content = NumeramaExtractContent($this, $item->uri);
$this->items[] = $item;
$limit++;
}
From d5eb53ecd21784489a202e57ca786e8050443d20 Mon Sep 17 00:00:00 2001
From: ORelio
Date: Tue, 19 Jul 2016 19:37:33 +0200
Subject: [PATCH 3/4] [WeLiveSecurity] New bridge, Security News.
---
bridges/WeLiveSecurityBridge.php | 75 ++++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
create mode 100644 bridges/WeLiveSecurityBridge.php
diff --git a/bridges/WeLiveSecurityBridge.php b/bridges/WeLiveSecurityBridge.php
new file mode 100644
index 0000000..cdb04bd
--- /dev/null
+++ b/bridges/WeLiveSecurityBridge.php
@@ -0,0 +1,75 @@
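+<?php
+class WeLiveSecurityBridge extends BridgeAbstract {
+
+ public function loadMetadatas() {
+
+ $this->maintainer = 'ORelio';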
+ $this->name = $this->getName();
+ $this->uri = $this->getURI();
+ $this->description = 'Returns the newest articles.';
+ $this->update = '2016-07-19';
+ }
+
+ public function collectData(array $param) {
+
+ function ExtractFromDelimiters($string, $start, $end) {
+ if (strpos($string, $start) !== false) {
+ $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
+ $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
+ return $section_retrieved;
+ } return false;
+ }
+
+ function StripWithDelimiters($string, $start, $end) {
+ while (strpos($string, $start) !== false) {
+ $section_to_remove = substr($string, strpos($string, $start));
+ $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));
+ $string = str_replace($section_to_remove, '', $string);
+ } return $string;
+ }
+
+ $feed = $this->getURI().'feed/';
+ $html = $this->file_get_html($feed) or $this->returnError('Could not request '.$this->getName().': '.$feed, 500);
+ $limit = 0;
+
+ foreach ($html->find('item') as $element) {
+ if ($limit < 5) {
+
+ $article_image = $element->find('image', 0)->plaintext;
+ $article_url = ExtractFromDelimiters($element->innertext, '', '');
+ $article_summary = ExtractFromDelimiters($element->innertext, '', '
');
+ $article_html = file_get_contents($article_url) or $this->returnError('Could not request '.$this->getName().': '.$article_url, 500);
+ if (substr($article_html, 0, 2) == "\x1f\x8b") //http://www.gzip.org/zlib/rfc-gzip.html#header-trailer -> GZip ID1
+ $article_html = gzdecode($article_html); //Response is GZipped even if we didn't accept GZip!? Let's decompress...
+ $article_html = str_get_html($article_html); //Now we have our HTML data. But still, that's an important HTTP violation...
+ $article_content = $article_html->find('div.wlistingsingletext', 0)->innertext;
+ $article_content = StripWithDelimiters($article_content, '');
+ $contents = StripWithDelimiters($contents, '');
+ $contents = StripWithDelimiters($contents, '