From 6f8b93fb46a02d7c2b059be94ed1d9c8b8b4081a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Wed, 14 Sep 2016 13:31:43 +0200 Subject: [PATCH 01/11] [bridges] removed since they just parse the site feed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- bridges/GuruMedBridge.php | 32 -------------------------- bridges/ZoneTelechargementBridge.php | 34 ---------------------------- 2 files changed, 66 deletions(-) delete mode 100644 bridges/GuruMedBridge.php delete mode 100644 bridges/ZoneTelechargementBridge.php diff --git a/bridges/GuruMedBridge.php b/bridges/GuruMedBridge.php deleted file mode 100644 index 0c4bf2c..0000000 --- a/bridges/GuruMedBridge.php +++ /dev/null @@ -1,32 +0,0 @@ -', '', $string); - return $string; - } - - public function collectData(){ - $html = $this->getSimpleHTMLDOM(self::URI.'feed') - or $this->returnServerError('Could not request Gurumed.'); - $limit = 0; - - foreach($html->find('item') as $element) { - if($limit < 5) { - $item = array(); - $item['title'] = $this->GurumedStripCDATA($element->find('title', 0)->innertext); - $item['uri'] = $this->GurumedStripCDATA($element->find('guid', 0)->plaintext); - $item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext); - $item['content'] = $this->GurumedStripCDATA(strip_tags($element->find('description', 0), '


')); - $this->items[] = $item; - $limit++; - } - } - } -} diff --git a/bridges/ZoneTelechargementBridge.php b/bridges/ZoneTelechargementBridge.php deleted file mode 100644 index ee54783..0000000 --- a/bridges/ZoneTelechargementBridge.php +++ /dev/null @@ -1,34 +0,0 @@ -You may specify a category found in RSS URLs, else main feed is selected.'; - - const PARAMETERS = array( array( - 'category'=>array('name'=>'Category') - )); - - public function collectData(){ - - function StripCDATA($string) { - $string = str_replace('', '', $string); - return $string; - } - - $url = $this->getURI().$this->getInput('category').'rss.xml'; - $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Zone Telechargement: '.$url); - - foreach($html->find('item') as $element) { - $item = array(); - $item['title'] = $element->find('title', 0)->plaintext; - $item['uri'] = str_replace('http://', 'https://', $element->find('guid', 0)->plaintext); - $item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext); - $item['content'] = StripCDATA($element->find('description', 0)->innertext); - $this->items[] = $item; - $limit++; - } - } -} From 244516f0a29329f47c7eb9953b66b91cc218a5a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Wed, 14 Sep 2016 13:32:34 +0200 Subject: [PATCH 02/11] [GizmodoFRBridge => GizmodoBridge] rename and make it a FeedExpander MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- bridges/GizmodoBridge.php | 35 +++++++++++++++++++++++++++ bridges/GizmodoFRBridge.php | 48 ------------------------------------- 2 files changed, 35 insertions(+), 48 deletions(-) create mode 100644 bridges/GizmodoBridge.php delete mode 100644 bridges/GizmodoFRBridge.php diff --git a/bridges/GizmodoBridge.php b/bridges/GizmodoBridge.php new file mode 100644 index 0000000..0fc80a5 --- /dev/null +++ b/bridges/GizmodoBridge.php @@ -0,0 +1,35 @@ +getSimpleHTMLDOMCached($item['uri']); + if(!$articleHTMLContent){ + $text = 'Could not load '.$item['uri']; + }else{ + $text = $articleHTMLContent->find('div.entry-content', 0)->innertext; + foreach($articleHTMLContent->find('pagespeed_iframe') as $element) { + $text .= '

link to a iframe (could be a video): '.$element->src.'


'; + } + + $text = strip_tags($text, '

'); + } + + $item['content'] = $text; + return $item; + } + + public function collectData(){ + $this->collectExpandableDatas('http://feeds.gawker.com/gizmodo/full'); + } + + public function getCacheDuration(){ + return 1800; // 30min + } +} diff --git a/bridges/GizmodoFRBridge.php b/bridges/GizmodoFRBridge.php deleted file mode 100644 index e653130..0000000 --- a/bridges/GizmodoFRBridge.php +++ /dev/null @@ -1,48 +0,0 @@ -getSimpleHTMLDOM($url); - if(!$articleHTMLContent){ - return 'Could not load '.$url; - } - $text = $articleHTMLContent->find('div.entry-thumbnail', 0)->innertext; - $text = $text.$articleHTMLContent->find('div.entry-excerpt', 0)->innertext; - $text = $text.$articleHTMLContent->find('div.entry-content', 0)->innertext; - foreach($articleHTMLContent->find('pagespeed_iframe') as $element) { - $text = $text.'

link to a iframe (could be a video): '.$element->src.'


'; - } - - $text = strip_tags($text, '

'); - return $text; - } - - $rssFeed = $this->getSimpleHTMLDOM(self::URI.'/feed') - or $this->returnServerError('Could not request '.self::URI.'/feed'); - $limit = 0; - - foreach($rssFeed->find('item') as $element) { - if($limit < 15) { - $item = array(); - $item['title'] = $element->find('title', 0)->innertext; - $item['uri'] = $element->find('guid', 0)->plaintext; - $item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext); - $item['content'] = GizmodoFRExtractContent($item['uri']); - $this->items[] = $item; - $limit++; - } - } - - } - - public function getCacheDuration(){ - return 1800; // 30min - } -} From 8f58c9f86bc7bbb384430e42f45228d5f75be3e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Wed, 14 Sep 2016 13:33:38 +0200 Subject: [PATCH 03/11] [bridges] make them FeedExpander MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- bridges/ArstechnicaBridge.php | 78 ++++++---------------- bridges/KoreusBridge.php | 36 +++------- bridges/NakedSecurityBridge.php | 109 +++++++++++++++---------------- bridges/NeuviemeArtBridge.php | 89 ++++++++++++------------- bridges/Rue89Bridge.php | 34 ++-------- bridges/SiliconBridge.php | 72 ++++++++------------ bridges/WeLiveSecurityBridge.php | 78 +++++++++------------- 7 files changed, 190 insertions(+), 306 deletions(-) diff --git a/bridges/ArstechnicaBridge.php b/bridges/ArstechnicaBridge.php index 2620715..0146a42 100644 --- a/bridges/ArstechnicaBridge.php +++ b/bridges/ArstechnicaBridge.php @@ -1,66 +1,30 @@ ', '', $string); - return $string; - } + $html = $this->getSimpleHTMLDOMCached($item['uri']); + if(!$html){ + $item['content'] .= '

Requesting full article failed.

'; + }else{ + $item['content'] = $html->find('.article-guts', 0); + } - function ExtractContent($url) { - #echo $url; - $html2 = $this->getSimpleHTMLDOM($url); + return $item; + } - $text = $html2->find("section[id='article-guts']", 0); - /*foreach ($text->find('