diff --git a/bridges/ArstechnicaBridge.php b/bridges/ArstechnicaBridge.php index 0146a42..0c90fc5 100644 --- a/bridges/ArstechnicaBridge.php +++ b/bridges/ArstechnicaBridge.php @@ -1,30 +1,11 @@ getSimpleHTMLDOMCached($item['uri']); - if(!$html){ - $item['content'] .= '

Requesting full article failed.

'; - }else{ - $item['content'] = $html->find('.article-guts', 0); - } - - return $item; - } - - public function collectData(){ - $this->collectExpandableDatas('http://feeds.arstechnica.com/arstechnica/index/'); - } - - public function getCacheDuration() { - return 7200; // 2h - } - + const PARAMETERS = array(); } diff --git a/bridges/FreenewsBridge.php b/bridges/FreenewsBridge.php index ab4d3d8..f56a083 100644 --- a/bridges/FreenewsBridge.php +++ b/bridges/FreenewsBridge.php @@ -1,22 +1,11 @@ getSimpleHTMLDOMCached($item['uri']); - $content = $articlePage->find('.post-container', 0); - $item['content'] = $content->innertext; - - return $item; - } + const DESCRIPTION = "Un site d'actualité pour les freenautes (mais ne parlant pas que de la freebox)"; + const PARAMETERS = array(); } diff --git a/bridges/LeJournalDuGeekBridge.php b/bridges/LeJournalDuGeekBridge.php index a82d815..0d6a2d1 100644 --- a/bridges/LeJournalDuGeekBridge.php +++ b/bridges/LeJournalDuGeekBridge.php @@ -1,42 +1,13 @@ collectExpandableDatas(self::URI . 'rss', 5); - } - - protected function parseItem($newsItem){ - $item = parent::parseItem($newsItem); - $item['content'] = $this->LeJournalDuGeekExtractContent($item['uri']); - return $item; - } - - private function LeJournalDuGeekExtractContent($url) { - $articleHTMLContent = $this->getSimpleHTMLDOMCached($url); - $text = $articleHTMLContent->find('div.post-content', 0)->innertext; - - foreach($articleHTMLContent->find('a.more') as $element) { - if ($element->innertext == "Source") { - $text = $text . '

Source : ' . $element->href . '

'; - break; - } - } - - foreach($articleHTMLContent->find('iframe') as $element) { - if (preg_match("/youtube/i", $element->src)) { - $text = $text . '// An IFRAME to Youtube was included in the article: ' . $element->src . '
'; - } - } - - $text = preg_replace('#(.*?)#is', '', $text); - $text = strip_tags($text, '



  • '); - return $text; - } + const DESCRIPTION = "Returns the newest posts from LeJournalDuGeek (full text)."; + const PARAMETERS = array(); public function getCacheDuration(){ return 1800; // 30min diff --git a/bridges/NakedSecurityBridge.php b/bridges/NakedSecurityBridge.php index 4360772..ed2fb3d 100644 --- a/bridges/NakedSecurityBridge.php +++ b/bridges/NakedSecurityBridge.php @@ -1,61 +1,11 @@ '; - $close_tag_length = strlen($close_tag); - if (strpos($tag_start, $open_tag) === 0) { - while (strpos($string, $tag_start) !== false) { - $max_recursion = 100; - $section_to_remove = null; - $section_start = strpos($string, $tag_start); - $search_offset = $section_start; - do { - $max_recursion--; - $section_end = strpos($string, $close_tag, $search_offset); - $search_offset = $section_end + $close_tag_length; - $section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length); - $open_tag_count = substr_count($section_to_remove, $open_tag); - $close_tag_count = substr_count($section_to_remove, $close_tag); - } while ($open_tag_count > $close_tag_count && $max_recursion > 0); - $string = str_replace($section_to_remove, '', $string); - } - } - return $string; - } - - - protected function parseItem($item){ - $item = parent::parseItem($item); - - $article_html = $this->getSimpleHTMLDOMCached($item['uri']); - if(!$article_html){ - $item['content'] = 'Could not request '.$this->getName().': '.$item['uri']; - return $item; - } - - $article_image = $article_html->find('img.wp-post-image', 0)->src; - $article_content = $article_html->find('div.entry-content', 0)->innertext; - $article_content = $this->StripRecursiveHTMLSection($article_content , 'div', '
    StripRecursiveHTMLSection($article_content , 'script', 'StripRecursiveHTMLSection($article_content , 'aside', '

    '.$item['content'].'

    '.$article_content; - - $item['content'] = $article_content; - - return $item; - - } - - public function collectData(){ - - $feedUrl = 'https://feeds.feedburner.com/nakedsecurity?format=xml'; - $this->collectExpandableDatas($feedUrl); - } + const PARAMETERS = array(); } diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index 34296b5..39d7340 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -1,29 +1,15 @@ collectExpandableDatas(self::URI . 'feed/', 5); - } - - protected function parseItem($newsItem){ - $item = parent::parseItem($newsItem); - $item['content'] = $this->ExtractContent($item['uri']); - return $item; - } - - private function ExtractContent($url){ - $article_html = $this->getSimpleHTMLDOMCached('Could not request Numerama: '.$url); - $contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block - $contents = ''; // add post picture - return $contents . $article_html->find('article[class=post-content]', 0)->innertext; // extract the post - } - + const DESCRIPTION = 'Returns the newest posts from Numerama (full text)'; + const PARAMETERS = array(); public function getCacheDuration() { + return 1800; // 30min } } diff --git a/bridges/SiliconBridge.php b/bridges/SiliconBridge.php index f48bffa..90b9641 100644 --- a/bridges/SiliconBridge.php +++ b/bridges/SiliconBridge.php @@ -1,39 +1,13 @@ getSimpleHTMLDOMCached($item['uri']); - if(!$article_html){ - $item['content'] .= '

    Could not request Silicon: '.$item['uri'].'

    '; - return $item; - } - - $article_content = '

    '.$article_html->find('div.entry-excerpt', 0)->plaintext.'

    ' - .$article_html->find('div.entry-content', 0)->innertext; - - //Remove useless scripts left in the page - while (strpos($article_content, '') + 9); - $article_content = str_replace($script_section, '', $article_content); - } - - $item['content'] = $article_content; - - return $item; - } - - public function collectData(){ - $feedUrl = self::URI.'feed'; - $this->collectExpandableDatas($feedUrl); - } + const PARAMETERS = array(); public function getCacheDuration() { return 1800; // 30 minutes diff --git a/bridges/ZatazBridge.php b/bridges/ZatazBridge.php index b805d60..d1e0819 100644 --- a/bridges/ZatazBridge.php +++ b/bridges/ZatazBridge.php @@ -1,41 +1,13 @@ getSimpleHTMLDOM(self::URI) or $this->returnServerError('Could not request ' . self::URI); - - $recent_posts = $html->find('#recent-posts-3', 0)->find('ul', 0)->find('li'); - foreach ($recent_posts as $article) { - if (count($this->items) < 5) { - $uri = $article->find('a', 0)->href; - $this->items[] = $this->getDetails($uri); - } - } - } - - private function getDetails($uri) { - $html = $this->getSimpleHTMLDOM($uri) or exit; - - $item = array(); - - $article = $html->find('.gdl-blog-full', 0); - $item['uri'] = $uri; - $item['title'] = $article->find('.blog-title', 0)->find('a', 0)->innertext; - $item['content'] = $article->find('.blog-content', 0)->innertext; - $item['timestamp'] = $this->getTimestampFromDate($article->find('.blog-date', 0)->find('a', 0)->href); - return $item; - } - - private function getTimestampFromDate($uri) { - preg_match('/\d{4}\/\d{2}\/\d{2}/', $uri, $matches); - $date = new \DateTime($matches[0]); - return $date->format('U'); - } + const PARAMETERS = array(); public function getCacheDuration() { return 7200; // 2h