From 3f64d2d65a747504992dda951aaa616827c0db04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Thu, 15 Sep 2016 12:40:26 +0200 Subject: [PATCH] [bridges] make them WordPressBridge derivatives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The specific content filtering used in these bridges will need to be reintegrated later as part of the bridge or as part of the WordPressBridge if they are considered generic enough filters, such as the already existing WordPressBridge #is', '', $text); - $text = strip_tags($text, '



  • '); - return $text; - } + const DESCRIPTION = "Returns the newest posts from LeJournalDuGeek (full text)."; + const PARAMETERS = array(); public function getCacheDuration(){ return 1800; // 30min diff --git a/bridges/NakedSecurityBridge.php b/bridges/NakedSecurityBridge.php index 4360772..ed2fb3d 100644 --- a/bridges/NakedSecurityBridge.php +++ b/bridges/NakedSecurityBridge.php @@ -1,61 +1,11 @@ '; - $close_tag_length = strlen($close_tag); - if (strpos($tag_start, $open_tag) === 0) { - while (strpos($string, $tag_start) !== false) { - $max_recursion = 100; - $section_to_remove = null; - $section_start = strpos($string, $tag_start); - $search_offset = $section_start; - do { - $max_recursion--; - $section_end = strpos($string, $close_tag, $search_offset); - $search_offset = $section_end + $close_tag_length; - $section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length); - $open_tag_count = substr_count($section_to_remove, $open_tag); - $close_tag_count = substr_count($section_to_remove, $close_tag); - } while ($open_tag_count > $close_tag_count && $max_recursion > 0); - $string = str_replace($section_to_remove, '', $string); - } - } - return $string; - } - - - protected function parseItem($item){ - $item = parent::parseItem($item); - - $article_html = $this->getSimpleHTMLDOMCached($item['uri']); - if(!$article_html){ - $item['content'] = 'Could not request '.$this->getName().': '.$item['uri']; - return $item; - } - - $article_image = $article_html->find('img.wp-post-image', 0)->src; - $article_content = $article_html->find('div.entry-content', 0)->innertext; - $article_content = $this->StripRecursiveHTMLSection($article_content , 'div', '
    StripRecursiveHTMLSection($article_content , 'script', 'StripRecursiveHTMLSection($article_content , 'aside', '

    '.$item['content'].'

    '.$article_content; - - $item['content'] = $article_content; - - return $item; - - } - - public function collectData(){ - - $feedUrl = 'https://feeds.feedburner.com/nakedsecurity?format=xml'; - $this->collectExpandableDatas($feedUrl); - } + const PARAMETERS = array(); } diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index 34296b5..39d7340 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -1,29 +1,15 @@ collectExpandableDatas(self::URI . 'feed/', 5); - } - - protected function parseItem($newsItem){ - $item = parent::parseItem($newsItem); - $item['content'] = $this->ExtractContent($item['uri']); - return $item; - } - - private function ExtractContent($url){ - $article_html = $this->getSimpleHTMLDOMCached('Could not request Numerama: '.$url); - $contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block - $contents = ''; // add post picture - return $contents . $article_html->find('article[class=post-content]', 0)->innertext; // extract the post - } - + const DESCRIPTION = 'Returns the newest posts from Numerama (full text)'; + const PARAMETERS = array(); public function getCacheDuration() { + return 1800; // 30min } } diff --git a/bridges/SiliconBridge.php b/bridges/SiliconBridge.php index f48bffa..90b9641 100644 --- a/bridges/SiliconBridge.php +++ b/bridges/SiliconBridge.php @@ -1,39 +1,13 @@ getSimpleHTMLDOMCached($item['uri']); - if(!$article_html){ - $item['content'] .= '

    Could not request Silicon: '.$item['uri'].'

    '; - return $item; - } - - $article_content = '

    '.$article_html->find('div.entry-excerpt', 0)->plaintext.'

    ' - .$article_html->find('div.entry-content', 0)->innertext; - - //Remove useless scripts left in the page - while (strpos($article_content, '') + 9); - $article_content = str_replace($script_section, '', $article_content); - } - - $item['content'] = $article_content; - - return $item; - } - - public function collectData(){ - $feedUrl = self::URI.'feed'; - $this->collectExpandableDatas($feedUrl); - } + const PARAMETERS = array(); public function getCacheDuration() { return 1800; // 30 minutes diff --git a/bridges/ZatazBridge.php b/bridges/ZatazBridge.php index b805d60..d1e0819 100644 --- a/bridges/ZatazBridge.php +++ b/bridges/ZatazBridge.php @@ -1,41 +1,13 @@ getSimpleHTMLDOM(self::URI) or $this->returnServerError('Could not request ' . self::URI); - - $recent_posts = $html->find('#recent-posts-3', 0)->find('ul', 0)->find('li'); - foreach ($recent_posts as $article) { - if (count($this->items) < 5) { - $uri = $article->find('a', 0)->href; - $this->items[] = $this->getDetails($uri); - } - } - } - - private function getDetails($uri) { - $html = $this->getSimpleHTMLDOM($uri) or exit; - - $item = array(); - - $article = $html->find('.gdl-blog-full', 0); - $item['uri'] = $uri; - $item['title'] = $article->find('.blog-title', 0)->find('a', 0)->innertext; - $item['content'] = $article->find('.blog-content', 0)->innertext; - $item['timestamp'] = $this->getTimestampFromDate($article->find('.blog-date', 0)->find('a', 0)->href); - return $item; - } - - private function getTimestampFromDate($uri) { - preg_match('/\d{4}\/\d{2}\/\d{2}/', $uri, $matches); - $date = new \DateTime($matches[0]); - return $date->format('U'); - } + const PARAMETERS = array(); public function getCacheDuration() { return 7200; // 2h