<?php
/**
 * Nextgov bridge: USA Federal technology news, best practices, and web 2.0 tools.
 *
 * Provenance: recovered from the patch
 *   commit  dfc4e8aaac78278dadc1198d142628a995491f96
 *   author  ORelio
 *   date    Tue, 26 Jul 2016 21:20:13 +0200
 *   subject [Nextgov] New bridge. USA tech news.
 *   file    bridges/NextgovBridge.php (new file)
 *
 * NOTE(review): the recovered text had been run through an HTML tag stripper,
 * which removed everything between a '<' and the next '>'. The class header,
 * the name of the metadata method and every string literal that contained
 * markup were lost and have been reconstructed. Each reconstructed spot is
 * marked with NOTE(review) below and must be checked against the original
 * commit before this file is relied upon.
 */

// NOTE(review): class header reconstructed. The class name follows the file
// name; the base class "BridgeAbstract" is an assumption (the code relies on
// inherited members: parameters, items, returnError(), file_get_html()).
class NextgovBridge extends BridgeAbstract {

    /**
     * Declares the bridge metadata and its single "category" list parameter.
     *
     * NOTE(review): the method name was lost with the class header;
     * "loadMetadatas" is an assumption - confirm against the base class.
     */
    public function loadMetadatas() {

        $this->maintainer = 'ORelio';
        $this->name = $this->getName();
        $this->uri = $this->getURI();
        $this->description = 'USA Federal technology news, best practices, and web 2.0 tools.';
        $this->update = '2016-07-20';

        // Parameter description as a JSON string. The original whitespace
        // inside the string was collapsed; it is not significant for JSON.
        $this->parameters[] =
        '[
            {
                "name" : "Category",
                "type" : "list",
                "identifier" : "category",
                "values" :
                [
                    { "name" : "All", "value" : "all" },
                    { "name" : "Technology News", "value" : "technology-news" },
                    { "name" : "CIO Briefing", "value" : "cio-briefing" },
                    { "name" : "Emerging Tech", "value" : "emerging-tech" },
                    { "name" : "Cloud", "value" : "cloud-computing" },
                    { "name" : "Cybersecurity", "value" : "cybersecurity" },
                    { "name" : "Mobile", "value" : "mobile" },
                    { "name" : "Health", "value" : "health" },
                    { "name" : "Defense", "value" : "defense" },
                    { "name" : "Big Data", "value" : "big-data" }
                ]
            }
        ]';

    }

    /**
     * Returns the text located between the first $start and the first $end
     * that follows it.
     *
     * Formerly a function declared inside collectData(); PHP registers such
     * functions globally when the enclosing method runs, so a second call
     * (or another file declaring the same name) caused a fatal
     * "cannot redeclare" error.
     *
     * @param string $string Text to search.
     * @param string $start  Opening delimiter.
     * @param string $end    Closing delimiter.
     * @return string|false  Enclosed text, or false when either delimiter is
     *                       empty or cannot be found.
     */
    private function extractFromDelimiters($string, $start, $end) {
        // An empty needle makes strpos() warn (PHP < 8) or match at offset 0 (PHP 8).
        if ($start === '' || $end === '')
            return false;

        $start_pos = strpos($string, $start);
        if ($start_pos === false)
            return false;

        $content_pos = $start_pos + strlen($start);
        $end_pos = strpos($string, $end, $content_pos);
        if ($end_pos === false)
            return false; // previously yielded '' because substr() received a false length

        return substr($string, $content_pos, $end_pos - $content_pos);
    }

    /**
     * Removes every section that begins with $start and runs up to and
     * including the next $end.
     *
     * @param string $string Text to clean.
     * @param string $start  Opening delimiter of the sections to remove.
     * @param string $end    Closing delimiter of the sections to remove.
     * @return string        Cleaned text; $string is returned unchanged when
     *                       a delimiter is empty.
     */
    private function stripWithDelimiters($string, $start, $end) {
        // Empty delimiters could never make progress and used to risk an endless loop.
        if ($start === '' || $end === '')
            return $string;

        while (($start_pos = strpos($string, $start)) !== false) {
            // As in the original, $end is searched from the start of the section.
            $end_pos = strpos($string, $end, $start_pos);
            if ($end_pos === false)
                break; // unterminated section: leave the remainder untouched

            $section_length = $end_pos + strlen($end) - $start_pos;
            $string = substr_replace($string, '', $start_pos, $section_length);
        }

        return $string;
    }

    /**
     * Reads the RSS feed of the requested category and turns up to ten
     * entries into items, downloading each article page for its body.
     *
     * @param array $param Request parameters; the optional 'category' entry
     *                     may only contain lowercase letters and dashes, is
     *                     limited to 32 characters and defaults to 'all'.
     */
    public function collectData(array $param) {

        // empty() also covers a missing key, avoiding an undefined index notice.
        $category = empty($param['category']) ? 'all' : $param['category'];

        // Fixed: the original tested strlen($category > 32), i.e. the length
        // of a boolean, so the length limit was never enforced.
        if ($category !== preg_replace('/[^a-z-]+/', '', $category) || strlen($category) > 32)
            $this->returnError('Invalid "category" parameter.', 400);

        $url = $this->getURI().'rss/'.$category.'/';
        $html = $this->file_get_html($url) or $this->returnError('Could not request Nextgov: '.$url, 500);
        $limit = 0;

        foreach ($html->find('item') as $element) {

            // Stop at ten articles instead of walking the rest of the feed for nothing.
            if ($limit >= 10)
                break;

            // NOTE(review): both delimiters were stripped from the recovered
            // text; '<link>' / '</link>' is an assumption - confirm.
            $article_url = $this->extractFromDelimiters($element->innertext, '<link>', '</link>');

            // NOTE(review): the closing delimiter was stripped;
            // '</dc:creator>' is an assumption - confirm.
            $article_author = $this->extractFromDelimiters($element->innertext, 'dc/elements/1.1/">', '</dc:creator>');

            $article_title = $element->find('title', 0)->plaintext;
            $article_subtitle = $element->find('description', 0)->plaintext;
            $article_timestamp = strtotime($element->find('pubDate', 0)->plaintext);

            // NOTE(review): both delimiters were stripped together with the
            // start of the following statement; '<media:content url="' / '"'
            // is an assumption - confirm.
            $article_thumbnail = $this->extractFromDelimiters($element->innertext, '<media:content url="', '"');
            $has_thumbnail = ($article_thumbnail !== false && $article_thumbnail !== '');

            $article = $this->file_get_html($article_url) or $this->returnError('Could not request Nextgov: '.$article_url, 500);

            // Guard against pages without the expected body container, which
            // previously triggered a fatal error on the null result.
            $body = $article->find('div.wysiwyg', 0);
            $contents = ($body === null ? '' : $body->innertext);

            // NOTE(review): the delimiters of the three calls below were
            // stripped. The values are assumptions - the 'ad-container' class
            // name in particular must be confirmed.
            $contents = $this->stripWithDelimiters($contents, '<div class="ad-container">', '</div>');
            $contents = $this->stripWithDelimiters($contents, '<div', '</div>'); //ad outer div
            $contents = $this->stripWithDelimiters($contents, '<script', '</script>');

            // NOTE(review): the markup around the thumbnail and the subtitle
            // was stripped; the tags below are assumptions - confirm.
            $contents = ($has_thumbnail ? '<p><img src="'.$article_thumbnail.'" /></p>' : '')
                .'<p><b>'.$article_subtitle.'</b></p>'
                .trim($contents);

            // Fall back to the site logo when the feed entry carries no image.
            if (!$has_thumbnail)
                $article_thumbnail = 'http://cdn.nextgov.com/nextgov/images/logo.png';

            $item = new \Item();
            $item->uri = $article_url;
            $item->title = $article_title;
            $item->author = $article_author;
            $item->thumbnailUri = $article_thumbnail;
            $item->timestamp = $article_timestamp;
            $item->content = $contents;
            $this->items[] = $item;
            $limit++;
        }

    }

    /**
     * @return string Display name of the bridge.
     */
    public function getName() {
        return 'Nextgov Bridge';
    }

    /**
     * @return string Base URL of the site, with a trailing slash.
     */
    public function getURI() {
        return 'https://www.nextgov.com/';
    }

    /**
     * @return int Cache lifetime in seconds.
     */
    public function getCacheDuration() {
        return 3600; //1 hour
    }
}