NextgovBridge.php 3.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. <?php
  /**
   * Nextgov bridge: expands the Nextgov RSS feed of one category into
   * full-text items by fetching every linked article page and using the
   * inner HTML of its "div.wysiwyg" element as the item content.
   */
  class NextgovBridge extends BridgeAbstract {

      /** Maximum number of feed entries expanded into items per request. */
      const MAX_ITEMS = 10;

      /** Longest accepted "category" value, in bytes. */
      const MAX_CATEGORY_LENGTH = 32;

      /**
       * Declares the bridge metadata and its single "category" list parameter.
       */
      public function loadMetadatas() {
          $this->maintainer = 'ORelio';
          $this->name = 'Nextgov Bridge';
          $this->uri = 'https://www.nextgov.com/';
          $this->description = 'USA Federal technology news, best practices, and web 2.0 tools.';
          $this->parameters[] = array(
              'category' => array(
                  'name' => 'Category',
                  'type' => 'list',
                  'values' => array(
                      'All' => 'all',
                      'Technology News' => 'technology-news',
                      'CIO Briefing' => 'cio-briefing',
                      'Emerging Tech' => 'emerging-tech',
                      'Cloud' => 'cloud-computing',
                      'Cybersecurity' => 'cybersecurity',
                      'Mobile' => 'mobile',
                      'Health' => 'health',
                      'Defense' => 'defense',
                      'Big Data' => 'big-data'
                  )
              )
          );
      }

      /**
       * Fetches the category feed and appends up to MAX_ITEMS items to $this->items.
       *
       * Each item carries the keys 'uri', 'title', 'author', 'timestamp' and
       * 'content'. Failures are reported through returnClientError() /
       * returnServerError() (defined outside this file; presumably they abort
       * the request - confirm against BridgeAbstract).
       *
       * @param array $param Request parameters; only 'category' is read. A missing
       *                     or empty value selects the 'all' feed.
       */
      public function collectData(array $param) {
          // empty() also covers an unset key, so no undefined-index notice is raised.
          $category = empty($param['category']) ? 'all' : $param['category'];

          // Only lowercase letters and dashes, at most MAX_CATEGORY_LENGTH bytes.
          // The is_string() guard rejects array input such as "?category[]=x".
          // The original wrote strlen($category > 32), which measured a boolean
          // and therefore never enforced the length limit.
          if (!is_string($category)
              || preg_match('/\A[a-z-]+\z/', $category) !== 1
              || strlen($category) > self::MAX_CATEGORY_LENGTH) {
              $this->returnClientError('Invalid "category" parameter.');
          }

          $url = $this->getURI() . 'rss/' . $category . '/';
          // "=" binds tighter than "or": the error handler only runs when the
          // fetch result is falsy.
          $html = $this->getSimpleHTMLDOM($url)
              or $this->returnServerError('Could not request Nextgov: ' . $url);

          $count = 0;
          foreach ($html->find('item') as $element) {
              if ($count >= self::MAX_ITEMS) {
                  break; // nothing left to do once the limit is reached
              }

              // <link>, <dc:creator> and <media:content> are cut out of the raw
              // item markup instead of being looked up through the DOM.
              $article_url = $this->extractFromDelimiters($element->innertext, '<link>', '</link>');
              $article_author = $this->extractFromDelimiters($element->innertext, 'dc/elements/1.1/">', '</dc:creator>');
              $article_title = $element->find('title', 0)->plaintext;
              $article_subtitle = $element->find('description', 0)->plaintext;
              $article_timestamp = strtotime($element->find('pubDate', 0)->plaintext);
              $article_thumbnail = $this->extractFromDelimiters($element->innertext, '<media:content url="', '"');

              $article = $this->getSimpleHTMLDOM($article_url)
                  or $this->returnServerError('Could not request Nextgov: ' . $article_url);

              // A page without the expected body container yields an item that
              // only shows the thumbnail and subtitle instead of a fatal error.
              $body = $article->find('div.wysiwyg', 0);
              $contents = ($body === null) ? '' : $body->innertext;
              $contents = $this->stripWithDelimiters($contents, '<div class="ad-container">', '</div>');
              $contents = $this->stripWithDelimiters($contents, '<div', '</div>'); // ad outer div
              $contents = $this->stripWithDelimiters($contents, '<script', '</script>');

              $thumbnail_html = ($article_thumbnail === false || $article_thumbnail === '')
                  ? ''
                  : '<p><img src="' . $article_thumbnail . '" /></p>';
              $contents = $thumbnail_html
                  . '<p><b>' . $article_subtitle . '</b></p>'
                  . trim($contents);

              $item = array();
              $item['uri'] = $article_url;
              $item['title'] = $article_title;
              $item['author'] = $article_author;
              $item['timestamp'] = $article_timestamp;
              $item['content'] = $contents;
              $this->items[] = $item;
              $count++;
          }
      }

      /**
       * Returns the text between the first $start and the next $end after it.
       *
       * Implemented as a private method rather than a function declared inside
       * collectData(): a nested named function is registered globally on first
       * execution, so running collectData() twice would fail with
       * "Cannot redeclare".
       *
       * @param string $string Text to search.
       * @param string $start  Opening delimiter (not included in the result).
       * @param string $end    Closing delimiter (not included in the result).
       * @return string|false  The enclosed text, or false when either delimiter
       *                       is missing.
       */
      private function extractFromDelimiters($string, $start, $end) {
          $start_pos = strpos($string, $start);
          if ($start_pos === false) {
              return false;
          }
          $from = $start_pos + strlen($start);
          $end_pos = strpos($string, $end, $from);
          if ($end_pos === false) {
              return false;
          }
          return substr($string, $from, $end_pos - $from);
      }

      /**
       * Removes every section that begins with $start and runs through the
       * next $end, delimiters included.
       *
       * Each section found is removed with str_replace(), so identical copies
       * elsewhere in the text disappear in the same pass.
       *
       * @param string $string Text to clean.
       * @param string $start  Opening delimiter of a section to drop.
       * @param string $end    Closing delimiter of a section to drop.
       * @return string        The text without the delimited sections.
       */
      private function stripWithDelimiters($string, $start, $end) {
          // An empty delimiter cannot mark a section and would never let the
          // loop below make progress.
          if ($start === '' || $end === '') {
              return $string;
          }
          while (($start_pos = strpos($string, $start)) !== false) {
              $end_pos = strpos($string, $end, $start_pos);
              if ($end_pos === false) {
                  // Unterminated section: stop rather than cut an arbitrary
                  // strlen($end)-sized prefix off the match.
                  break;
              }
              $section_to_remove = substr($string, $start_pos, $end_pos - $start_pos + strlen($end));
              $string = str_replace($section_to_remove, '', $string);
          }
          return $string;
      }
  }