NextgovBridge.php 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. <?php
  /**
   * Bridge for Nextgov (USA Federal technology news).
   *
   * Reads one of the site's per-category RSS feeds, downloads every listed
   * article and emits one item per article with a cleaned-up article body.
   */
  class NextgovBridge extends BridgeAbstract {

      /**
       * Fills in the bridge metadata (maintainer, name, URI, description,
       * last update date) and declares the "category" list parameter as a
       * JSON description appended to $this->parameters.
       */
      public function loadMetadatas() {
          $this->maintainer = 'ORelio';
          $this->name = $this->getName();
          $this->uri = $this->getURI();
          $this->description = 'USA Federal technology news, best practices, and web 2.0 tools.';
          $this->update = '2016-07-20';
          $this->parameters[] =
          '[
              {
                  "name" : "Category",
                  "type" : "list",
                  "identifier" : "category",
                  "values" :
                  [
                      { "name" : "All", "value" : "all" },
                      { "name" : "Technology News", "value" : "technology-news" },
                      { "name" : "CIO Briefing", "value" : "cio-briefing" },
                      { "name" : "Emerging Tech", "value" : "emerging-tech" },
                      { "name" : "Cloud", "value" : "cloud-computing" },
                      { "name" : "Cybersecurity", "value" : "cybersecurity" },
                      { "name" : "Mobile", "value" : "mobile" },
                      { "name" : "Health", "value" : "health" },
                      { "name" : "Defense", "value" : "defense" },
                      { "name" : "Big Data", "value" : "big-data" }
                  ]
              }
          ]';
      }

      /**
       * Fetches the RSS feed of the requested category and appends up to ten
       * articles to $this->items.
       *
       * For each feed entry the linked article page is downloaded, its
       * "div.wysiwyg" body is stripped of ad containers, remaining <div>
       * blocks and <script> blocks, and the result is prefixed with the
       * thumbnail (when the feed provides one) and the feed description.
       *
       * Errors are reported through $this->returnError(), which presumably
       * aborts the request (TODO confirm against BridgeAbstract).
       *
       * @param array $param Request parameters; the optional "category" entry
       *                     selects the feed and defaults to "all". It may only
       *                     contain lowercase letters and dashes, 32 characters
       *                     at most.
       */
      public function collectData(array $param) {
          $category = isset($param['category']) ? $param['category'] : '';
          if (empty($category)) {
              $category = 'all';
          }

          // The category becomes part of the request URL: accept only
          // lowercase letters and dashes, and cap the length at 32 characters.
          if (!is_string($category) || !preg_match('/\A[a-z-]{1,32}\z/', $category)) {
              $this->returnError('Invalid "category" parameter.', 400);
          }

          $url = $this->getURI().'rss/'.$category.'/';
          $html = $this->file_get_html($url);
          if (!$html) {
              $this->returnError('Could not request Nextgov: '.$url, 500);
          }

          $max_items = 10;
          $item_count = 0;
          foreach ($html->find('item') as $element) {
              // Every entry costs one extra HTTP request, so stop at the limit
              // instead of walking the rest of the feed.
              if ($item_count >= $max_items) {
                  break;
              }

              // Link, author and thumbnail are read from the raw entry markup.
              $article_url = $this->extractFromDelimiters($element->innertext, '<link>', '</link>');
              $article_author = $this->extractFromDelimiters($element->innertext, 'dc/elements/1.1/">', '</dc:creator>');
              $article_title = $element->find('title', 0)->plaintext;
              $article_subtitle = $element->find('description', 0)->plaintext;
              $article_timestamp = strtotime($element->find('pubDate', 0)->plaintext);
              $article_thumbnail = $this->extractFromDelimiters($element->innertext, '<media:content url="', '"');
              $has_thumbnail = ($article_thumbnail !== false && $article_thumbnail !== '');

              // An entry without a usable link is reported like a failed download.
              $article = $article_url ? $this->file_get_html($article_url) : false;
              if (!$article) {
                  $this->returnError('Could not request Nextgov: '.$article_url, 500);
              }

              // Pages without the expected container yield an empty body.
              $body = $article->find('div.wysiwyg', 0);
              $contents = $body ? $body->innertext : '';
              $contents = $this->stripWithDelimiters($contents, '<div class="ad-container">', '</div>');
              $contents = $this->stripWithDelimiters($contents, '<div', '</div>'); //ad outer div
              $contents = $this->stripWithDelimiters($contents, '<script', '</script>');
              $contents = ($has_thumbnail ? '<p><img src="'.$article_thumbnail.'" /></p>' : '')
                  .'<p><b>'.$article_subtitle.'</b></p>'
                  .trim($contents);

              // Fall back to the site logo when the feed gives no thumbnail.
              if (!$has_thumbnail) {
                  $article_thumbnail = 'http://cdn.nextgov.com/nextgov/images/logo.png';
              }

              $item = new \Item();
              $item->uri = $article_url;
              $item->title = $article_title;
              $item->author = $article_author;
              $item->thumbnailUri = $article_thumbnail;
              $item->timestamp = $article_timestamp;
              $item->content = $contents;
              $this->items[] = $item;
              $item_count++;
          }
      }

      /**
       * Returns the display name of this bridge.
       *
       * @return string
       */
      public function getName() {
          return 'Nextgov Bridge';
      }

      /**
       * Returns the base URI of the site, with a trailing slash.
       *
       * @return string
       */
      public function getURI() {
          return 'https://www.nextgov.com/';
      }

      /**
       * Returns the cache duration: 3600, i.e. one hour in seconds.
       *
       * @return int
       */
      public function getCacheDuration() {
          return 3600; //1 hour
      }

      /**
       * Returns the text located between the first occurrence of $start and
       * the next occurrence of $end after it.
       *
       * @param string $string Text to search.
       * @param string $start  Opening delimiter.
       * @param string $end    Closing delimiter.
       * @return string|false The enclosed text, or false when either delimiter
       *                      is missing.
       */
      private function extractFromDelimiters($string, $start, $end) {
          $start_pos = strpos($string, $start);
          if ($start_pos === false) {
              return false;
          }

          $content_pos = $start_pos + strlen($start);
          $end_pos = strpos($string, $end, $content_pos);
          if ($end_pos === false) {
              return false;
          }

          return substr($string, $content_pos, $end_pos - $content_pos);
      }

      /**
       * Removes every section that begins with $start and runs up to and
       * including the next $end, delimiters included.
       *
       * @param string $string Text to clean.
       * @param string $start  Opening delimiter.
       * @param string $end    Closing delimiter.
       * @return string The text without the delimited sections.
       */
      private function stripWithDelimiters($string, $start, $end) {
          while (($start_pos = strpos($string, $start)) !== false) {
              $end_pos = strpos($string, $end, $start_pos);
              if ($end_pos === false) {
                  // Unterminated section: keep the remaining text untouched
                  // rather than cutting an arbitrary slice out of it.
                  break;
              }

              $section = substr($string, $start_pos, $end_pos - $start_pos + strlen($end));
              $string = str_replace($section, '', $string);
          }

          return $string;
      }
  }