CNETBridge.php 3.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. <?php
  2. class CNETBridge extends BridgeAbstract {
  3. private $topicName = '';
  4. public function loadMetadatas() {
  5. $this->maintainer = 'ORelio';
  6. $this->name = 'CNET News';
  7. $this->uri = 'http://www.cnet.com/';
  8. $this->description = 'Returns the newest articles. <br /> You may specify a topic found in some section URLs, else all topics are selected.';
  9. $this->parameters[] = array(
  10. 'topic'=>array('name'=>'Topic name')
  11. );
  12. }
  13. public function collectData(array $param) {
  14. function ExtractFromDelimiters($string, $start, $end) {
  15. if (strpos($string, $start) !== false) {
  16. $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
  17. $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
  18. return $section_retrieved;
  19. } return false;
  20. }
  21. function StripWithDelimiters($string, $start, $end) {
  22. while (strpos($string, $start) !== false) {
  23. $section_to_remove = substr($string, strpos($string, $start));
  24. $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));
  25. $string = str_replace($section_to_remove, '', $string);
  26. } return $string;
  27. }
  28. function CleanArticle($article_html) {
  29. $article_html = '<p>'.substr($article_html, strpos($article_html, '<p>') + 3);
  30. $article_html = StripWithDelimiters($article_html, '<span class="credit">', '</span>');
  31. $article_html = StripWithDelimiters($article_html, '<script', '</script>');
  32. $article_html = StripWithDelimiters($article_html, '<div class="shortcode related-links', '</div>');
  33. $article_html = StripWithDelimiters($article_html, '<a class="clickToEnlarge">', '</a>');
  34. return $article_html;
  35. }
  36. if (!empty($param['topic']))
  37. $this->topicName = $param['topic'];
  38. $pageUrl = 'http://www.cnet.com/'.(empty($this->topicName) ? '' : 'topics/'.$this->topicName.'/');
  39. $html = $this->getSimpleHTMLDOM($pageUrl) or $this->returnServerError('Could not request CNET: '.$pageUrl);
  40. $limit = 0;
  41. foreach($html->find('div.assetBody') as $element) {
  42. if ($limit < 8) {
  43. $article_title = trim($element->find('h2', 0)->plaintext);
  44. $article_uri = 'http://www.cnet.com'.($element->find('a', 0)->href);
  45. $article_timestamp = strtotime($element->find('time.assetTime', 0)->plaintext);
  46. $article_author = trim($element->find('a[rel=author]', 0)->plaintext);
  47. if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, '/news/') !== false) {
  48. $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request CNET: '.$article_uri);
  49. $article_content = trim(CleanArticle(ExtractFromDelimiters($article_html, '<div class="articleContent', '<footer>')));
  50. $item = array();
  51. $item['uri'] = $article_uri;
  52. $item['title'] = $article_title;
  53. $item['author'] = $article_author;
  54. $item['timestamp'] = $article_timestamp;
  55. $item['content'] = $article_content;
  56. $this->items[] = $item;
  57. $limit++;
  58. }
  59. }
  60. }
  61. }
  62. public function getName() {
  63. return 'CNET News Bridge'.(empty($this->topicName) ? '' : ' - '.$this->topicName);
  64. }
  65. public function getCacheDuration() {
  66. return 1800; // 30 minutes
  67. }
  68. }