ZDNetBridge.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. <?php
  /**
   * Bridge that turns a ZDNet RSS feed into full-text items.
   *
   * The selected feed's rss.xml is fetched, then each of the first few items is
   * expanded by downloading the linked article page and extracting the contents
   * of its <article> element, with share bars, galleries, scripts and similar
   * page furniture stripped out.
   */
  class ZDNetBridge extends BridgeAbstract {

      /** Maximum number of feed items that are expanded into full articles. */
      const ITEM_LIMIT = 10;

      /** Maximum accepted length (in bytes) of the "feed" parameter. */
      const FEED_MAX_LENGTH = 64;

      /**
       * Declares the bridge metadata and its single "feed" list parameter.
       *
       * Each list value is the path segment inserted before "/rss.xml" by
       * collectData(). The "downloads!" prefix is a marker that makes
       * collectData() switch from the "www." host to the "downloads." host.
       */
      public function loadMetadatas() {
          $this->maintainer = 'ORelio';
          $this->name = 'ZDNet Bridge';
          $this->uri = 'http://www.zdnet.com/';
          $this->description = 'Technology News, Analysis, Comments and Product Reviews for IT Professionals.';

          // Feed catalogue, presumably derived from http://www.zdnet.com/zdnet.opml
          $this->parameters[] = array(
              'feed' => array(
                  'name' => 'Feed',
                  'type' => 'list',
                  'values' => array(
                      'Subscribe to ZDNet RSS Feeds' => array(
                          'All Blogs' => 'blog',
                          'Just News' => 'news',
                          'All Reviews' => 'topic/reviews',
                          'Latest Downloads' => 'downloads!recent',
                          'Latest Articles' => '/',
                          'Latest Australia Articles' => 'au',
                          'Latest UK Articles' => 'uk',
                          'Latest US Articles' => 'us',
                          'Latest Asia Articles' => 'as'
                      ),
                      'Keep up with ZDNet Blogs RSS:' => array(
                          'Transforming the Datacenter' => 'blog/transforming-datacenter',
                          'SMB India' => 'blog/smb-india',
                          'Indonesia BizTech' => 'blog/indonesia-biztech',
                          'Hong Kong Techie' => 'blog/hong-kong-techie',
                          'Tech Taiwan' => 'blog/tech-taiwan',
                          'Startup India' => 'blog/startup-india',
                          'Starting Up Asia' => 'blog/starting-up-asia',
                          'Next-Gen Partner' => 'blog/partner',
                          'Post-PC Developments' => 'blog/post-pc',
                          'Benelux' => 'blog/benelux',
                          'Heat Sink' => 'blog/heat-sink',
                          'Italy\'s got tech' => 'blog/italy',
                          'African Enterprise' => 'blog/african-enterprise',
                          'New Tech for Old India' => 'blog/new-india',
                          'Estonia Uncovered' => 'blog/estonia',
                          'IT Iberia' => 'blog/iberia',
                          'Brazil Tech' => 'blog/brazil',
                          '500 words into the future' => 'blog/500-words-into-the-future',
                          'ÜberTech' => 'blog/ubertech',
                          'All About Microsoft' => 'blog/microsoft',
                          'Back office' => 'blog/back-office',
                          'Barker Bites Back' => 'blog/barker-bites-back',
                          'Between the Lines' => 'blog/btl',
                          'Big on Data' => 'blog/big-data',
                          'bootstrappr' => 'blog/bootstrappr',
                          'By The Way' => 'blog/by-the-way',
                          'Central European Processing' => 'blog/central-europe',
                          'Cloud Builders' => 'blog/cloud-builders',
                          'Communication Breakdown' => 'blog/communication-breakdown',
                          'Collaboration 2.0' => 'blog/collaboration',
                          'Constellation Research' => 'blog/constellation',
                          'Consumerization: BYOD' => 'blog/consumerization',
                          'DIY-IT' => 'blog/diy-it',
                          'Enterprise Web 2.0' => 'blog/hinchcliffe',
                          'Five Nines: The Next Gen Datacenter' => 'blog/datacenter',
                          'Forrester Research' => 'blog/forrester',
                          'Full Duplex' => 'blog/full-duplex',
                          'Gen Why?' => 'blog/gen-why',
                          'Hardware 2.0' => 'blog/hardware',
                          'Identity Matters' => 'blog/identity',
                          'iGeneration' => 'blog/igeneration',
                          'Internet of Everything' => 'blog/cisco',
                          'Beyond IT Failure' => 'blog/projectfailures',
                          'Jamie\'s Mostly Linux Stuff' => 'blog/jamies-mostly-linux-stuff',
                          'Jack\'s Blog' => 'blog/jacks-blog',
                          'Laptops & Desktops' => 'blog/computers',
                          'Linux and Open Source' => 'blog/open-source',
                          'London Calling' => 'blog/london',
                          'Mapping Babel' => 'blog/mapping-babel',
                          'Mixed Signals' => 'blog/mixed-signals',
                          'Mobile India' => 'blog/mobile-india',
                          'Mobile News' => 'blog/mobile-news',
                          'Networking' => 'blog/networking',
                          'Norse Code' => 'blog/norse-code',
                          'Null Pointer' => 'blog/null-pointer',
                          'The Full Tilt' => 'blog/the-full-tilt',
                          'Pinoy Post' => 'blog/pinoy-post',
                          'Practically Tech' => 'blog/practically-tech',
                          'Product Central' => 'blog/product-central',
                          'Pulp Tech' => 'blog/violetblue',
                          'Qubits and Pieces' => 'blog/qubits-and-pieces',
                          'Securify This!' => 'blog/securify-this',
                          'Service Oriented' => 'blog/service-oriented',
                          'Small Talk' => 'blog/small-talk',
                          'Small Business Matters' => 'blog/small-business-matters',
                          'Smartphones and Cell Phones' => 'blog/cell-phones',
                          'Social Business' => 'blog/feeds',
                          'Social CRM: The Conversation' => 'blog/crm',
                          'Software & Services Safari' => 'blog/sommer',
                          'Storage Bits' => 'blog/storage',
                          'Stacking up Open Clouds' => 'blog/apac-redhat',
                          'Techie Isles' => 'blog/techie-isles',
                          'Technolatte' => 'blog/technolatte',
                          'Tech Podium' => 'blog/tech-podium',
                          'Tel Aviv Tech' => 'blog/tel-aviv',
                          'Tech Broiler' => 'blog/perlow',
                          'The SANMAN' => 'blog/the-sanman',
                          'The open source revolution' => 'blog/the-open-source-revolution',
                          'The German View' => 'blog/german',
                          'The Ed Bott Report' => 'blog/bott',
                          'The Mobile Gadgeteer' => 'blog/mobile-gadgeteer',
                          'The Apple Core' => 'blog/apple',
                          'Tom Foremski: IMHO' => 'blog/foremski',
                          'Twisted Wire' => 'blog/twisted-wire',
                          'Vive la tech' => 'blog/france',
                          'Virtually Speaking' => 'blog/virtualization',
                          'View from China' => 'blog/china',
                          'Web design & Free Software' => 'blog/web-design-and-free-software',
                          'ZDNet Government' => 'blog/government',
                          'ZDNet UK Book Reviews' => 'blog/zdnet-uk-book-reviews',
                          'ZDNet UK First Take' => 'blog/zdnet-uk-first-take',
                          'Zero Day' => 'blog/security'
                      ),
                      'ZDNet Hot Topics RSS:' => array(
                          'Apple' => 'topic/apple',
                          'Collaboration' => 'topic/collaboration',
                          'Enterprise Software' => 'topic/enterprise-software',
                          'Google' => 'topic/google',
                          'Great debate' => 'topic/great-debate',
                          'Hardware' => 'topic/hardware',
                          'IBM' => 'topic/ibm',
                          'iOS' => 'topic/ios',
                          'iPhone' => 'topic/iphone',
                          'iPad' => 'topic/ipad',
                          'IT Priorities' => 'topic/it-priorities',
                          'Laptops' => 'topic/laptops',
                          'Legal' => 'topic/legal',
                          'Linux' => 'topic/linux',
                          'Microsoft' => 'topic/microsoft',
                          'Mobile OS' => 'topic/mobile-os',
                          'Mobility' => 'topic/mobility',
                          'Networking' => 'topic/networking',
                          'Oracle' => 'topic/oracle',
                          'Processors' => 'topic/processors',
                          'Samsung' => 'topic/samsung',
                          'Security' => 'topic/security',
                          'Small business: going big on mobility' => 'topic/small-business-going-big-on-mobility'
                      ),
                      'Product Blogs:' => array(
                          'Digital Cameras & Camcorders' => 'blog/digitalcameras',
                          'Home Theater' => 'blog/home-theater',
                          'Laptops and Desktops' => 'blog/computers',
                          'The Mobile Gadgeteer' => 'blog/mobile-gadgeteer',
                          'Smartphones and Cell Phones' => 'blog/cell-phones',
                          'The ToyBox' => 'blog/gadgetreviews'
                      ),
                      'Vertical Blogs:' => array(
                          'ZDNet Education' => 'blog/education',
                          'ZDNet Healthcare' => 'blog/healthcare',
                          'ZDNet Government' => 'blog/government'
                      )
                  )
              )
          );
      }

      /**
       * Fetches the selected feed and appends up to ITEM_LIMIT full-text items
       * to $this->items.
       *
       * Each item is an array with the keys 'author', 'uri', 'title',
       * 'timestamp' and 'content'.
       *
       * NOTE(review): returnClientError() / returnServerError() are assumed to
       * abort the request (they are defined outside this file) - confirm.
       *
       * @param array $param Request parameters; 'feed' must hold one of the
       *                     values declared in loadMetadatas().
       */
      public function collectData(array $param) {
          $baseUri = $this->getURI();

          $feed = isset($param['feed']) ? $param['feed'] : '';
          if (!is_string($feed) || empty($feed)) {
              $this->returnClientError('Please select a feed to display.');
          }

          // "downloads!" is an internal marker: drop it and target the downloads host.
          if (strpos($feed, 'downloads!') !== false) {
              $feed = str_replace('downloads!', '', $feed);
              $baseUri = str_replace('www.', 'downloads.', $baseUri);
          }

          // Only [a-zA-Z0-9-/] with at most one slash and a bounded length is
          // accepted, since the value is spliced into the request URL.
          // The length test used to read strlen($feed > 64): on PHP < 8 it never
          // triggered, and on PHP 8 (string-vs-int comparison done as strings)
          // it rejected every feed whose name starts with a letter.
          $sanitized = preg_replace('/[^a-zA-Z0-9-\/]+/', '', $feed);
          if ($feed !== $sanitized
              || substr_count($feed, '/') > 1
              || strlen($feed) > self::FEED_MAX_LENGTH) {
              $this->returnClientError('Invalid "feed" parameter.');
          }

          $url = $baseUri.trim($feed, '/').'/rss.xml';
          $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request ZDNet: '.$url);

          $count = 0;
          foreach ($html->find('item') as $element) {
              // Stop instead of idling through the remaining items.
              if ($count >= self::ITEM_LIMIT) {
                  break;
              }

              // <link> and the author are cut out of the raw markup rather than
              // looked up through find(); presumably the HTML parser does not
              // expose them as regular child elements - TODO confirm.
              $raw_link = self::stripCdata(self::extractFromDelimiters($element->innertext, '<link>', '</link>'));
              $article_url = preg_replace('/([^#]+)#ftag=.*/', '$1', $raw_link);
              if (empty($article_url)) {
                  // Without a link there is no article page to expand.
                  continue;
              }

              $article_author = self::stripCdata(self::extractFromDelimiters($element->innertext, 'role="author">', '<'));
              $article_title = self::childText($element, 'title');
              $article_subtitle = self::childText($element, 'description');
              $article_timestamp = strtotime(self::childText($element, 'pubDate'));

              $article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request ZDNet: '.$article_url);

              // Author: feed value first, then the page's meta tag, then a fixed default.
              $author = $article_author;
              if (empty($author)) {
                  $author_meta = $article->find('meta[name=author]', 0);
                  $author = is_object($author_meta) ? $author_meta->content : 'ZDNet';
              }

              $thumbnail_meta = $article->find('meta[itemprop=image]', 0);
              $thumbnail = is_object($thumbnail_meta) ? $thumbnail_meta->content : '';

              $article_body = $article->find('article', 0);
              $contents = is_object($article_body) ? $article_body->innertext : '';

              // Remove whole <div> subtrees that are not article text.
              $unwanted_divs = array(
                  '<div class="shareBar"',
                  '<div class="shortcodeGalleryWrapper"',
                  '<div class="relatedContent',
                  '<div class="downloadNow',
                  '<div data-shortcode',
                  '<div id="sharethrough',
                  '<div id="inpage-video'
              );
              foreach ($unwanted_divs as $div_start) {
                  $contents = self::stripRecursiveHtmlSection($contents, 'div', $div_start);
              }
              $contents = self::stripWithDelimiters($contents, '<script', '</script>');
              $contents = self::stripWithDelimiters($contents, '<meta itemprop="image"', '>');
              $contents = trim(self::stripWithDelimiters($contents, '<section class="sharethrough-top', '</section>'));

              // Prepend the thumbnail unless the article already opens with an
              // image (within its first 512 bytes) or there is no thumbnail.
              $first_img_pos = strpos($contents, '<img');
              $has_leading_image = ($first_img_pos !== false && $first_img_pos < 512);
              if ($has_leading_image || $thumbnail == '') {
                  $lead_image = '';
              } else {
                  $lead_image = '<p><img src="'.$thumbnail.'" /></p>';
              }

              $item = array();
              $item['author'] = $author;
              $item['uri'] = $article_url;
              $item['title'] = $article_title;
              $item['timestamp'] = $article_timestamp;
              $item['content'] = $lead_image
                  .'<p><b>'.$article_subtitle.'</b></p>'
                  .$contents;
              $this->items[] = $item;
              $count++;
          }
      }

      /**
       * Returns the CDATA-stripped plain text of the first child matching
       * $selector, or '' when there is no such child.
       *
       * @param object $node     Parsed node exposing find($selector, $index).
       * @param string $selector Selector passed to find().
       * @return string
       */
      private static function childText($node, $selector) {
          $child = $node->find($selector, 0);
          return is_object($child) ? self::stripCdata($child->plaintext) : '';
      }

      /**
       * Removes CDATA opening/closing markers and surrounding whitespace.
       *
       * @param string|false $string Input text; false is treated as ''.
       * @return string
       */
      private static function stripCdata($string) {
          return trim(str_replace(array('<![CDATA[', ']]>'), '', (string)$string));
      }

      /**
       * Returns the text between the first $start and the next $end after it.
       *
       * @param string $string Text to search.
       * @param string $start  Opening delimiter.
       * @param string $end    Closing delimiter.
       * @return string|false  The enclosed text, '' when $end is missing,
       *                       false when $start is missing.
       */
      private static function extractFromDelimiters($string, $start, $end) {
          $start_pos = strpos($string, $start);
          if ($start_pos === false) {
              return false;
          }
          $remainder = (string)substr($string, $start_pos + strlen($start));
          $end_pos = strpos($remainder, $end);
          return $end_pos === false ? '' : substr($remainder, 0, $end_pos);
      }

      /**
       * Removes every span running from $start to the next $end (inclusive).
       *
       * Stops at the first $start that has no matching $end. Previously such a
       * span caused the first strlen($end) bytes of the match to be removed
       * everywhere in the text (e.g. every '<' for a one-byte $end).
       *
       * @param string $string Text to clean.
       * @param string $start  Opening delimiter.
       * @param string $end    Closing delimiter.
       * @return string
       */
      private static function stripWithDelimiters($string, $start, $end) {
          $end_length = strlen($end);
          while (($start_pos = strpos($string, $start)) !== false) {
              $end_pos = strpos($string, $end, $start_pos);
              if ($end_pos === false) {
                  break; // unterminated span: leave the rest untouched
              }
              $string = substr($string, 0, $start_pos).substr($string, $end_pos + $end_length);
          }
          return $string;
      }

      /**
       * Removes every element that begins with $tag_start, including nested
       * elements of the same tag name.
       *
       * The end of a section is found by extending it to successive closing
       * tags until it contains at least as many closing as opening tags, with
       * at most 100 extensions per section. Nothing is done when $tag_start
       * does not begin with "<$tag_name".
       *
       * Returns the text as it stands when a closing tag cannot be found;
       * previously that case never terminated.
       *
       * @param string $string    HTML to clean.
       * @param string $tag_name  Tag name, e.g. 'div'.
       * @param string $tag_start Start of the opening tag, e.g. '<div class="x"'.
       * @return string
       */
      private static function stripRecursiveHtmlSection($string, $tag_name, $tag_start) {
          $open_tag = '<'.$tag_name;
          $close_tag = '</'.$tag_name.'>';
          $close_tag_length = strlen($close_tag);

          if (strpos($tag_start, $open_tag) !== 0) {
              return $string;
          }

          while (($section_start = strpos($string, $tag_start)) !== false) {
              $max_recursion = 100;
              $search_offset = $section_start;
              do {
                  $max_recursion--;
                  $section_end = strpos($string, $close_tag, $search_offset);
                  if ($section_end === false) {
                      return $string; // unbalanced markup: nothing safe to remove
                  }
                  $search_offset = $section_end + $close_tag_length;
                  $section = substr($string, $section_start, $search_offset - $section_start);
                  $open_tag_count = substr_count($section, $open_tag);
                  $close_tag_count = substr_count($section, $close_tag);
              } while ($open_tag_count > $close_tag_count && $max_recursion > 0);

              $string = substr($string, 0, $section_start).substr($string, $search_offset);
          }
          return $string;
      }
  }