ZDNetBridge.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
  1. <?php
  2. class ZDNetBridge extends BridgeAbstract {
  3. const MAINTAINER = 'ORelio';
  4. const NAME = 'ZDNet Bridge';
  5. const URI = 'http://www.zdnet.com/';
  6. const DESCRIPTION = 'Technology News, Analysis, Comments and Product Reviews for IT Professionals.';
  7. //http://www.zdnet.com/zdnet.opml
  8. const PARAMETERS = array( array(
  9. 'feed' => array(
  10. 'name' => 'Feed',
  11. 'type' => 'list',
  12. 'values' => array(
  13. 'Subscribe to ZDNet RSS Feeds' => array(
  14. 'All Blogs' => 'blog',
  15. 'Just News' => 'news',
  16. 'All Reviews' => 'topic/reviews',
  17. 'Latest Downloads' => 'downloads!recent',
  18. 'Latest Articles' => '/',
  19. 'Latest Australia Articles' => 'au',
  20. 'Latest UK Articles' => 'uk',
  21. 'Latest US Articles' => 'us',
  22. 'Latest Asia Articles' => 'as'
  23. ),
  24. 'Keep up with ZDNet Blogs RSS:' => array(
  25. 'Transforming the Datacenter' => 'blog/transforming-datacenter',
  26. 'SMB India' => 'blog/smb-india',
  27. 'Indonesia BizTech' => 'blog/indonesia-biztech',
  28. 'Hong Kong Techie' => 'blog/hong-kong-techie',
  29. 'Tech Taiwan' => 'blog/tech-taiwan',
  30. 'Startup India' => 'blog/startup-india',
  31. 'Starting Up Asia' => 'blog/starting-up-asia',
  32. 'Next-Gen Partner' => 'blog/partner',
  33. 'Post-PC Developments' => 'blog/post-pc',
  34. 'Benelux' => 'blog/benelux',
  35. 'Heat Sink' => 'blog/heat-sink',
  36. 'Italy\'s got tech' => 'blog/italy',
  37. 'African Enterprise' => 'blog/african-enterprise',
  38. 'New Tech for Old India' => 'blog/new-india',
  39. 'Estonia Uncovered' => 'blog/estonia',
  40. 'IT Iberia' => 'blog/iberia',
  41. 'Brazil Tech' => 'blog/brazil',
  42. '500 words into the future' => 'blog/500-words-into-the-future',
  43. 'ÜberTech' => 'blog/ubertech',
  44. 'All About Microsoft' => 'blog/microsoft',
  45. 'Back office' => 'blog/back-office',
  46. 'Barker Bites Back' => 'blog/barker-bites-back',
  47. 'Between the Lines' => 'blog/btl',
  48. 'Big on Data' => 'blog/big-data',
  49. 'bootstrappr' => 'blog/bootstrappr',
  50. 'By The Way' => 'blog/by-the-way',
  51. 'Central European Processing' => 'blog/central-europe',
  52. 'Cloud Builders' => 'blog/cloud-builders',
  53. 'Communication Breakdown' => 'blog/communication-breakdown',
  54. 'Collaboration 2.0' => 'blog/collaboration',
  55. 'Constellation Research' => 'blog/constellation',
  56. 'Consumerization: BYOD' => 'blog/consumerization',
  57. 'DIY-IT' => 'blog/diy-it',
  58. 'Enterprise Web 2.0' => 'blog/hinchcliffe',
  59. 'Five Nines: The Next Gen Datacenter' => 'blog/datacenter',
  60. 'Forrester Research' => 'blog/forrester',
  61. 'Full Duplex' => 'blog/full-duplex',
  62. 'Gen Why?' => 'blog/gen-why',
  63. 'Hardware 2.0' => 'blog/hardware',
  64. 'Identity Matters' => 'blog/identity',
  65. 'iGeneration' => 'blog/igeneration',
  66. 'Internet of Everything' => 'blog/cisco',
  67. 'Beyond IT Failure' => 'blog/projectfailures',
  68. 'Jamie\'s Mostly Linux Stuff' => 'blog/jamies-mostly-linux-stuff',
  69. 'Jack\'s Blog' => 'blog/jacks-blog',
  70. 'Laptops & Desktops' => 'blog/computers',
  71. 'Linux and Open Source' => 'blog/open-source',
  72. 'London Calling' => 'blog/london',
  73. 'Mapping Babel' => 'blog/mapping-babel',
  74. 'Mixed Signals' => 'blog/mixed-signals',
  75. 'Mobile India' => 'blog/mobile-india',
  76. 'Mobile News' => 'blog/mobile-news',
  77. 'Networking' => 'blog/networking',
  78. 'Norse Code' => 'blog/norse-code',
  79. 'Null Pointer' => 'blog/null-pointer',
  80. 'The Full Tilt' => 'blog/the-full-tilt',
  81. 'Pinoy Post' => 'blog/pinoy-post',
  82. 'Practically Tech' => 'blog/practically-tech',
  83. 'Product Central' => 'blog/product-central',
  84. 'Pulp Tech' => 'blog/violetblue',
  85. 'Qubits and Pieces' => 'blog/qubits-and-pieces',
  86. 'Securify This!' => 'blog/securify-this',
  87. 'Service Oriented' => 'blog/service-oriented',
  88. 'Small Talk' => 'blog/small-talk',
  89. 'Small Business Matters' => 'blog/small-business-matters',
  90. 'Smartphones and Cell Phones' => 'blog/cell-phones',
  91. 'Social Business' => 'blog/feeds',
  92. 'Social CRM: The Conversation' => 'blog/crm',
  93. 'Software & Services Safari' => 'blog/sommer',
  94. 'Storage Bits' => 'blog/storage',
  95. 'Stacking up Open Clouds' => 'blog/apac-redhat',
  96. 'Techie Isles' => 'blog/techie-isles',
  97. 'Technolatte' => 'blog/technolatte',
  98. 'Tech Podium' => 'blog/tech-podium',
  99. 'Tel Aviv Tech' => 'blog/tel-aviv',
  100. 'Tech Broiler' => 'blog/perlow',
  101. 'The SANMAN' => 'blog/the-sanman',
  102. 'The open source revolution' => 'blog/the-open-source-revolution',
  103. 'The German View' => 'blog/german',
  104. 'The Ed Bott Report' => 'blog/bott',
  105. 'The Mobile Gadgeteer' => 'blog/mobile-gadgeteer',
  106. 'The Apple Core' => 'blog/apple',
  107. 'Tom Foremski: IMHO' => 'blog/foremski',
  108. 'Twisted Wire' => 'blog/twisted-wire',
  109. 'Vive la tech' => 'blog/france',
  110. 'Virtually Speaking' => 'blog/virtualization',
  111. 'View from China' => 'blog/china',
  112. 'Web design & Free Software' => 'blog/web-design-and-free-software',
  113. 'ZDNet Government' => 'blog/government',
  114. 'ZDNet UK Book Reviews' => 'blog/zdnet-uk-book-reviews',
  115. 'ZDNet UK First Take' => 'blog/zdnet-uk-first-take',
  116. 'Zero Day' => 'blog/security'
  117. ),
  118. 'ZDNet Hot Topics RSS:' => array(
  119. 'Apple' => 'topic/apple',
  120. 'Collaboration' => 'topic/collaboration',
  121. 'Enterprise Software' => 'topic/enterprise-software',
  122. 'Google' => 'topic/google',
  123. 'Great debate' => 'topic/great-debate',
  124. 'Hardware' => 'topic/hardware',
  125. 'IBM' => 'topic/ibm',
  126. 'iOS' => 'topic/ios',
  127. 'iPhone' => 'topic/iphone',
  128. 'iPad' => 'topic/ipad',
  129. 'IT Priorities' => 'topic/it-priorities',
  130. 'Laptops' => 'topic/laptops',
  131. 'Legal' => 'topic/legal',
  132. 'Linux' => 'topic/linux',
  133. 'Microsoft' => 'topic/microsoft',
  134. 'Mobile OS' => 'topic/mobile-os',
  135. 'Mobility' => 'topic/mobility',
  136. 'Networking' => 'topic/networking',
  137. 'Oracle' => 'topic/oracle',
  138. 'Processors' => 'topic/processors',
  139. 'Samsung' => 'topic/samsung',
  140. 'Security' => 'topic/security',
  141. 'Small business: going big on mobility' => 'topic/small-business-going-big-on-mobility'
  142. ),
  143. 'Product Blogs:' => array(
  144. 'Digital Cameras & Camcorders' => 'blog/digitalcameras',
  145. 'Home Theater' => 'blog/home-theater',
  146. 'Laptops and Desktops' => 'blog/computers',
  147. 'The Mobile Gadgeteer' => 'blog/mobile-gadgeteer',
  148. 'Smartphones and Cell Phones' => 'blog/cell-phones',
  149. 'The ToyBox' => 'blog/gadgetreviews'
  150. ),
  151. 'Vertical Blogs:' => array(
  152. 'ZDNet Education' => 'blog/education',
  153. 'ZDNet Healthcare' => 'blog/healthcare',
  154. 'ZDNet Government' => 'blog/government'
  155. )
  156. )
  157. )
  158. ));
  159. public function collectData(){
  160. function stripCdata($string){
  161. $string = str_replace('<![CDATA[', '', $string);
  162. $string = str_replace(']]>', '', $string);
  163. return trim($string);
  164. }
  165. function extractFromDelimiters($string, $start, $end){
  166. if(strpos($string, $start) !== false) {
  167. $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
  168. $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
  169. return $section_retrieved;
  170. }
  171. return false;
  172. }
  173. function stripWithDelimiters($string, $start, $end){
  174. while(strpos($string, $start) !== false) {
  175. $section_to_remove = substr($string, strpos($string, $start));
  176. $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));
  177. $string = str_replace($section_to_remove, '', $string);
  178. }
  179. return $string;
  180. }
  181. function stripRecursiveHtmlSection($string, $tag_name, $tag_start){
  182. $open_tag = '<' . $tag_name;
  183. $close_tag = '</' . $tag_name . '>';
  184. $close_tag_length = strlen($close_tag);
  185. if(strpos($tag_start, $open_tag) === 0) {
  186. while(strpos($string, $tag_start) !== false) {
  187. $max_recursion = 100;
  188. $section_to_remove = null;
  189. $section_start = strpos($string, $tag_start);
  190. $search_offset = $section_start;
  191. do {
  192. $max_recursion--;
  193. $section_end = strpos($string, $close_tag, $search_offset);
  194. $search_offset = $section_end + $close_tag_length;
  195. $section_to_remove = substr(
  196. $string,
  197. $section_start,
  198. $section_end - $section_start + $close_tag_length
  199. );
  200. $open_tag_count = substr_count($section_to_remove, $open_tag);
  201. $close_tag_count = substr_count($section_to_remove, $close_tag);
  202. } while ($open_tag_count > $close_tag_count && $max_recursion > 0);
  203. $string = str_replace($section_to_remove, '', $string);
  204. }
  205. }
  206. return $string;
  207. }
  208. $baseUri = self::URI;
  209. $feed = $this->getInput('feed');
  210. if(strpos($feed, 'downloads!') !== false) {
  211. $feed = str_replace('downloads!', '', $feed);
  212. $baseUri = str_replace('www.', 'downloads.', $baseUri);
  213. }
  214. $url = $baseUri . trim($feed, '/') . '/rss.xml';
  215. $html = getSimpleHTMLDOM($url)
  216. or returnServerError('Could not request ZDNet: ' . $url);
  217. $limit = 0;
  218. foreach($html->find('item') as $element) {
  219. if($limit < 10) {
  220. $article_url = preg_replace(
  221. '/([^#]+)#ftag=.*/',
  222. '$1',
  223. stripCdata(extractFromDelimiters($element->innertext, '<link>', '</link>'))
  224. );
  225. $article_author = stripCdata(extractFromDelimiters($element->innertext, 'role="author">', '<'));
  226. $article_title = stripCdata($element->find('title', 0)->plaintext);
  227. $article_subtitle = stripCdata($element->find('description', 0)->plaintext);
  228. $article_timestamp = strtotime(stripCdata($element->find('pubDate', 0)->plaintext));
  229. $article = getSimpleHTMLDOM($article_url)
  230. or returnServerError('Could not request ZDNet: ' . $article_url);
  231. if(!empty($article_author)) {
  232. $author = $article_author;
  233. } else {
  234. $author = $article->find('meta[name=author]', 0);
  235. if(is_object($author)) {
  236. $author = $author->content;
  237. } else {
  238. $author = 'ZDNet';
  239. }
  240. }
  241. $thumbnail = $article->find('meta[itemprop=image]', 0);
  242. if(is_object($thumbnail)) {
  243. $thumbnail = $thumbnail->content;
  244. } else {
  245. $thumbnail = '';
  246. }
  247. $contents = $article->find('article', 0)->innertext;
  248. foreach(array(
  249. '<div class="shareBar"',
  250. '<div class="shortcodeGalleryWrapper"',
  251. '<div class="relatedContent',
  252. '<div class="downloadNow',
  253. '<div data-shortcode',
  254. '<div id="sharethrough',
  255. '<div id="inpage-video'
  256. ) as $div_start) {
  257. $contents = stripRecursiveHtmlSection($contents, 'div', $div_start);
  258. }
  259. $contents = stripWithDelimiters($contents, '<script', '</script>');
  260. $contents = stripWithDelimiters($contents, '<meta itemprop="image"', '>');
  261. $contents = trim(stripWithDelimiters($contents, '<section class="sharethrough-top', '</section>'));
  262. $content_img = strpos($contents, '<img'); //Look for first image
  263. if (($content_img !== false && $content_img < 512) || $thumbnail == '') {
  264. $content_img = ''; //Image already present on article beginning or no thumbnail
  265. } else {
  266. $content_img = '<p><img src="'.$thumbnail.'" /></p>'; //Include thumbnail
  267. }
  268. $contents = $content_img
  269. . '<p><b>'
  270. . $article_subtitle
  271. . '</b></p>'
  272. . $contents;
  273. $item = array();
  274. $item['author'] = $author;
  275. $item['uri'] = $article_url;
  276. $item['title'] = $article_title;
  277. $item['timestamp'] = $article_timestamp;
  278. $item['content'] = $contents;
  279. $this->items[] = $item;
  280. $limit++;
  281. }
  282. }
  283. }
  284. }