1
0
Fork 0
forked from blallo/rss-bridge
rss-bridge/bridges/ZDNetBridge.php
2017-02-14 17:28:07 +01:00

302 lines
11 KiB
PHP

<?php
/**
 * Bridge that turns a ZDNet RSS feed into full-content items.
 *
 * collectData() downloads the selected "<feed>/rss.xml" document, then
 * fetches every listed article page (at most 10) and extracts author,
 * thumbnail and the cleaned-up body of its <article> element.
 */
class ZDNetBridge extends BridgeAbstract {
    const MAINTAINER = 'ORelio';
    const NAME = 'ZDNet Bridge';
    const URI = 'http://www.zdnet.com/';
    const DESCRIPTION = 'Technology News, Analysis, Comments and Product Reviews for IT Professionals.';

    // Feed list; presumably taken from http://www.zdnet.com/zdnet.opml
    // Each value is the path placed between the site root and "rss.xml".
    // A "downloads!" prefix makes collectData() use the "downloads." host
    // instead of "www.".
    const PARAMETERS = array( array(
        'feed' => array(
            'name' => 'Feed',
            'type' => 'list',
            'values' => array(
                'Subscribe to ZDNet RSS Feeds' => array(
                    'All Blogs' => 'blog',
                    'Just News' => 'news',
                    'All Reviews' => 'topic/reviews',
                    'Latest Downloads' => 'downloads!recent',
                    'Latest Articles' => '/',
                    'Latest Australia Articles' => 'au',
                    'Latest UK Articles' => 'uk',
                    'Latest US Articles' => 'us',
                    'Latest Asia Articles' => 'as'
                ),
                'Keep up with ZDNet Blogs RSS:' => array(
                    'Transforming the Datacenter' => 'blog/transforming-datacenter',
                    'SMB India' => 'blog/smb-india',
                    'Indonesia BizTech' => 'blog/indonesia-biztech',
                    'Hong Kong Techie' => 'blog/hong-kong-techie',
                    'Tech Taiwan' => 'blog/tech-taiwan',
                    'Startup India' => 'blog/startup-india',
                    'Starting Up Asia' => 'blog/starting-up-asia',
                    'Next-Gen Partner' => 'blog/partner',
                    'Post-PC Developments' => 'blog/post-pc',
                    'Benelux' => 'blog/benelux',
                    'Heat Sink' => 'blog/heat-sink',
                    'Italy\'s got tech' => 'blog/italy',
                    'African Enterprise' => 'blog/african-enterprise',
                    'New Tech for Old India' => 'blog/new-india',
                    'Estonia Uncovered' => 'blog/estonia',
                    'IT Iberia' => 'blog/iberia',
                    'Brazil Tech' => 'blog/brazil',
                    '500 words into the future' => 'blog/500-words-into-the-future',
                    'ÜberTech' => 'blog/ubertech',
                    'All About Microsoft' => 'blog/microsoft',
                    'Back office' => 'blog/back-office',
                    'Barker Bites Back' => 'blog/barker-bites-back',
                    'Between the Lines' => 'blog/btl',
                    'Big on Data' => 'blog/big-data',
                    'bootstrappr' => 'blog/bootstrappr',
                    'By The Way' => 'blog/by-the-way',
                    'Central European Processing' => 'blog/central-europe',
                    'Cloud Builders' => 'blog/cloud-builders',
                    'Communication Breakdown' => 'blog/communication-breakdown',
                    'Collaboration 2.0' => 'blog/collaboration',
                    'Constellation Research' => 'blog/constellation',
                    'Consumerization: BYOD' => 'blog/consumerization',
                    'DIY-IT' => 'blog/diy-it',
                    'Enterprise Web 2.0' => 'blog/hinchcliffe',
                    'Five Nines: The Next Gen Datacenter' => 'blog/datacenter',
                    'Forrester Research' => 'blog/forrester',
                    'Full Duplex' => 'blog/full-duplex',
                    'Gen Why?' => 'blog/gen-why',
                    'Hardware 2.0' => 'blog/hardware',
                    'Identity Matters' => 'blog/identity',
                    'iGeneration' => 'blog/igeneration',
                    'Internet of Everything' => 'blog/cisco',
                    'Beyond IT Failure' => 'blog/projectfailures',
                    'Jamie\'s Mostly Linux Stuff' => 'blog/jamies-mostly-linux-stuff',
                    'Jack\'s Blog' => 'blog/jacks-blog',
                    'Laptops & Desktops' => 'blog/computers',
                    'Linux and Open Source' => 'blog/open-source',
                    'London Calling' => 'blog/london',
                    'Mapping Babel' => 'blog/mapping-babel',
                    'Mixed Signals' => 'blog/mixed-signals',
                    'Mobile India' => 'blog/mobile-india',
                    'Mobile News' => 'blog/mobile-news',
                    'Networking' => 'blog/networking',
                    'Norse Code' => 'blog/norse-code',
                    'Null Pointer' => 'blog/null-pointer',
                    'The Full Tilt' => 'blog/the-full-tilt',
                    'Pinoy Post' => 'blog/pinoy-post',
                    'Practically Tech' => 'blog/practically-tech',
                    'Product Central' => 'blog/product-central',
                    'Pulp Tech' => 'blog/violetblue',
                    'Qubits and Pieces' => 'blog/qubits-and-pieces',
                    'Securify This!' => 'blog/securify-this',
                    'Service Oriented' => 'blog/service-oriented',
                    'Small Talk' => 'blog/small-talk',
                    'Small Business Matters' => 'blog/small-business-matters',
                    'Smartphones and Cell Phones' => 'blog/cell-phones',
                    'Social Business' => 'blog/feeds',
                    'Social CRM: The Conversation' => 'blog/crm',
                    'Software & Services Safari' => 'blog/sommer',
                    'Storage Bits' => 'blog/storage',
                    'Stacking up Open Clouds' => 'blog/apac-redhat',
                    'Techie Isles' => 'blog/techie-isles',
                    'Technolatte' => 'blog/technolatte',
                    'Tech Podium' => 'blog/tech-podium',
                    'Tel Aviv Tech' => 'blog/tel-aviv',
                    'Tech Broiler' => 'blog/perlow',
                    'The SANMAN' => 'blog/the-sanman',
                    'The open source revolution' => 'blog/the-open-source-revolution',
                    'The German View' => 'blog/german',
                    'The Ed Bott Report' => 'blog/bott',
                    'The Mobile Gadgeteer' => 'blog/mobile-gadgeteer',
                    'The Apple Core' => 'blog/apple',
                    'Tom Foremski: IMHO' => 'blog/foremski',
                    'Twisted Wire' => 'blog/twisted-wire',
                    'Vive la tech' => 'blog/france',
                    'Virtually Speaking' => 'blog/virtualization',
                    'View from China' => 'blog/china',
                    'Web design & Free Software' => 'blog/web-design-and-free-software',
                    'ZDNet Government' => 'blog/government',
                    'ZDNet UK Book Reviews' => 'blog/zdnet-uk-book-reviews',
                    'ZDNet UK First Take' => 'blog/zdnet-uk-first-take',
                    'Zero Day' => 'blog/security'
                ),
                'ZDNet Hot Topics RSS:' => array(
                    'Apple' => 'topic/apple',
                    'Collaboration' => 'topic/collaboration',
                    'Enterprise Software' => 'topic/enterprise-software',
                    'Google' => 'topic/google',
                    'Great debate' => 'topic/great-debate',
                    'Hardware' => 'topic/hardware',
                    'IBM' => 'topic/ibm',
                    'iOS' => 'topic/ios',
                    'iPhone' => 'topic/iphone',
                    'iPad' => 'topic/ipad',
                    'IT Priorities' => 'topic/it-priorities',
                    'Laptops' => 'topic/laptops',
                    'Legal' => 'topic/legal',
                    'Linux' => 'topic/linux',
                    'Microsoft' => 'topic/microsoft',
                    'Mobile OS' => 'topic/mobile-os',
                    'Mobility' => 'topic/mobility',
                    'Networking' => 'topic/networking',
                    'Oracle' => 'topic/oracle',
                    'Processors' => 'topic/processors',
                    'Samsung' => 'topic/samsung',
                    'Security' => 'topic/security',
                    'Small business: going big on mobility' => 'topic/small-business-going-big-on-mobility'
                ),
                'Product Blogs:' => array(
                    'Digital Cameras & Camcorders' => 'blog/digitalcameras',
                    'Home Theater' => 'blog/home-theater',
                    'Laptops and Desktops' => 'blog/computers',
                    'The Mobile Gadgeteer' => 'blog/mobile-gadgeteer',
                    'Smartphones and Cell Phones' => 'blog/cell-phones',
                    'The ToyBox' => 'blog/gadgetreviews'
                ),
                'Vertical Blogs:' => array(
                    'ZDNet Education' => 'blog/education',
                    'ZDNet Healthcare' => 'blog/healthcare',
                    'ZDNet Government' => 'blog/government'
                )
            )
        )
    ));

    /**
     * Fetch the selected feed and fill $this->items with up to 10 articles.
     *
     * Every article page is downloaded as well, so one run performs up to
     * 11 HTTP requests. A failed request is reported through
     * returnServerError(), exactly as before.
     */
    public function collectData() {
        $baseUri = self::URI;
        $feed = $this->getInput('feed');

        // "downloads!<path>" selects the downloads.zdnet.com host.
        if (strpos($feed, 'downloads!') !== false) {
            $feed = str_replace('downloads!', '', $feed);
            $baseUri = str_replace('www.', 'downloads.', $baseUri);
        }

        // The "/" feed has an empty path; skip the segment so the URL does
        // not end up as "http://www.zdnet.com//rss.xml".
        $path = trim($feed, '/');
        $url = $baseUri . ($path === '' ? '' : $path . '/') . 'rss.xml';

        $html = getSimpleHTMLDOM($url)
            or returnServerError('Could not request ZDNet: ' . $url);

        $max_items = 10;
        $limit = 0;
        foreach ($html->find('item') as $element) {
            // Stop walking the feed once enough items were collected.
            if ($limit >= $max_items) {
                break;
            }

            // <link> is read from the raw markup rather than via find();
            // the "#ftag=..." tracking fragment is dropped from the URL.
            $article_url = preg_replace(
                '/([^#]+)#ftag=.*/',
                '$1',
                $this->stripCdata($this->extractFromDelimiters($element->innertext, '<link>', '</link>'))
            );
            $article_author = $this->stripCdata(
                $this->extractFromDelimiters($element->innertext, 'role="author">', '<')
            );
            $article_title = $this->childPlaintext($element, 'title');
            $article_subtitle = $this->childPlaintext($element, 'description');
            $article_timestamp = strtotime($this->childPlaintext($element, 'pubDate'));

            $article = getSimpleHTMLDOM($article_url)
                or returnServerError('Could not request ZDNet: ' . $article_url);

            // Author: feed markup first, then the page's meta tag, then a
            // generic fallback.
            if (!empty($article_author)) {
                $author = $article_author;
            } else {
                $author = $this->metaContent($article, 'meta[name=author]');
                if ($author === '') {
                    $author = 'ZDNet';
                }
            }

            $thumbnail = $this->metaContent($article, 'meta[itemprop=image]');

            // A page without an <article> element yields an empty body
            // instead of dereferencing null.
            $article_node = $article->find('article', 0);
            $contents = is_object($article_node) ? (string)$article_node->innertext : '';

            foreach (array(
                '<div class="shareBar"',
                '<div class="shortcodeGalleryWrapper"',
                '<div class="relatedContent',
                '<div class="downloadNow',
                '<div data-shortcode',
                '<div id="sharethrough',
                '<div id="inpage-video'
            ) as $div_start) {
                $contents = $this->stripRecursiveHtmlSection($contents, 'div', $div_start);
            }
            $contents = $this->stripWithDelimiters($contents, '<script', '</script>');
            $contents = $this->stripWithDelimiters($contents, '<meta itemprop="image"', '>');
            $contents = trim(
                $this->stripWithDelimiters($contents, '<section class="sharethrough-top', '</section>')
            );

            // Prepend the thumbnail unless the article already shows an
            // image within its first 512 bytes or no thumbnail is known.
            $first_img = strpos($contents, '<img');
            if (($first_img !== false && $first_img < 512) || $thumbnail === '') {
                $content_img = '';
            } else {
                // The URL comes from the remote page: escape it for the
                // attribute, without double-encoding existing entities.
                $content_img = '<p><img src="'
                    . htmlspecialchars($thumbnail, ENT_QUOTES, 'UTF-8', false)
                    . '" /></p>';
            }

            $contents = $content_img
                . '<p><b>'
                . $article_subtitle
                . '</b></p>'
                . $contents;

            $item = array();
            $item['author'] = $author;
            $item['uri'] = $article_url;
            $item['title'] = $article_title;
            $item['timestamp'] = $article_timestamp;
            $item['content'] = $contents;
            $this->items[] = $item;
            $limit++;
        }
    }

    /**
     * Remove CDATA markers from a string and trim surrounding whitespace.
     *
     * Declared as a method (like the other helpers below) rather than as a
     * function nested in collectData(): a nested named function is declared
     * globally when its enclosing body runs, so a second call would abort
     * with "Cannot redeclare".
     *
     * @param string|false $string Input; false is treated as ''.
     * @return string
     */
    private function stripCdata($string) {
        return trim(str_replace(array('<![CDATA[', ']]>'), '', (string)$string));
    }

    /**
     * Return the text between the first $start and the next $end.
     *
     * @param string $string Haystack.
     * @param string $start Opening delimiter.
     * @param string $end Closing delimiter, searched after $start.
     * @return string|false The enclosed text, or false when either
     *                      delimiter is missing.
     */
    private function extractFromDelimiters($string, $start, $end) {
        $start_pos = strpos($string, $start);
        if ($start_pos === false) {
            return false;
        }

        $content_pos = $start_pos + strlen($start);
        $end_pos = strpos($string, $end, $content_pos);
        if ($end_pos === false) {
            return false;
        }

        return substr($string, $content_pos, $end_pos - $content_pos);
    }

    /**
     * Remove every "$start ... $end" section, delimiters included.
     *
     * An unterminated section (no $end after $start) is left untouched.
     *
     * @param string $string Haystack.
     * @param string $start Opening delimiter.
     * @param string $end Closing delimiter.
     * @return string
     */
    private function stripWithDelimiters($string, $start, $end) {
        if ($start === '') {
            return $string;
        }

        while (($start_pos = strpos($string, $start)) !== false) {
            $end_pos = strpos($string, $end, $start_pos + strlen($start));
            if ($end_pos === false) {
                // No terminator after this (or any later) opener.
                break;
            }
            $string = substr_replace($string, '', $start_pos, $end_pos + strlen($end) - $start_pos);
        }

        return $string;
    }

    /**
     * Remove every element that begins with $tag_start, including nested
     * elements of the same tag name.
     *
     * The end of a section is found by extending it to successive closing
     * tags until it holds at least as many closing as opening tags (at most
     * 100 attempts). An opener whose closing tag cannot be found is skipped
     * and left in place.
     *
     * @param string $string HTML to clean.
     * @param string $tag_name Tag name, e.g. 'div'.
     * @param string $tag_start Start of the opening tag to match, e.g.
     *                          '<div class="shareBar"'; must begin with
     *                          '<' . $tag_name, otherwise nothing is removed.
     * @return string
     */
    private function stripRecursiveHtmlSection($string, $tag_name, $tag_start) {
        $open_tag = '<' . $tag_name;
        $close_tag = '</' . $tag_name . '>';
        $close_tag_length = strlen($close_tag);

        if (strpos($tag_start, $open_tag) !== 0) {
            return $string;
        }

        // Only advanced past openers that could not be closed, so each of
        // them is examined once and the loop always terminates.
        $scan_from = 0;
        while (($section_start = strpos($string, $tag_start, $scan_from)) !== false) {
            $search_offset = $section_start;
            $section_length = null;
            $max_recursion = 100;

            do {
                $max_recursion--;
                $section_end = strpos($string, $close_tag, $search_offset);
                if ($section_end === false) {
                    $section_length = null;
                    break;
                }
                $search_offset = $section_end + $close_tag_length;
                $section_length = $search_offset - $section_start;
                $section = substr($string, $section_start, $section_length);
                $unbalanced = substr_count($section, $open_tag) > substr_count($section, $close_tag);
            } while ($unbalanced && $max_recursion > 0);

            if ($section_length === null) {
                $scan_from = $section_start + strlen($tag_start);
                continue;
            }

            $string = substr_replace($string, '', $section_start, $section_length);
        }

        return $string;
    }

    /**
     * Plain text of the first $tag child of a feed item, CDATA stripped.
     *
     * @param object $element Node returned by find() on the feed document.
     * @param string $tag Child tag name.
     * @return string '' when the child does not exist.
     */
    private function childPlaintext($element, $tag) {
        $node = $element->find($tag, 0);
        return is_object($node) ? $this->stripCdata($node->plaintext) : '';
    }

    /**
     * Value of the "content" attribute of the first node matching $selector.
     *
     * @param object $dom Document returned by getSimpleHTMLDOM().
     * @param string $selector Selector passed to find().
     * @return string '' when the node or its attribute is missing.
     */
    private function metaContent($dom, $selector) {
        $node = $dom->find($selector, 0);
        if (!is_object($node)) {
            return '';
        }
        $content = $node->content;
        return is_string($content) ? $content : '';
    }
}