forked from blallo/rss-bridge
Merge pull request #289 from ORelio/master
Numerama fixes & add 2 bridges
This commit is contained in:
commit
0454fcc780
3 changed files with 427 additions and 44 deletions
|
@ -1,60 +1,60 @@
|
||||||
<?php
|
<?php
|
||||||
class NumeramaBridge extends BridgeAbstract{
|
class NumeramaBridge extends BridgeAbstract{
|
||||||
|
|
||||||
public function loadMetadatas() {
|
public function loadMetadatas() {
|
||||||
|
|
||||||
$this->maintainer = "mitsukarenai";
|
$this->maintainer = 'mitsukarenai';
|
||||||
$this->name = "Numerama";
|
$this->name = 'Numerama';
|
||||||
$this->uri = "http://www.numerama.com/";
|
$this->uri = 'http://www.numerama.com/';
|
||||||
$this->description = "Returns the 5 newest posts from Numerama (full text)";
|
$this->description = 'Returns the 5 newest posts from Numerama (full text)';
|
||||||
$this->update = "2015-10-12";
|
$this->update = '2016-07-19';
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public function collectData(array $param){
|
|
||||||
|
|
||||||
function NumeramaStripCDATA($string) {
|
|
||||||
$string = str_replace('<![CDATA[', '', $string);
|
|
||||||
$string = str_replace(']]>', '', $string);
|
|
||||||
return $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
function NumeramaExtractContent($url)
|
|
||||||
{
|
|
||||||
$html2 = file_get_html($url);
|
|
||||||
$text = $html2->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
|
|
||||||
$text = '<img alt="" style="max-width:300px;" src="'.$html2->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture
|
|
||||||
$text = $text.$html2->find('article[class=post-content]', 0)->innertext; // extract the post
|
|
||||||
return $text;
|
|
||||||
}
|
|
||||||
|
|
||||||
$html = $this->file_get_html('http://www.numerama.com/feed/') or $this->returnError('Could not request Numerama.', 404);
|
|
||||||
$limit = 0;
|
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
|
||||||
if($limit < 5) {
|
|
||||||
$item = new \Item();
|
|
||||||
$item->title = html_entity_decode(NumeramaStripCDATA($element->find('title', 0)->innertext));
|
|
||||||
$item->author = NumeramaStripCDATA($element->find('dc:creator', 0)->innertext);
|
|
||||||
$item->uri = NumeramaStripCDATA($element->find('guid', 0)->plaintext);
|
|
||||||
$item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
|
|
||||||
$item->content = NumeramaExtractContent($item->uri);
|
|
||||||
$this->items[] = $item;
|
|
||||||
$limit++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getName(){
|
public function collectData(array $param) {
|
||||||
|
|
||||||
|
/**
 * Drop the CDATA opening and closing markers from a feed field,
 * keeping the text they wrap.
 *
 * @param string $string Raw field content taken from the feed.
 * @return string The same content without '<![CDATA[' and ']]>'.
 */
function NumeramaStripCDATA($string) {
    // Both markers are removed in order, exactly like two chained str_replace() calls.
    return str_replace(array('<![CDATA[', ']]>'), '', $string);
}
|
||||||
|
|
||||||
|
/**
 * Build the item content for one article page: the post picture followed
 * by the post body, with the related-articles block emptied first.
 *
 * Every lookup is checked before use, so a page that lacks one of the
 * expected nodes yields partial (or empty) content instead of a fatal
 * "property on null" error.
 *
 * @param object $bridge Object exposing file_get_html($url); the caller passes the bridge itself.
 * @param string $url    Address of the article page.
 * @return string HTML fragment; empty when the page could not be loaded.
 */
function NumeramaExtractContent($bridge, $url)
{
    $page = $bridge->file_get_html($url);
    if (!is_object($page)) {
        return '';
    }

    // Remove the related articles block so it does not end up inside the post body.
    $related = $page->find('section[class=related-article]', 0);
    if (is_object($related)) {
        $related->innertext = '';
    }

    $text = '';

    // Add the post picture when the page advertises one.
    $picture = $page->find('meta[property=og:image]', 0);
    if (is_object($picture)) {
        $text .= '<img alt="" style="max-width:300px;" src="'.$picture->getAttribute('content').'">';
    }

    // Extract the post itself.
    $post = $page->find('article[class=post-content]', 0);
    if (is_object($post)) {
        $text .= $post->innertext;
    }

    return $text;
}
|
||||||
|
|
||||||
|
// Load the RSS feed; returnError() is invoked when nothing usable comes back.
$html = $this->file_get_html('http://www.numerama.com/feed/') or $this->returnError('Could not request Numerama.', 404);

// Only the first 5 feed entries are converted; each one triggers a fetch of the article page.
$count = 0;
foreach ($html->find('item') as $entry) {
    if ($count >= 5) {
        break;
    }

    $item = new \Item();
    $item->title = html_entity_decode(NumeramaStripCDATA($entry->find('title', 0)->innertext));
    $item->author = NumeramaStripCDATA($entry->find('dc:creator', 0)->innertext);
    $item->uri = NumeramaStripCDATA($entry->find('guid', 0)->plaintext);
    $item->timestamp = strtotime($entry->find('pubDate', 0)->plaintext);
    // The full text is taken from the article page, not from the feed.
    $item->content = NumeramaExtractContent($this, $item->uri);

    $this->items[] = $item;
    $count++;
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getName() {
|
||||||
return 'Numerama';
|
return 'Numerama';
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getURI(){
|
public function getURI() {
|
||||||
return 'http://www.numerama.com/';
|
return 'http://www.numerama.com/';
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getCacheDuration(){
|
public function getCacheDuration() {
|
||||||
return 1800; // 30min
|
return 1800; // 30min
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
75
bridges/WeLiveSecurityBridge.php
Normal file
75
bridges/WeLiveSecurityBridge.php
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
<?php
|
||||||
|
class WeLiveSecurityBridge extends BridgeAbstract {
|
||||||
|
|
||||||
|
/**
 * Fill in the descriptive fields of this bridge.
 * Name and URI come from getName() / getURI() so they are written in one place only.
 */
public function loadMetadatas() {
    $this->maintainer  = 'ORelio';
    $this->name        = $this->getName();
    $this->uri         = $this->getURI();
    $this->description = 'Returns the newest articles.';
    $this->update      = '2016-07-19';
}
|
||||||
|
|
||||||
|
public function collectData(array $param) {
|
||||||
|
|
||||||
|
/**
 * Return the text found between the first occurrence of $start and the
 * first occurrence of $end that follows it.
 *
 * @param string $string Text to search.
 * @param string $start  Opening delimiter (excluded from the result).
 * @param string $end    Closing delimiter (excluded from the result).
 * @return string|false  The enclosed text, or false when either delimiter is missing.
 */
function ExtractFromDelimiters($string, $start, $end) {
    $start_position = strpos($string, $start);
    if ($start_position === false) {
        return false;
    }

    $content_position = $start_position + strlen($start);
    $end_position = strpos($string, $end, $content_position);
    if ($end_position === false) {
        // A missing end delimiter used to reach substr() as a false length and
        // came back as an empty string, hiding the failure from the caller.
        return false;
    }

    return substr($string, $content_position, $end_position - $content_position);
}
|
||||||
|
|
||||||
|
/**
 * Remove every section that begins with $start and runs up to and
 * including the next $end.
 *
 * @param string $string Text to clean.
 * @param string $start  Marker opening a section to remove.
 * @param string $end    Marker closing a section to remove.
 * @return string The text without the delimited sections. A section that is
 *                never closed is left untouched.
 */
function StripWithDelimiters($string, $start, $end) {
    $end_length = strlen($end);

    while (($start_position = strpos($string, $start)) !== false) {
        $end_position = strpos($string, $end, $start_position);
        if ($end_position === false) {
            // Unterminated section: stop instead of cutting strlen($end) arbitrary bytes.
            break;
        }

        $section_to_remove = substr($string, $start_position, $end_position - $start_position + $end_length);
        if ($section_to_remove === '') {
            // Empty delimiters would make str_replace() a no-op and loop forever.
            break;
        }

        // Identical copies of the section elsewhere in the text are removed in the same pass.
        $string = str_replace($section_to_remove, '', $string);
    }

    return $string;
}
|
||||||
|
|
||||||
|
// Load the site feed; returnError() is invoked when nothing usable comes back.
$feed = $this->getURI().'feed/';
$html = $this->file_get_html($feed) or $this->returnError('Could not request '.$this->getName().': '.$feed, 500);
$limit = 0;

foreach ($html->find('item') as $element) {
    // Only the first 5 entries are converted; each one needs a fetch of the article page.
    if ($limit >= 5) {
        break;
    }

    $image_node = $element->find('image', 0);
    $article_image = is_object($image_node) ? $image_node->plaintext : '';
    $article_url = ExtractFromDelimiters($element->innertext, '<link>', '</link>');
    $article_summary = ExtractFromDelimiters($element->innertext, '<description><![CDATA[<p>', '</p>');

    $article_html = file_get_contents($article_url) or $this->returnError('Could not request '.$this->getName().': '.$article_url, 500);

    // The response can be GZipped even though GZip was not requested.
    // "\x1f\x8b" is the GZip magic number (ID1/ID2), see http://www.gzip.org/zlib/rfc-gzip.html#header-trailer
    if (substr($article_html, 0, 2) === "\x1f\x8b") {
        $inflated = gzdecode($article_html);
        if ($inflated !== false) {
            $article_html = $inflated;
        }
    }

    $article_html = str_get_html($article_html);
    if (!is_object($article_html)) {
        $this->returnError('Could not request '.$this->getName().': '.$article_url, 500);
        continue; // only reached if returnError() hands control back
    }

    // A missing node yields an empty value instead of a fatal "property on null" error.
    $content_node = $article_html->find('div.wlistingsingletext', 0);
    $article_content = is_object($content_node) ? $content_node->innertext : '';
    $article_content = StripWithDelimiters($article_content, '<script', '</script>');
    $article_content = ($article_image !== '' ? '<p><img src="'.$article_image.'" /></p>' : '')
        .'<p><b>'.$article_summary.'</b></p>'
        .trim($article_content);

    $author_node = $article_html->find('a[rel=author]', 0);

    $item = new \Item();
    $item->uri = $article_url;
    $item->thumbnailUri = $article_image;
    $item->title = $element->find('title', 0)->plaintext;
    $item->author = is_object($author_node) ? $author_node->plaintext : '';
    $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
    $item->content = $article_content;
    $this->items[] = $item;
    $limit++;
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @return string Display name of this bridge.
 */
public function getName()
{
    return 'We Live Security';
}
|
||||||
|
|
||||||
|
/**
 * @return string Base address of the site; collectData() appends 'feed/' to it.
 */
public function getURI()
{
    return 'http://www.welivesecurity.com/';
}
|
||||||
|
|
||||||
|
/**
 * @return int Cache lifetime in seconds (3600 = 1 hour).
 */
public function getCacheDuration()
{
    return 3600;
}
|
||||||
|
}
|
308
bridges/ZDNetBridge.php
Normal file
308
bridges/ZDNetBridge.php
Normal file
|
@ -0,0 +1,308 @@
|
||||||
|
<?php
|
||||||
|
class ZDNetBridge extends BridgeAbstract {
|
||||||
|
|
||||||
|
/**
 * Fill in the descriptive fields of this bridge and declare its single
 * "feed" list parameter.
 *
 * The parameter definition is a JSON document held in a single-quoted PHP
 * string, so every apostrophe inside it has to be written as \' or it
 * terminates the string. Entries with an empty value are headings and
 * spacers; collectData() refuses an empty feed value.
 */
public function loadMetadatas() {

    $this->maintainer = 'ORelio';
    $this->name = $this->getName();
    $this->uri = $this->getURI();
    $this->description = 'Returns the newest articles.';
    $this->update = '2016-07-18';

    $this->parameters[] =
    // http://www.zdnet.com/zdnet.opml
    '[
        {
            "name" : "Feed",
            "type" : "list",
            "identifier" : "feed",
            "values" :
            [
                { "name" : "---- Select ----", "value" : "" },

                { "name" : "", "value" : "" },
                { "name" : "Subscribe to ZDNet RSS Feeds", "value" : "" },

                { "name" : " All Blogs", "value" : "blog" },
                { "name" : " Just News", "value" : "news" },
                { "name" : " All Reviews", "value" : "topic/reviews" },
                { "name" : " Latest Downloads", "value" : "downloads!recent" },
                { "name" : " Latest Articles", "value" : "/" },
                { "name" : " Latest Australia Articles", "value" : "au" },
                { "name" : " Latest UK Articles", "value" : "uk" },
                { "name" : " Latest US Articles", "value" : "us" },
                { "name" : " Latest Asia Articles", "value" : "as" },

                { "name" : "", "value" : "" },
                { "name" : "Keep up with ZDNet Blogs RSS:", "value" : "" },

                { "name" : " Transforming the Datacenter", "value" : "blog/transforming-datacenter" },
                { "name" : " SMB India", "value" : "blog/smb-india" },
                { "name" : " Indonesia BizTech", "value" : "blog/indonesia-biztech" },
                { "name" : " Hong Kong Techie", "value" : "blog/hong-kong-techie" },
                { "name" : " Tech Taiwan", "value" : "blog/tech-taiwan" },
                { "name" : " Startup India", "value" : "blog/startup-india" },
                { "name" : " Starting Up Asia", "value" : "blog/starting-up-asia" },
                { "name" : " Next-Gen Partner", "value" : "blog/partner" },
                { "name" : " Post-PC Developments", "value" : "blog/post-pc" },
                { "name" : " Benelux", "value" : "blog/benelux" },
                { "name" : " Heat Sink", "value" : "blog/heat-sink" },
                { "name" : " Italy\'s got tech", "value" : "blog/italy" },
                { "name" : " African Enterprise", "value" : "blog/african-enterprise" },
                { "name" : " New Tech for Old India", "value" : "blog/new-india" },
                { "name" : " Estonia Uncovered", "value" : "blog/estonia" },
                { "name" : " IT Iberia", "value" : "blog/iberia" },
                { "name" : " Brazil Tech", "value" : "blog/brazil" },
                { "name" : " 500 words into the future", "value" : "blog/500-words-into-the-future" },
                { "name" : " ÜberTech", "value" : "blog/ubertech" },
                { "name" : " All About Microsoft", "value" : "blog/microsoft" },
                { "name" : " Back office", "value" : "blog/back-office" },
                { "name" : " Barker Bites Back", "value" : "blog/barker-bites-back" },
                { "name" : " Between the Lines", "value" : "blog/btl" },
                { "name" : " Big on Data", "value" : "blog/big-data" },
                { "name" : " bootstrappr", "value" : "blog/bootstrappr" },
                { "name" : " By The Way", "value" : "blog/by-the-way" },
                { "name" : " Central European Processing", "value" : "blog/central-europe" },
                { "name" : " Cloud Builders", "value" : "blog/cloud-builders" },
                { "name" : " Communication Breakdown", "value" : "blog/communication-breakdown" },
                { "name" : " Collaboration 2.0", "value" : "blog/collaboration" },
                { "name" : " Constellation Research", "value" : "blog/constellation" },
                { "name" : " Consumerization: BYOD", "value" : "blog/consumerization" },
                { "name" : " DIY-IT", "value" : "blog/diy-it" },
                { "name" : " Enterprise Web 2.0", "value" : "blog/hinchcliffe" },
                { "name" : " Five Nines: The Next Gen Datacenter", "value" : "blog/datacenter" },
                { "name" : " Forrester Research", "value" : "blog/forrester" },
                { "name" : " Full Duplex", "value" : "blog/full-duplex" },
                { "name" : " Gen Why?", "value" : "blog/gen-why" },
                { "name" : " Hardware 2.0", "value" : "blog/hardware" },
                { "name" : " Identity Matters", "value" : "blog/identity" },
                { "name" : " iGeneration", "value" : "blog/igeneration" },
                { "name" : " Internet of Everything", "value" : "blog/cisco" },
                { "name" : " Beyond IT Failure", "value" : "blog/projectfailures" },
                { "name" : " Jamie\'s Mostly Linux Stuff", "value" : "blog/jamies-mostly-linux-stuff" },
                { "name" : " Jack\'s Blog", "value" : "blog/jacks-blog" },
                { "name" : " Laptops & Desktops", "value" : "blog/computers" },
                { "name" : " Linux and Open Source", "value" : "blog/open-source" },
                { "name" : " London Calling", "value" : "blog/london" },
                { "name" : " Mapping Babel", "value" : "blog/mapping-babel" },
                { "name" : " Mixed Signals", "value" : "blog/mixed-signals" },
                { "name" : " Mobile India", "value" : "blog/mobile-india" },
                { "name" : " Mobile News", "value" : "blog/mobile-news" },
                { "name" : " Networking", "value" : "blog/networking" },
                { "name" : " Norse Code", "value" : "blog/norse-code" },
                { "name" : " Null Pointer", "value" : "blog/null-pointer" },
                { "name" : " The Full Tilt", "value" : "blog/the-full-tilt" },
                { "name" : " Pinoy Post", "value" : "blog/pinoy-post" },
                { "name" : " Practically Tech", "value" : "blog/practically-tech" },
                { "name" : " Product Central", "value" : "blog/product-central" },
                { "name" : " Pulp Tech", "value" : "blog/violetblue" },
                { "name" : " Qubits and Pieces", "value" : "blog/qubits-and-pieces" },
                { "name" : " Securify This!", "value" : "blog/securify-this" },
                { "name" : " Service Oriented", "value" : "blog/service-oriented" },
                { "name" : " Small Talk", "value" : "blog/small-talk" },
                { "name" : " Small Business Matters", "value" : "blog/small-business-matters" },
                { "name" : " Smartphones and Cell Phones", "value" : "blog/cell-phones" },
                { "name" : " Social Business", "value" : "blog/feeds" },
                { "name" : " Social CRM: The Conversation", "value" : "blog/crm" },
                { "name" : " Software & Services Safari", "value" : "blog/sommer" },
                { "name" : " Storage Bits", "value" : "blog/storage" },
                { "name" : " Stacking up Open Clouds", "value" : "blog/apac-redhat" },
                { "name" : " Techie Isles", "value" : "blog/techie-isles" },
                { "name" : " Technolatte", "value" : "blog/technolatte" },
                { "name" : " Tech Podium", "value" : "blog/tech-podium" },
                { "name" : " Tel Aviv Tech", "value" : "blog/tel-aviv" },
                { "name" : " Tech Broiler", "value" : "blog/perlow" },
                { "name" : " The SANMAN", "value" : "blog/the-sanman" },
                { "name" : " The open source revolution", "value" : "blog/the-open-source-revolution" },
                { "name" : " The German View", "value" : "blog/german" },
                { "name" : " The Ed Bott Report", "value" : "blog/bott" },
                { "name" : " The Mobile Gadgeteer", "value" : "blog/mobile-gadgeteer" },
                { "name" : " The Apple Core", "value" : "blog/apple" },
                { "name" : " Tom Foremski: IMHO", "value" : "blog/foremski" },
                { "name" : " Twisted Wire", "value" : "blog/twisted-wire" },
                { "name" : " Vive la tech", "value" : "blog/france" },
                { "name" : " Virtually Speaking", "value" : "blog/virtualization" },
                { "name" : " View from China", "value" : "blog/china" },
                { "name" : " Web design & Free Software", "value" : "blog/web-design-and-free-software" },
                { "name" : " ZDNet Government", "value" : "blog/government" },
                { "name" : " ZDNet UK Book Reviews", "value" : "blog/zdnet-uk-book-reviews" },
                { "name" : " ZDNet UK First Take", "value" : "blog/zdnet-uk-first-take" },
                { "name" : " Zero Day", "value" : "blog/security" },

                { "name" : "", "value" : "" },
                { "name" : "ZDNet Hot Topics RSS:", "value" : "" },

                { "name" : " Apple", "value" : "topic/apple" },
                { "name" : " Collaboration", "value" : "topic/collaboration" },
                { "name" : " Enterprise Software", "value" : "topic/enterprise-software" },
                { "name" : " Google", "value" : "topic/google" },
                { "name" : " Great debate", "value" : "topic/great-debate" },
                { "name" : " Hardware", "value" : "topic/hardware" },
                { "name" : " IBM", "value" : "topic/ibm" },
                { "name" : " iOS", "value" : "topic/ios" },
                { "name" : " iPhone", "value" : "topic/iphone" },
                { "name" : " iPad", "value" : "topic/ipad" },
                { "name" : " IT Priorities", "value" : "topic/it-priorities" },
                { "name" : " Laptops", "value" : "topic/laptops" },
                { "name" : " Legal", "value" : "topic/legal" },
                { "name" : " Linux", "value" : "topic/linux" },
                { "name" : " Microsoft", "value" : "topic/microsoft" },
                { "name" : " Mobile OS", "value" : "topic/mobile-os" },
                { "name" : " Mobility", "value" : "topic/mobility" },
                { "name" : " Networking", "value" : "topic/networking" },
                { "name" : " Oracle", "value" : "topic/oracle" },
                { "name" : " Processors", "value" : "topic/processors" },
                { "name" : " Samsung", "value" : "topic/samsung" },
                { "name" : " Security", "value" : "topic/security" },
                { "name" : " Small business: going big on mobility", "value" : "topic/small-business-going-big-on-mobility" },

                { "name" : "", "value" : "" },
                { "name" : "Product Blogs:", "value" : "" },

                { "name" : " Digital Cameras & Camcorders", "value" : "blog/digitalcameras" },
                { "name" : " Home Theater", "value" : "blog/home-theater" },
                { "name" : " Laptops and Desktops", "value" : "blog/computers" },
                { "name" : " The Mobile Gadgeteer", "value" : "blog/mobile-gadgeteer" },
                { "name" : " Smartphones and Cell Phones", "value" : "blog/cell-phones" },
                { "name" : " The ToyBox", "value" : "blog/gadgetreviews" },

                { "name" : "", "value" : "" },
                { "name" : "Vertical Blogs:", "value" : "" },

                { "name" : " ZDNet Education", "value" : "blog/education" },
                { "name" : " ZDNet Healthcare", "value" : "blog/healthcare" },
                { "name" : " ZDNet Government", "value" : "blog/government" }
            ]
        }
    ]';

}
|
||||||
|
|
||||||
|
public function collectData(array $param) {
|
||||||
|
|
||||||
|
/**
 * Drop the CDATA opening and closing markers from a feed field and trim
 * the surrounding whitespace.
 *
 * @param string $string Raw field content taken from the feed.
 * @return string The unwrapped, trimmed text.
 */
function StripCDATA($string) {
    $unwrapped = str_replace(array('<![CDATA[', ']]>'), '', $string);
    return trim($unwrapped);
}
|
||||||
|
|
||||||
|
/**
 * Return the text found between the first occurrence of $start and the
 * first occurrence of $end that follows it.
 *
 * @param string $string Text to search.
 * @param string $start  Opening delimiter (excluded from the result).
 * @param string $end    Closing delimiter (excluded from the result).
 * @return string|false  The enclosed text, or false when either delimiter is missing.
 */
function ExtractFromDelimiters($string, $start, $end) {
    $start_position = strpos($string, $start);
    if ($start_position === false) {
        return false;
    }

    $content_position = $start_position + strlen($start);
    $end_position = strpos($string, $end, $content_position);
    if ($end_position === false) {
        // A missing end delimiter used to reach substr() as a false length and
        // came back as an empty string, hiding the failure from the caller.
        return false;
    }

    return substr($string, $content_position, $end_position - $content_position);
}
|
||||||
|
|
||||||
|
/**
 * Remove every section that begins with $start and runs up to and
 * including the next $end.
 *
 * @param string $string Text to clean.
 * @param string $start  Marker opening a section to remove.
 * @param string $end    Marker closing a section to remove.
 * @return string The text without the delimited sections. A section that is
 *                never closed is left untouched.
 */
function StripWithDelimiters($string, $start, $end) {
    $end_length = strlen($end);

    while (($start_position = strpos($string, $start)) !== false) {
        $end_position = strpos($string, $end, $start_position);
        if ($end_position === false) {
            // Unterminated section: stop instead of cutting strlen($end) arbitrary bytes.
            break;
        }

        $section_to_remove = substr($string, $start_position, $end_position - $start_position + $end_length);
        if ($section_to_remove === '') {
            // Empty delimiters would make str_replace() a no-op and loop forever.
            break;
        }

        // Identical copies of the section elsewhere in the text are removed in the same pass.
        $string = str_replace($section_to_remove, '', $string);
    }

    return $string;
}
|
||||||
|
|
||||||
|
/**
 * Remove every element whose opening tag begins with $tag_start, together
 * with the elements of the same tag name nested inside it.
 *
 * The end of a section is found by extending it one closing tag at a time
 * until it holds at least as many closing tags as opening tags.
 *
 * @param string $string    HTML to clean.
 * @param string $tag_name  Tag name, e.g. 'div'.
 * @param string $tag_start Beginning of the opening tag to look for, e.g. '<div class="shareBar"'.
 *                          Nothing is removed unless it starts with '<' followed by $tag_name.
 * @return string The cleaned HTML. The text is returned as-is from the point where
 *                a matching closing tag can no longer be found.
 */
function StripRecursiveHTMLSection($string, $tag_name, $tag_start) {
    $open_tag = '<'.$tag_name;
    $close_tag = '</'.$tag_name.'>';
    $close_tag_length = strlen($close_tag);

    if (strpos($tag_start, $open_tag) !== 0) {
        return $string;
    }

    while (($section_start = strpos($string, $tag_start)) !== false) {
        $max_recursion = 100; // upper bound on closing tags examined for one section
        $section_to_remove = null;
        $search_offset = $section_start;

        do {
            $max_recursion--;
            $section_end = strpos($string, $close_tag, $search_offset);
            if ($section_end === false) {
                // Unbalanced markup: without this check false was used in the offset
                // arithmetic, which removed arbitrary text or never terminated.
                return $string;
            }
            $search_offset = $section_end + $close_tag_length;
            $section_to_remove = substr($string, $section_start, $search_offset - $section_start);
            $open_tag_count = substr_count($section_to_remove, $open_tag);
            $close_tag_count = substr_count($section_to_remove, $close_tag);
        } while ($open_tag_count > $close_tag_count && $max_recursion > 0);

        $string = str_replace($section_to_remove, '', $string);
    }

    return $string;
}
|
||||||
|
|
||||||
|
// Resolve the requested feed. Values prefixed with "downloads!" are served
// from the downloads. host instead of www.
$baseUri = $this->getURI();
$feed = isset($param['feed']) ? $param['feed'] : '';
if (empty($feed)) {
    $this->returnError('Please select a feed to display.', 400);
}
if (strpos($feed, 'downloads!') !== false) {
    $feed = str_replace('downloads!', '', $feed);
    $baseUri = str_replace('www.', 'downloads.', $baseUri);
}

// Accept only letters, digits, dashes and at most one slash, up to 64 characters.
// The length must be taken first and then compared: strlen($feed > 64) measured
// the result of a comparison instead of the parameter.
if ($feed !== preg_replace('/[^a-zA-Z0-9-\/]+/', '', $feed) || substr_count($feed, '/') > 1 || strlen($feed) > 64) {
    $this->returnError('Invalid "feed" parameter.', 400);
}

$url = $baseUri.trim($feed, '/').'/rss.xml';
$html = $this->file_get_html($url) or $this->returnError('Could not request ZDNet: '.$url, 500);
$limit = 0;

foreach ($html->find('item') as $element) {
    // Only the first 10 entries are converted; each one needs a fetch of the article page.
    if ($limit >= 10) {
        break;
    }

    // Drop the "#ftag=..." fragment from the article link.
    $article_url = preg_replace('/([^#]+)#ftag=.*/', '$1', StripCDATA(ExtractFromDelimiters($element->innertext, '<link>', '</link>')));
    $article_author = StripCDATA(ExtractFromDelimiters($element->innertext, 'role="author">', '<'));
    $article_title = StripCDATA($element->find('title', 0)->plaintext);
    $article_subtitle = StripCDATA($element->find('description', 0)->plaintext);
    $article_timestamp = strtotime(StripCDATA($element->find('pubDate', 0)->plaintext));
    $article = $this->file_get_html($article_url) or $this->returnError('Could not request ZDNet: '.$article_url, 500);

    // Author: feed value first, then the page's author meta tag, then a fixed fallback.
    if (!empty($article_author)) {
        $author = $article_author;
    } else {
        $author = $article->find('meta[name=author]', 0);
        $author = is_object($author) ? $author->content : 'ZDNet';
    }

    // Thumbnail: the page's image meta tag, or the site logo when there is none.
    $thumbnail = $article->find('meta[itemprop=image]', 0);
    $thumbnail = is_object($thumbnail)
        ? $thumbnail->content
        : 'http://zdnet1.cbsistatic.com/fly/bundles/zdnetcss/images/logos/logo-192x192.png';

    // A page without an <article> element yields an empty body instead of a fatal error.
    $article_body = $article->find('article', 0);
    $contents = is_object($article_body) ? $article_body->innertext : '';

    // Remove sharing widgets, galleries, related links and embedded players.
    foreach (array(
        '<div class="shareBar"',
        '<div class="shortcodeGalleryWrapper"',
        '<div class="relatedContent',
        '<div class="downloadNow',
        '<div data-shortcode',
        '<div id="sharethrough',
        '<div id="inpage-video'
    ) as $div_start) {
        $contents = StripRecursiveHTMLSection($contents, 'div', $div_start);
    }
    $contents = StripWithDelimiters($contents, '<script', '</script>');
    $contents = StripWithDelimiters($contents, '<meta itemprop="image"', '>');
    $contents = StripWithDelimiters($contents, '<section class="sharethrough-top', '</section>');
    $contents = '<p><b>'.$article_subtitle.'</b></p>'.trim($contents);

    $item = new \Item();
    $item->author = $author;
    $item->uri = $article_url;
    $item->title = $article_title;
    $item->thumbnailUri = $thumbnail;
    $item->timestamp = $article_timestamp;
    $item->content = $contents;
    $this->items[] = $item;
    $limit++;
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @return string Display name of this bridge.
 */
public function getName()
{
    return 'ZDNet Bridge';
}
|
||||||
|
|
||||||
|
/**
 * @return string Base address of the site; collectData() builds feed URLs from it.
 */
public function getURI()
{
    return 'http://www.zdnet.com/';
}
|
||||||
|
|
||||||
|
/**
 * @return int Cache lifetime in seconds (3600 = 1 hour).
 */
public function getCacheDuration()
{
    return 3600;
}
|
||||||
|
}
|
Loading…
Reference in a new issue