From 51a3a75aaca6c876e16c57cc376f31753cc73ea9 Mon Sep 17 00:00:00 2001
From: logmanoriginal
Date: Sun, 4 Sep 2016 12:32:56 +0200
Subject: [PATCH] [Bridge] Add RSS 1.0 and 2.0 parser

RssExpander now logs and distinguishes the feed format it loaded:

- root has <channel> and <item> children: handled as RSS 1.0 via the
  new collect_RSS_1_0_data(), which loads the feed data from the first
  channel and passes every root-level item to parseRSSItem()
- root has <channel> but no <item> children: handled as RSS 0.9x/2.0
  via collect_RSS_2_0_data()
- root has <entry> children: handled as ATOM via collect_ATOM_data()
- anything else: reported with returnServerError()

Two item helpers are added on top of parseRSS_0_9_1_Item():
parseRSS_1_0_Item() returns the 0.91 result unchanged, and
parseRSS_2_0_Item() additionally sets 'timestamp' from <pubDate> and
'author' from <author> or, if that is absent, from dc:creator.
Element values are cast to string so the item array holds plain text
instead of SimpleXMLElement objects.
---
Review note: the (string) casts in parseRSS_2_0_Item() were added after
this patch was exported, so the blob id on the "index" line below no
longer matches the resulting file. Line counts are unchanged.

 lib/Bridge.php | 51 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 6 deletions(-)

diff --git a/lib/Bridge.php b/lib/Bridge.php
index f2e6525..f96e011 100644
--- a/lib/Bridge.php
+++ b/lib/Bridge.php
@@ -606,17 +606,33 @@ abstract class RssExpander extends HttpCachingBridgeAbstract {
 		$rssContent = simplexml_load_string($content);
 		$this->debugMessage('loaded RSS from ' . $url);
 
-		if(isset($rssContent->channel[0])){ // RSS format
-			// TODO insert RSS format detection
-			// For now we always assume RSS 2.0
-			$this->collect_RSS_2_0_data($rssContent);
-		} elseif(isset($rssContent->entry[0])){ // ATOM format
+		$this->debugMessage('Detecting feed format/version');
+		if(isset($rssContent->channel[0])){
+			$this->debugMessage('Detected RSS format');
+			if(isset($rssContent->item[0])){
+				$this->debugMessage('Detected RSS 1.0 format');
+				$this->collect_RSS_1_0_data($rssContent);
+			} else {
+				$this->debugMessage('Detected RSS 0.9x or 2.0 format');
+				$this->collect_RSS_2_0_data($rssContent);
+			}
+		} elseif(isset($rssContent->entry[0])){
+			$this->debugMessage('Detected ATOM format');
 			$this->collect_ATOM_data($rssContent);
-		} else { // Unknown format
+		} else {
+			$this->debugMessage('Unknown feed format/version');
 			$this->returnServerError('The feed format is unknown!');
 		}
 	}
 
+	protected function collect_RSS_1_0_data($rssContent){
+		$this->load_RSS_2_0_feed_data($rssContent->channel[0]);
+		foreach($rssContent->item as $item){
+			$this->debugMessage('parsing item ' . var_export($item, true));
+			$this->items[] = $this->parseRSSItem($item);
+		}
+	}
+
 	protected function collect_RSS_2_0_data($rssContent){
 		$rssContent = $rssContent->channel[0];
 		$this->debugMessage('RSS content is ===========\n' . var_export($rssContent, true) . '===========');
@@ -688,6 +704,29 @@ abstract class RssExpander extends HttpCachingBridgeAbstract {
 		return $item;
 	}
 
+	protected function parseRSS_1_0_Item($feedItem){
+		// 1.0 adds optional elements around the 0.91 standard
+		return $this->parseRSS_0_9_1_Item($feedItem);
+	}
+
+	protected function parseRSS_2_0_Item($feedItem){
+		// Primary data is compatible to 0.91
+		$item = $this->parseRSS_0_9_1_Item($feedItem);
+		if(isset($feedItem->pubDate)) $item['timestamp'] = strtotime((string)$feedItem->pubDate);
+		if(isset($feedItem->author)){
+			$item['author'] = (string)$feedItem->author; // keep the text, not the SimpleXMLElement
+		} else {
+			// Feed might use 'dc' namespace
+			$namespaces = $feedItem->getNamespaces(true);
+			if(isset($namespaces['dc'])){
+				$dc = $feedItem->children($namespaces['dc']);
+				if(isset($dc->creator))
+					$item['author'] = (string)$dc->creator; // keep the text, not the SimpleXMLElement
+			}
+		}
+		return $item;
+	}
+
 	/**
 	 * Method should return, from a source RSS item given by lastRSS, one of our Items objects
 	 * @param $item the input rss item