From 0a008c4eb3c9e415fd55cc174ec5319a66265180 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Thu, 4 Aug 2016 20:06:53 +0200 Subject: [PATCH] [WordPress] Support atom AND rss for feed replies Some sites return RSS feeds instead of ATOM feeds even though ATOM feeds were specifically requested (/feed/atom)! This bridge will now detect ATOM/RSS feeds and change behavior accordingly. --- bridges/WordPressBridge.php | 52 ++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index 7a1b12e..27c231d 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -1,4 +1,6 @@ name = "Wordpress Bridge"; $this->uri = "https://wordpress.org/"; $this->description = "Returns the 3 newest full posts of a Wordpress blog"; - $this->update = "2016-08-02"; + $this->update = "2016-08-04"; $this->parameters[] = '[ @@ -22,6 +24,24 @@ class WordPressBridge extends BridgeAbstract { } + // Returns the content type for a given html dom + function DetectContentType($html){ + if($html->find('entry')) + return WORDPRESS_TYPE_ATOM; + if($html->find('item')) + return WORDPRESS_TYPE_RSS; + return WORDPRESS_TYPE_ATOM; // Make ATOM default + } + + // Replaces all 'link' tags with 'url' for simplehtmldom to actually find 'links' ('url') + function ReplaceLinkTagsWithUrlTags($element){ + // We need to fix the 'link' tag as simplehtmldom cannot parse it (just rename it and load back as dom) + $element_text = $element->outertext; + $element_text = str_replace('<link>', '<url>', $element_text); + $element_text = str_replace('</link>', '</url>', $element_text); + return str_get_html($element_text); + } + public function collectData(array $param) { function StripCDATA($string) { @@ -44,17 +64,35 @@ class WordPressBridge extends BridgeAbstract { $this->url = $this->url.'/feed/atom'; $html = $this->file_get_html($this->url) or $this->returnError("Could not request {$this->url}.", 404); - $posts = 
$html->find('entry'); + + // Notice: We requested an ATOM feed, however some sites return RSS feeds instead! + $type = $this->DetectContentType($html); + + if($type === WORDPRESS_TYPE_RSS) + $posts = $html->find('item'); + else + $posts = $html->find('entry'); + + if(!empty($posts) ) { $this->name = $html->find('title', 0)->plaintext; $i=0; - foreach ($html->find('entry') as $article) { + foreach ($posts as $article) { if($i < 3) { - $this->items[$i]->uri = $article->find('link', 0)->getAttribute('href'); - $this->items[$i]->title = StripCDATA($article->find('title', 0)->plaintext); - $this->items[$i]->author = trim($article->find('author', 0)->innertext); - $this->items[$i]->timestamp = strtotime($article->find('updated', 0)->innertext); + $article = $this->ReplaceLinkTagsWithUrlTags($article); + + if($type === WORDPRESS_TYPE_RSS){ + $this->items[$i]->uri = $article->find('url', 0)->innertext; // 'link' => 'url'! + $this->items[$i]->title = $article->find('title', 0)->plaintext; + $this->items[$i]->author = trim($article->find('dc:creator', 0)->innertext); + $this->items[$i]->timestamp = strtotime($article->find('pubDate', 0)->innertext); + } else { + $this->items[$i]->uri = $article->find('url', 0)->getAttribute('href'); // 'link' => 'url'! + $this->items[$i]->title = StripCDATA($article->find('title', 0)->plaintext); + $this->items[$i]->author = trim($article->find('author', 0)->innertext); + $this->items[$i]->timestamp = strtotime($article->find('updated', 0)->innertext); + } $article_html = $this->file_get_html($this->items[$i]->uri); $this->items[$i]->content = clearContent($article_html->find('article', 0)->innertext); if(empty($this->items[$i]->content))