diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php
index 6c7d5f6..114e651 100644
--- a/bridges/WordPressBridge.php
+++ b/bridges/WordPressBridge.php
@@ -1,10 +1,7 @@
 outertext;
-        $element_text = str_replace('', '', $element_text);
-        $element_text = str_replace('', '', $element_text);
-        $element_text = str_replace('', '', $string);
-        return $string;
-    }
-
-    private function ClearContent($content) {
+    /**
+     * Removes every fragment matched by the two patterns below from $content.
+     *
+     * @param string $content HTML of an article body
+     * @return string the HTML with the matched fragments removed
+     */
+    private function clearContent($content) {
         $content = preg_replace('/]*>[^<]*<\/script>/', '', $content);
         $content = preg_replace('/
/', '', $content);
         return $content;
     }
 
+    /**
+     * Completes a feed item with the content of the article page it links to.
+     *
+     * The item is first built by the parent implementation; the page at
+     * $item['uri'] is then loaded and the first matching content container
+     * (article, .single-content, .post) supplies $item['content']. When none
+     * matches, the item is returned exactly as the parent built it.
+     *
+     * @param mixed $newItem feed entry, forwarded untouched to parent::parseItem()
+     * @return array the item, with 'content' replaced when a container was found
+     */
+    protected function parseItem($newItem){
+        $item=parent::parseItem($newItem);
+
+        $article_html = $this->getSimpleHTMLDOMCached($item['uri']);
+
+        $article=null;
+        switch(true){
+        case !is_null($article_html->find('article',0)):
+            // most common content div
+            $article = $article_html->find('article', 0);
+            break;
+        case !is_null($article_html->find('.single-content',0)):
+            // another common content div
+            $article = $article_html->find('.single-content', 0);
+            break;
+        case !is_null($article_html->find('.post',0)):
+            // for old WordPress themes without HTML5
+            $article = $article_html->find('.post', 0);
+            break;
+        }
+
+        if(!is_null($article)){
+            $item['content'] = $this->clearContent($article->innertext);
+        }
+
+        return $item;
+    }
+
+    /**
+     * Validates the 'url' input, then hands the site's '/feed/atom' URL to
+     * collectExpandableDatas().
+     *
+     * Only values starting with "http" (which also covers "https") are
+     * accepted; anything else is reported as a client error.
+     */
     public function collectData(){
-
-        $html = $this->getSimpleHTMLDOM($this->getURI().'/feed/atom')
-            or $this->returnServerError("Could not request ".$this->getURI().'/feed/atom');
-
-        // Notice: We requested an ATOM feed, however some sites return RSS feeds instead!
-        if($html->find('entry')){
-            $type=WORDPRESS_TYPE_ATOM;
-        }else if($html->find('item')){
-            $type=WORDPRESS_TYPE_RSS;
-        }else{
-            $type=WORDPRESS_TYPE_ATOM; // Make ATOM default
+        if(substr($this->getInput('url'),0,strlen('http'))!=='http'){
+            // just in case someone finds a way to access local files by playing with the url
+            $this->returnClientError('The url parameter must either refer to http or https protocol.');
         }
 
-        if($type === WORDPRESS_TYPE_RSS)
-            $posts = $html->find('item');
-        else
-            $posts = $html->find('entry');
+        $this->collectExpandableDatas($this->getURI().'/feed/atom');
 
-        if(!empty($posts) ) {
-            $this->sitename = $html->find('title', 0)->plaintext;
-            $i=0;
-
-            foreach ($posts as $article) {
-                if($i < 3) {
-
-                    $item = array();
-
-                    $article = $this->ReplaceLinkTagsWithUrlTags($article);
-
-                    if($type === WORDPRESS_TYPE_RSS){
-                        $item['uri'] = $article->find('url', 0)->innertext; // 'link' => 'url'!
-                        $item['title'] = $article->find('title', 0)->plaintext;
-                        $item['author'] = trim($this->StripCDATA($article->find('dc:creator', 0)->innertext));
-                        $item['timestamp'] = strtotime($article->find('pubDate', 0)->innertext);
-                    } else {
-                        $item['uri'] = $article->find('url', 0)->getAttribute('href'); // 'link' => 'url'!
-                        $item['title'] = $this->StripCDATA($article->find('title', 0)->plaintext);
-                        $item['author'] = trim($article->find('author', 0)->innertext);
-                        $item['timestamp'] = strtotime($article->find('updated', 0)->innertext);
-                    }
-
-                    $article_html = $this->getSimpleHTMLDOMCached($item['uri']);
-
-                    // Attempt to find most common content div
-                    if(!isset($item['content'])){
-                        $article = $article_html->find('article', 0);
-                        if(!empty($article)){
-                            $item['content'] = $this->ClearContent($article->innertext);
-                        }
-                    }
-
-                    // another common content div
-                    if(!isset($item['content'])){
-                        $article = $article_html->find('.single-content', 0);
-                        if(!empty($article)){
-                            $item['content'] = $this->ClearContent($article->innertext);
-                        }
-                    }
-
-                    // for old WordPress themes without HTML5
-                    if(!isset($item['content'])){
-                        $article = $article_html->find('.post', 0);
-                        if(!empty($article)){
-                            $item['content'] = $this->ClearContent($article->innertext);
-                        }
-                    }
-
-                    $this->items[] = $item;
-                    $i++;
-                }
-            }
-        } else {
-            $this->returnServerError("Sorry, ".$this->getURI()." doesn't seem to be a Wordpress blog.");
-        }
-    }
-
-    public function getURI(){
-        return $this->getInput('url');
-    }
-
-    public function getName() {
-        return "{$this->sitename} - Wordpress Bridge";
     }
 
     public function getCacheDuration() {