2014-05-26 19:45:10 +02:00
|
|
|
<?php
|
2016-08-04 20:06:53 +02:00
|
|
|
/**
 * Feed flavours the WordPress bridge distinguishes after downloading a feed.
 * The values are compared with === inside WordPressBridge, so they must stay
 * integers.
 */
define('WORDPRESS_TYPE_ATOM', 1); // Content is of type ATOM
define('WORDPRESS_TYPE_RSS', 2); // Content is of type RSS
|
2016-08-28 20:07:56 +02:00
|
|
|
/**
 * Bridge that turns the feed of a WordPress blog into items carrying the
 * full post content.
 *
 * The bridge requests "<blog url>/feed/atom", accepts either an ATOM or an
 * RSS answer, and for each of the newest posts loads the post page (through
 * the caching helpers inherited from HttpCachingBridgeAbstract) to extract
 * the article body.
 */
class WordPressBridge extends HttpCachingBridgeAbstract {

    /** Maximum number of feed entries converted into items per request. */
    const MAX_ITEMS = 3;

    /** @var string Blog URL given by the user; collectData() turns it into the feed URL. */
    private $url;

    /** @var string|null Title read from the feed; used by getName(). */
    public $sitename; // Name of the site

    /**
     * Fills in the bridge metadata and declares the single "url" parameter.
     */
    public function loadMetadatas() {
        $this->maintainer = "aledeg";
        $this->name = "Wordpress Bridge";
        $this->uri = "https://wordpress.org/";
        $this->description = "Returns the 3 newest full posts of a Wordpress blog";

        $this->parameters[] = array(
            'url' => array(
                'name' => 'Blog URL',
                'required' => true
            )
        );
    }

    /**
     * Returns the content type for a given html dom.
     *
     * @param object $html Parsed feed exposing a simplehtmldom-style find().
     * @return int WORDPRESS_TYPE_RSS when the document has 'item' elements but
     *             no 'entry' elements, WORDPRESS_TYPE_ATOM otherwise (default).
     */
    private function DetectContentType($html){
        if ($html->find('entry')) {
            return WORDPRESS_TYPE_ATOM;
        }

        if ($html->find('item')) {
            return WORDPRESS_TYPE_RSS;
        }

        return WORDPRESS_TYPE_ATOM; // Make ATOM default
    }

    /**
     * Renames every 'link' tag of an element to 'url' and re-parses the result.
     *
     * simplehtmldom cannot query 'link' elements reliably, so the markup is
     * rewritten as text and loaded back as a fresh DOM.
     *
     * @param object $element Feed entry exposing an 'outertext' property.
     * @return mixed Whatever str_get_html() returns for the rewritten markup.
     */
    private function ReplaceLinkTagsWithUrlTags($element){
        // The replacements are applied in order, exactly like three chained
        // str_replace() calls would be.
        $markup = str_replace(
            array('<link>', '</link>', '<link '),
            array('<url>', '</url>', '<url '),
            $element->outertext
        );

        return str_get_html($markup);
    }

    /**
     * Removes the CDATA opening and closing markers from a string.
     *
     * @param string $string Text that may be wrapped in <![CDATA[ ... ]]>.
     * @return string The text without the markers.
     */
    private function StripCDATA($string) {
        return str_replace(array('<![CDATA[', ']]>'), '', $string);
    }

    /**
     * Strips scripts, the "wpa" div and forms from an article body.
     *
     * @param string $content Article HTML.
     * @return string Cleaned HTML.
     */
    private function ClearContent($content) {
        $patterns = array(
            // Lazy + dotall: also removes inline scripts whose body contains
            // '<' or spans several lines.
            '/<script\b[^>]*>.*?<\/script>/is',
            // Unchanged: drops the rest of the line starting at the "wpa" div.
            '/<div class="wpa".*/',
            // Lazy + dotall: removes each form on its own instead of
            // everything between the first '<form' and the last '/form>'.
            '/<form\b.*?<\/form>/is',
        );

        foreach ($patterns as $pattern) {
            $cleaned = preg_replace($pattern, '', $content);
            // preg_replace() yields null on a PCRE error; keep the previous
            // text rather than discarding the whole article.
            if ($cleaned !== null) {
                $content = $cleaned;
            }
        }

        return $content;
    }

    /**
     * Downloads the blog feed and appends up to MAX_ITEMS items to $this->items.
     *
     * Reports a client error when no URL was supplied and a server error when
     * the feed cannot be requested or contains no posts.
     */
    public function collectData(){
        $param = $this->parameters[$this->queriedContext];
        $this->processParams($param);

        if (!$this->hasUrl()) {
            $this->returnClientError('You must specify a URL');
        }

        // Drop trailing slashes first so "http://blog/" does not turn into
        // "http://blog//feed/atom".
        $this->url = rtrim($this->url, '/') . '/feed/atom';
        $html = $this->getSimpleHTMLDOM($this->url) or $this->returnServerError("Could not request {$this->url}.");

        // Notice: We requested an ATOM feed, however some sites return RSS feeds instead!
        $type = $this->DetectContentType($html);
        $posts = $html->find($type === WORDPRESS_TYPE_RSS ? 'item' : 'entry');

        if (empty($posts)) {
            $this->returnServerError("Sorry, {$this->url} doesn't seem to be a Wordpress blog.");
            return;
        }

        $titleNode = $html->find('title', 0);
        if ($titleNode) {
            $this->sitename = $titleNode->plaintext;
        }

        // Only the newest posts are processed; the remaining ones are never touched.
        foreach (array_slice($posts, 0, self::MAX_ITEMS) as $post) {
            $this->items[] = $this->buildItem($post, $type);
        }
    }

    /**
     * Converts one feed entry into an item array (uri, title, author,
     * timestamp and, when it can be extracted, content).
     *
     * @param object $post Feed entry node ('item' for RSS, 'entry' for ATOM).
     * @param int $type WORDPRESS_TYPE_RSS or WORDPRESS_TYPE_ATOM.
     * @return array The item.
     */
    private function buildItem($post, $type) {
        $entry = $this->ReplaceLinkTagsWithUrlTags($post);
        $item = array();

        if ($type === WORDPRESS_TYPE_RSS) {
            $item['uri'] = $this->nodeValue($entry->find('url', 0), 'innertext'); // 'link' => 'url'!
            $item['title'] = $this->nodeValue($entry->find('title', 0), 'plaintext');
            $item['author'] = trim($this->StripCDATA($this->nodeValue($entry->find('dc:creator', 0), 'innertext')));
            $item['timestamp'] = strtotime($this->nodeValue($entry->find('pubDate', 0), 'innertext'));
        } else {
            $linkNode = $entry->find('url', 0); // 'link' => 'url'!
            $item['uri'] = $linkNode ? $linkNode->getAttribute('href') : '';
            $item['title'] = $this->StripCDATA($this->nodeValue($entry->find('title', 0), 'plaintext'));

            // An ATOM author is a container (name, uri, ...): prefer the text
            // of its 'name' child so no markup ends up in the author field,
            // and fall back to the raw inner text when there is no such child.
            $authorNode = $entry->find('author', 0);
            $nameNode = $authorNode ? $authorNode->find('name', 0) : null;
            $item['author'] = trim(
                $nameNode ? $nameNode->plaintext : $this->nodeValue($authorNode, 'innertext')
            );

            $item['timestamp'] = strtotime($this->nodeValue($entry->find('updated', 0), 'innertext'));
        }

        // Without a link there is no page to load the full text from.
        if (!empty($item['uri'])) {
            $content = $this->fetchContent($item['uri']);
            if ($content !== null) {
                $item['content'] = $content;
            }
        }

        return $item;
    }

    /**
     * Loads a post page through the inherited cache helpers and extracts the
     * article body.
     *
     * @param string $uri Post URL.
     * @return string|null Cleaned article HTML, or null when no known content
     *                     container was found.
     */
    private function fetchContent($uri) {
        // Cached copies older than 24 hours are dropped before reading.
        if ($this->get_cached_time($uri) <= strtotime('-24 hours')) {
            $this->remove_from_cache($uri);
        }

        $articleHtml = $this->get_cached($uri);
        // NOTE(review): get_cached() lives in the parent class; presumably it
        // can yield a falsy value when the page cannot be loaded - confirm.
        if (!$articleHtml) {
            return null;
        }

        // Tried in order: HTML5 themes, another common content div, then old
        // WordPress themes without HTML5.
        foreach (array('article', '.single-content', '.post') as $selector) {
            $container = $articleHtml->find($selector, 0);
            if (empty($container)) {
                continue;
            }

            $content = $this->ClearContent($container->innertext);
            if ($content !== null) {
                return $content;
            }
        }

        return null;
    }

    /**
     * Reads a property of a DOM node, tolerating a missing node.
     *
     * @param object|null $node Node returned by find(), or null when absent.
     * @param string $property Property to read ('innertext' or 'plaintext').
     * @return mixed The property value, or '' when the node is missing.
     */
    private function nodeValue($node, $property) {
        return $node ? $node->$property : '';
    }

    /**
     * Returns the bridge name, prefixed with the site title once it is known.
     *
     * @return string
     */
    public function getName() {
        // No title yet (or none found in the feed): avoid a name starting with " - ".
        if ($this->sitename === null || $this->sitename === '') {
            return 'Wordpress Bridge';
        }

        return "{$this->sitename} - Wordpress Bridge";
    }

    /**
     * @return int Cache duration in seconds.
     */
    public function getCacheDuration() {
        return 3600 * 3; // 3 hours
    }

    /**
     * @return bool True when a non-empty URL has been stored by processParams().
     */
    private function hasUrl() {
        return !empty($this->url);
    }

    /**
     * Stores the 'url' parameter value in $this->url.
     *
     * @param array $param Parameter set of the queried context.
     */
    private function processParams($param) {
        // A missing value leaves the URL null, which hasUrl() reports as absent.
        $this->url = isset($param['url']['value']) ? $param['url']['value'] : null;
    }
}
|