forked from blallo/rss-bridge
Merge branch 'FeedExpander' of https://github.com/logmanoriginal/rss-bridge
This commit is contained in:
commit
671703cd37
6 changed files with 225 additions and 126 deletions
|
@ -1,40 +1,26 @@
|
||||||
<?php
|
<?php
|
||||||
class AcrimedBridge extends RssExpander{
|
class AcrimedBridge extends FeedExpander {
|
||||||
|
|
||||||
|
const MAINTAINER = "qwertygc";
|
||||||
|
const NAME = "Acrimed Bridge";
|
||||||
|
const URI = "http://www.acrimed.org/";
|
||||||
|
const DESCRIPTION = "Returns the newest articles.";
|
||||||
|
|
||||||
const MAINTAINER = "qwertygc";
|
public function collectData(){
|
||||||
const NAME = "Acrimed Bridge";
|
$this->collectExpandableDatas("http://www.acrimed.org/spip.php?page=backend");
|
||||||
const URI = "http://www.acrimed.org/";
|
}
|
||||||
const DESCRIPTION = "Returns the newest articles.";
|
|
||||||
|
|
||||||
public function collectData(){
|
protected function parseItem($newsItem){
|
||||||
|
$item = $this->parseRSS_2_0_Item($newsItem);
|
||||||
|
|
||||||
$this->collectExpandableDatas(static::URI.'spip.php?page=backend');
|
$hs = new HTMLSanitizer();
|
||||||
|
$articlePage = $this->getSimpleHTMLDOM($newsItem->link);
|
||||||
|
$article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext);
|
||||||
|
$article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/");
|
||||||
|
$item['content'] = $article;
|
||||||
|
|
||||||
}
|
return $item;
|
||||||
|
}
|
||||||
protected function parseRSSItem($newsItem) {
|
|
||||||
|
|
||||||
$hs = new HTMLSanitizer();
|
|
||||||
|
|
||||||
$namespaces = $newsItem->getNameSpaces(true);
|
|
||||||
$dc = $newsItem->children($namespaces['dc']);
|
|
||||||
|
|
||||||
$item = array();
|
|
||||||
$item['uri'] = trim($newsItem->link);
|
|
||||||
$item['title'] = trim($newsItem->title);
|
|
||||||
$item['timestamp'] = strtotime($dc->date);
|
|
||||||
|
|
||||||
$articlePage = $this->getSimpleHTMLDOM($newsItem->link);
|
|
||||||
$article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext);
|
|
||||||
$article = HTMLSanitizer::defaultImageSrcTo($article, static::URI);
|
|
||||||
|
|
||||||
$item['content'] = $article;
|
|
||||||
|
|
||||||
|
|
||||||
return $item;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getCacheDuration(){
|
public function getCacheDuration(){
|
||||||
return 4800; // 2 hours
|
return 4800; // 2 hours
|
||||||
|
|
62
bridges/FeedExpanderExampleBridge.php
Normal file
62
bridges/FeedExpanderExampleBridge.php
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
<?php
|
||||||
|
/**
 * Example bridge that exercises FeedExpander with one sample feed per
 * feed format (RSS 0.91, RSS 1.0, RSS 2.0 and ATOM 1.0).
 *
 * The 'version' input selects both the sample feed that is requested and
 * the FeedExpander item parser that is applied to each of its entries.
 */
class FeedExpanderExampleBridge extends FeedExpander {

	const MAINTAINER = 'logmanoriginal';
	const NAME = 'FeedExpander Example';
	const URI = '#';
	const DESCRIPTION = 'Example bridge to test FeedExpander';

	const PARAMETERS = array(
		'Feed' => array(
			'version' => array(
				'name' => 'Version',
				'type' => 'list',
				'required' => true,
				'title' => 'Select your feed format/version',
				'defaultValue' => 'RSS 2.0',
				'values' => array(
					'RSS 0.91' => 'rss_0_9_1',
					'RSS 1.0' => 'rss_1_0',
					'RSS 2.0' => 'rss_2_0',
					'ATOM 1.0' => 'atom_1_0'
				)
			)
		)
	);

	/**
	 * Requests the sample feed that belongs to the selected 'version'.
	 *
	 * An unrecognised version is reported through returnClientError() and
	 * no feed is requested.
	 */
	public function collectData(){
		$selectedVersion = $this->getInput('version');

		// First pick the sample URL, then hand it to the expander once.
		switch($selectedVersion){
			case 'rss_0_9_1':
				$sampleFeed = 'http://static.userland.com/gems/backend/sampleRss.xml';
				break;
			case 'rss_1_0':
				$sampleFeed = 'http://feeds.nature.com/nature/rss/current?format=xml';
				break;
			case 'rss_2_0':
				$sampleFeed = 'http://feeds.rssboard.org/rssboard?format=xml';
				break;
			case 'atom_1_0':
				$sampleFeed = 'http://segfault.linuxmint.com/feed/atom/';
				break;
			default:
				$this->returnClientError('Unknown version ' . $selectedVersion . '!');
				return;
		}

		parent::collectExpandableDatas($sampleFeed);
	}

	/**
	 * Converts one feed entry with the parser matching the selected 'version'.
	 *
	 * @param $newsItem the feed entry handed over by FeedExpander
	 * @return the parsed item; nothing is returned for an unknown version,
	 *         which is reported through returnClientError() instead
	 */
	protected function parseItem($newsItem){
		$selectedVersion = $this->getInput('version');

		// Every known case returns directly, so no break statements are needed.
		switch($selectedVersion){
			case 'rss_0_9_1':
				return $this->parseRSS_0_9_1_Item($newsItem);
			case 'rss_1_0':
				return $this->parseRSS_1_0_Item($newsItem);
			case 'rss_2_0':
				return $this->parseRSS_2_0_Item($newsItem);
			case 'atom_1_0':
				return $this->parseATOMItem($newsItem);
			default:
				$this->returnClientError('Unknown version ' . $selectedVersion . '!');
		}
	}
}
|
|
@ -1,34 +1,22 @@
|
||||||
<?php
|
<?php
|
||||||
define("FREENEWS_RSS", 'http://feeds.feedburner.com/Freenews-Freebox?format=xml');
|
class FreenewsBridge extends FeedExpander {
|
||||||
class FreenewsBridge extends RssExpander {
|
|
||||||
|
|
||||||
const MAINTAINER = "mitsukarenai";
|
const MAINTAINER = "mitsukarenai";
|
||||||
const NAME = "Freenews";
|
const NAME = "Freenews";
|
||||||
const URI = "http://freenews.fr";
|
const URI = "http://freenews.fr";
|
||||||
const DESCRIPTION = "Un site d'actualité pour les freenautes (mais ne parlant pas que de la freebox). Ne rentrez pas d'id si vous voulez accéder aux actualités générales.";
|
const DESCRIPTION = "Un site d'actualité pour les freenautes (mais ne parlant pas que de la freebox). Ne rentrez pas d'id si vous voulez accéder aux actualités générales.";
|
||||||
|
|
||||||
public function collectData(){
|
public function collectData(){
|
||||||
parent::collectExpandableDatas(FREENEWS_RSS);
|
parent::collectExpandableDatas('http://feeds.feedburner.com/Freenews-Freebox?format=xml');
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseRSSItem($newsItem) {
|
protected function parseItem($newsItem) {
|
||||||
$item = array();
|
$item = $this->parseRSS_2_0_Item($newsItem);
|
||||||
$item['title'] = trim($newsItem->title);
|
|
||||||
$this->debugMessage("item has for title \"".$item['title']."\"");
|
|
||||||
if(empty($newsItem->guid)) {
|
|
||||||
$item['uri'] = (string) $newsItem->link;
|
|
||||||
} else {
|
|
||||||
$item['uri'] = (string) $newsItem->guid;
|
|
||||||
}
|
|
||||||
// now load that uri from cache
|
|
||||||
$this->debugMessage("now loading page ".$item['uri']);
|
|
||||||
$articlePage = $this->get_cached($item['uri']);
|
|
||||||
|
|
||||||
|
$articlePage = $this->get_cached($item['uri']);
|
||||||
$content = $articlePage->find('.post-container', 0);
|
$content = $articlePage->find('.post-container', 0);
|
||||||
$item['content'] = $content->innertext;
|
$item['content'] = $content->innertext;
|
||||||
$item['author'] = $articlePage->find('a[rel=author]', 0)->innertext;
|
|
||||||
// format should parse 2014-03-25T16:21:20Z. But, according to http://stackoverflow.com/a/10478469, it is not that simple
|
|
||||||
$item['timestamp'] = $this->RSS_2_0_time_to_timestamp($newsItem);
|
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,35 +1,19 @@
|
||||||
<?php
|
<?php
|
||||||
class Les400CulsBridge extends RssExpander{
|
class Les400CulsBridge extends FeedExpander{
|
||||||
|
|
||||||
const MAINTAINER = "unknown";
|
|
||||||
const NAME = "Les 400 Culs";
|
|
||||||
const URI = "http://sexes.blogs.liberation.fr/";
|
|
||||||
const DESCRIPTION = "La planete sexe vue par Agnes Girard via rss-bridge";
|
|
||||||
|
|
||||||
|
const MAINTAINER = "unknown";
|
||||||
|
const NAME = "Les 400 Culs";
|
||||||
|
const URI = "http://sexes.blogs.liberation.fr/";
|
||||||
|
const DESCRIPTION = "La planete sexe vue par Agnes Girard via rss-bridge";
|
||||||
|
|
||||||
public function collectData(){
|
public function collectData(){
|
||||||
$this->collectExpandableDatas(self::URI.'feeds/');
|
$this->collectExpandableDatas(self::URI . 'feeds/');
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseRSSItem($newsItem) {
|
protected function parseItem($newsItem){
|
||||||
$item = array();
|
return $this->parseRSS_2_0_Item($newsItem);
|
||||||
$item['title'] = trim((string) $newsItem->title);
|
|
||||||
$this->debugMessage("browsing item ".var_export($newsItem, true));
|
|
||||||
if(empty($newsItem->guid)) {
|
|
||||||
$item['uri'] = (string) $newsItem->link;
|
|
||||||
} else {
|
|
||||||
$item['uri'] = (string) $newsItem->guid;
|
|
||||||
}
|
|
||||||
// now load that uri from cache
|
|
||||||
$this->debugMessage("now loading page ".$item['uri']);
|
|
||||||
// $articlePage = $this->get_cached($item['uri']);
|
|
||||||
|
|
||||||
// $content = $articlePage->find('.post-container', 0);
|
|
||||||
$item['content'] = (string) $newsItem->description;
|
|
||||||
$item['author'] = (string) $newsItem->author;
|
|
||||||
$item['timestamp'] = $this->RSS_2_0_time_to_timestamp($newsItem);
|
|
||||||
return $item;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getCacheDuration(){
|
public function getCacheDuration(){
|
||||||
return 7200; // 2h hours
|
return 7200; // 2h hours
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
<?php
|
<?php
|
||||||
class TheOatmealBridge extends RssExpander{
|
class TheOatmealBridge extends FeedExpander{
|
||||||
|
|
||||||
const MAINTAINER = "Riduidel";
|
const MAINTAINER = "Riduidel";
|
||||||
const NAME = "The Oatmeal";
|
const NAME = "The Oatmeal";
|
||||||
|
@ -10,44 +10,17 @@ class TheOatmealBridge extends RssExpander{
|
||||||
$this->collectExpandableDatas('http://feeds.feedburner.com/oatmealfeed');
|
$this->collectExpandableDatas('http://feeds.feedburner.com/oatmealfeed');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected function parseItem($newsItem) {
|
||||||
|
$item = $this->parseRSS_1_0_Item($newsItem);
|
||||||
|
|
||||||
/**
|
|
||||||
* Since the oatmeal produces a weird RSS feed, I have to fix it by loading the items separatly from the feed infos
|
|
||||||
*/
|
|
||||||
protected function collect_RSS_2_0_data($rssContent) {
|
|
||||||
$rssContent->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/");
|
|
||||||
$rssHeaderContent = $rssContent->channel[0];
|
|
||||||
$this->debugMessage("RSS content is ===========\n".var_export($rssHeaderContent, true)."===========");
|
|
||||||
$this->load_RSS_2_0_feed_data($rssHeaderContent);
|
|
||||||
foreach($rssContent->item as $item) {
|
|
||||||
$this->debugMessage("parsing item ".var_export($item, true));
|
|
||||||
$this->items[] = $this->parseRSSItem($item);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
protected function parseRSSItem($newsItem) {
|
|
||||||
$namespaces = $newsItem->getNameSpaces(true);
|
|
||||||
$dc = $newsItem->children($namespaces['dc']);
|
|
||||||
$rdf = $newsItem->children($namespaces['rdf']);
|
|
||||||
$item = array();
|
|
||||||
$item['title'] = trim($newsItem->title);
|
|
||||||
$this->debugMessage("browsing Oatmeal item ".var_export($newsItem, true));
|
|
||||||
$item['uri']=(string) $newsItem->attributes($namespaces['rdf'])->about;
|
|
||||||
// now load that uri from cache
|
|
||||||
$this->debugMessage("now loading page ".$item['uri']);
|
|
||||||
$articlePage = $this->get_cached($item['uri']);
|
$articlePage = $this->get_cached($item['uri']);
|
||||||
|
|
||||||
$content = $articlePage->find('#comic', 0);
|
$content = $articlePage->find('#comic', 0);
|
||||||
if($content==null) {
|
if(is_null($content)) // load alternative
|
||||||
$content = $articlePage->find('#blog');
|
$content = $articlePage->find('#blog', 0);
|
||||||
}
|
|
||||||
$item['content'] = $content->innertext;
|
if(!is_null($content))
|
||||||
|
$item['content'] = $content->innertext;
|
||||||
|
|
||||||
$this->debugMessage("dc content is ".var_export($dc, true));
|
|
||||||
$item['author'] = (string) $dc->creator;
|
|
||||||
$item['timestamp'] = DateTime::createFromFormat(DateTime::ISO8601, $dc->date)->getTimestamp();
|
|
||||||
$this->debugMessage("writtem by ".$item['author']." on ".$item['timestamp']);
|
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
132
lib/Bridge.php
132
lib/Bridge.php
|
@ -585,29 +585,51 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
abstract class RssExpander extends HttpCachingBridgeAbstract {
|
abstract class FeedExpander extends HttpCachingBridgeAbstract {
|
||||||
|
|
||||||
private $name;
|
private $name;
|
||||||
private $uri;
|
private $uri;
|
||||||
private $description;
|
private $description;
|
||||||
|
|
||||||
public function collectExpandableDatas($name){
|
public function collectExpandableDatas($url){
|
||||||
if(empty($name)){
|
if(empty($url)){
|
||||||
$this->returnServerError('There is no $name for this RSS expander');
|
$this->returnServerError('There is no $url for this RSS expander');
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->debugMessage('Loading from ' . $name);
|
$this->debugMessage('Loading from ' . $url);
|
||||||
|
|
||||||
/* Notice we do not use cache here on purpose:
|
/* Notice we do not use cache here on purpose:
|
||||||
* we want a fresh view of the RSS stream each time
|
* we want a fresh view of the RSS stream each time
|
||||||
*/
|
*/
|
||||||
$content = $this->getContents($name) or $this->returnServerError('Could not request ' . $name);
|
$content = $this->getContents($url)
|
||||||
|
or $this->returnServerError('Could not request ' . $url);
|
||||||
$rssContent = simplexml_load_string($content);
|
$rssContent = simplexml_load_string($content);
|
||||||
$this->debugMessage('loaded RSS from ' . $name);
|
|
||||||
// TODO insert RSS format detection
|
$this->debugMessage('Detecting feed format/version');
|
||||||
// For now we always assume RSS 2.0
|
if(isset($rssContent->channel[0])){
|
||||||
$this->collect_RSS_2_0_data($rssContent);
|
$this->debugMessage('Detected RSS format');
|
||||||
|
if(isset($rssContent->item[0])){
|
||||||
|
$this->debugMessage('Detected RSS 1.0 format');
|
||||||
|
$this->collect_RSS_1_0_data($rssContent);
|
||||||
|
} else {
|
||||||
|
$this->debugMessage('Detected RSS 0.9x or 2.0 format');
|
||||||
|
$this->collect_RSS_2_0_data($rssContent);
|
||||||
|
}
|
||||||
|
} elseif(isset($rssContent->entry[0])){
|
||||||
|
$this->debugMessage('Detected ATOM format');
|
||||||
|
$this->collect_ATOM_data($rssContent);
|
||||||
|
} else {
|
||||||
|
$this->debugMessage('Unknown feed format/version');
|
||||||
|
$this->returnServerError('The feed format is unknown!');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected function collect_RSS_1_0_data($rssContent){
|
||||||
|
$this->load_RSS_2_0_feed_data($rssContent->channel[0]);
|
||||||
|
foreach($rssContent->item as $item){
|
||||||
|
$this->debugMessage('parsing item ' . var_export($item, true));
|
||||||
|
$this->items[] = $this->parseItem($item);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function collect_RSS_2_0_data($rssContent){
|
protected function collect_RSS_2_0_data($rssContent){
|
||||||
|
@ -616,7 +638,15 @@ abstract class RssExpander extends HttpCachingBridgeAbstract {
|
||||||
$this->load_RSS_2_0_feed_data($rssContent);
|
$this->load_RSS_2_0_feed_data($rssContent);
|
||||||
foreach($rssContent->item as $item){
|
foreach($rssContent->item as $item){
|
||||||
$this->debugMessage('parsing item ' . var_export($item, true));
|
$this->debugMessage('parsing item ' . var_export($item, true));
|
||||||
$this->items[] = $this->parseRSSItem($item);
|
$this->items[] = $this->parseItem($item);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected function collect_ATOM_data($content){
|
||||||
|
$this->load_ATOM_feed_data($content);
|
||||||
|
foreach($content->entry as $item){
|
||||||
|
$this->debugMessage('parsing item ' . var_export($item, true));
|
||||||
|
$this->items[] = $this->parseItem($item);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -631,12 +661,88 @@ abstract class RssExpander extends HttpCachingBridgeAbstract {
|
||||||
$this->description = trim($rssContent->description);
|
$this->description = trim($rssContent->description);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads the feed-level metadata (title, link and subtitle) of an ATOM feed
 * into this expander's name, uri and description.
 *
 * All values are cast to plain strings. SimpleXML hands out element and
 * attribute objects, and storing those would leave the getters returning
 * objects instead of text (the RSS loader already stores trimmed strings).
 *
 * @param $content the parsed ATOM document (SimpleXMLElement)
 */
protected function load_ATOM_feed_data($content){
	$this->name = trim((string)$content->title);

	// Find best link (only one, or first of 'alternate')
	$this->uri = '';
	if(isset($content->link)){
		if(count($content->link) === 1){
			$this->uri = (string)$content->link[0]['href'];
		} else {
			foreach($content->link as $link){
				// A missing 'rel' attribute casts to '' and simply does not match.
				if(strtolower((string)$link['rel']) === 'alternate'){
					$this->uri = (string)$link['href'];
					break;
				}
			}
		}
	}

	// The subtitle is optional; the description is left untouched without it.
	if(isset($content->subtitle))
		$this->description = trim((string)$content->subtitle);
}
|
||||||
|
|
||||||
|
/**
 * Builds an item array from a single ATOM <entry> element.
 *
 * Only the keys whose source element exists are set. Values are cast to
 * strings because SimpleXML returns element objects, not text; the
 * timestamp is the result of strtotime() on the <updated> text.
 *
 * @param $feedItem the ATOM entry (SimpleXMLElement)
 * @return array with any of 'uri', 'title', 'timestamp', 'author', 'content'
 */
protected function parseATOMItem($feedItem){
	$item = array();

	if(isset($feedItem->id)) $item['uri'] = (string)$feedItem->id;
	if(isset($feedItem->title)) $item['title'] = (string)$feedItem->title;
	if(isset($feedItem->updated)) $item['timestamp'] = strtotime((string)$feedItem->updated);
	// Only the author's <name> child is used.
	if(isset($feedItem->author)) $item['author'] = (string)$feedItem->author->name;
	if(isset($feedItem->content)) $item['content'] = (string)$feedItem->content;

	return $item;
}
|
||||||
|
|
||||||
|
/**
 * Builds an item array from a single RSS 0.91 <item> element.
 *
 * Only the keys whose source element exists are set. Values are cast to
 * strings because SimpleXML returns element objects, not text.
 *
 * @param $feedItem the RSS item (SimpleXMLElement)
 * @return array with any of 'uri', 'title', 'content'
 */
protected function parseRSS_0_9_1_Item($feedItem){
	$item = array();

	if(isset($feedItem->link)) $item['uri'] = (string)$feedItem->link;
	if(isset($feedItem->title)) $item['title'] = (string)$feedItem->title;
	// rss 0.91 doesn't support timestamps
	// rss 0.91 doesn't support authors
	if(isset($feedItem->description)) $item['content'] = (string)$feedItem->description;

	return $item;
}
|
||||||
|
|
||||||
|
/**
 * Builds an item array from a single RSS 1.0 <item> element.
 *
 * Starts from the RSS 0.91 fields and, when the document declares a 'dc'
 * namespace prefix, adds the timestamp from dc:date and the author from
 * dc:creator. The added values are cast to strings because SimpleXML
 * returns element objects, not text.
 *
 * @param $feedItem the RSS item (SimpleXMLElement)
 * @return array the 0.91 keys plus, when available, 'timestamp' and 'author'
 */
protected function parseRSS_1_0_Item($feedItem){
	// 1.0 adds optional elements around the 0.91 standard
	$item = $this->parseRSS_0_9_1_Item($feedItem);

	$namespaces = $feedItem->getNamespaces(true);
	if(isset($namespaces['dc'])){
		$dc = $feedItem->children($namespaces['dc']);
		if(isset($dc->date)) $item['timestamp'] = strtotime((string)$dc->date);
		if(isset($dc->creator)) $item['author'] = (string)$dc->creator;
	}

	return $item;
}
|
||||||
|
|
||||||
|
/**
 * Builds an item array from a single RSS 2.0 <item> element.
 *
 * Starts from the RSS 0.91 fields, then adds a timestamp (pubDate, falling
 * back to dc:date) and an author (author, falling back to dc:creator). The
 * added values are cast to strings because SimpleXML returns element
 * objects, not text.
 *
 * @param $feedItem the RSS item (SimpleXMLElement)
 * @return array the 0.91 keys plus, when available, 'timestamp' and 'author'
 */
protected function parseRSS_2_0_Item($feedItem){
	// Primary data is compatible to 0.91 with some additional data
	$item = $this->parseRSS_0_9_1_Item($feedItem);

	// Always define $dc, so the fallbacks below never read an undefined
	// variable when the feed declares no 'dc' namespace.
	$namespaces = $feedItem->getNamespaces(true);
	$dc = isset($namespaces['dc']) ? $feedItem->children($namespaces['dc']) : null;

	if(isset($feedItem->pubDate)){
		$item['timestamp'] = strtotime((string)$feedItem->pubDate);
	} elseif(isset($dc->date)){
		$item['timestamp'] = strtotime((string)$dc->date);
	}

	if(isset($feedItem->author)){
		$item['author'] = (string)$feedItem->author;
	} elseif(isset($dc->creator)){
		$item['author'] = (string)$dc->creator;
	}

	return $item;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Method should return, from a source RSS item given by lastRSS, one of our Items objects
|
* Method should return, from a source RSS item given by lastRSS, one of our Items objects
|
||||||
* @param $item the input rss item
|
* @param $item the input rss item
|
||||||
* @return a RSS-Bridge Item, with (hopefully) the whole content)
|
* @return a RSS-Bridge Item, with (hopefully) the whole content)
|
||||||
*/
|
*/
|
||||||
abstract protected function parseRSSItem($item);
|
abstract protected function parseItem($item);
|
||||||
|
|
||||||
public function getURI(){
|
public function getURI(){
|
||||||
return $this->uri;
|
return $this->uri;
|
||||||
|
|
Loading…
Reference in a new issue