Merge https://github.com/sebsauvage/rss-bridge into new-attribute-system
This commit is contained in:
commit
55d4c6e425
5 changed files with 43 additions and 18 deletions
|
@ -18,4 +18,12 @@ Current development version
|
||||||
* Hyperlinks were not clickable.
|
* Hyperlinks were not clickable.
|
||||||
* Corrected error message when SimpleHtmlDom library is not installed.
|
* Corrected error message when SimpleHtmlDom library is not installed.
|
||||||
* Added changelog.
|
* Added changelog.
|
||||||
|
* (many many fixes on bridges)
|
||||||
|
* Media RSS format
|
||||||
|
* input types (list)
|
||||||
|
* cache purge function
|
||||||
|
* refactoring
|
||||||
|
|
||||||
|
Alpha 0.2
|
||||||
|
===
|
||||||
|
-TBA-
|
||||||
|
|
|
@ -90,3 +90,13 @@ Parameter Name | Parameter values | Description
|
||||||
type|text, number, list, checkbox| Type of the input, default is text
|
type|text, number, list, checkbox| Type of the input, default is text
|
||||||
required| true | Set this if you want your attribute to be required
|
required| true | Set this if you want your attribute to be required
|
||||||
values| [ {"name" : "option1Name", "value" : "option1Value"}, ...] | Values list, required with the 'list' type
|
values| [ {"name" : "option1Name", "value" : "option1Value"}, ...] | Values list, required with the 'list' type
|
||||||
|
|
||||||
|
#### Guidelines
|
||||||
|
|
||||||
|
* scripts (e.g. JavaScript) must be stripped out. Make good use of `strip_tags()` and `preg_replace()`.
|
||||||
|
* bridge must present data within 8 seconds (adjust iterators accordingly)
|
||||||
|
* cache timeout must be fine-tuned so that each refresh can provide 1 or 2 new elements on busy periods
|
||||||
|
* `<audio>` and `<video>` must not autoplay. Seriously.
|
||||||
|
* do everything you can to extract valid timestamps. Translate formats, use API, exploit sitemap, whatever. Free the data!
|
||||||
|
* don't create duplicates. If the website runs on WordPress, use the generic WordPress bridge if possible.
|
||||||
|
* maintain efficient and well-commented code :wink:
|
||||||
|
|
23
README.md
23
README.md
|
@ -111,31 +111,20 @@ Including `PHP Simple HTML DOM Parser` under the [MIT License](http://opensource
|
||||||
|
|
||||||
Technical notes
|
Technical notes
|
||||||
===
|
===
|
||||||
* There is a cache so that source services won't ban you even if you hammer the rss-bridge with requests. Each bridge has a different duration for the cache. The `cache` subdirectory will be automatically created. You can purge it whenever you want.
|
* There is a cache so that source services won't ban you even if you hammer the rss-bridge with requests. Each bridge has a different duration for the cache. The `cache` subdirectory is created automatically, and cached objects older than 24 hours are purged.
|
||||||
* To implement a new rss-bridge, create a new class in `bridges` subdirectory. Look at existing bridges for examples and the guidelines below. For items you generate in `$this->items`, only `uri` and `title` are mandatory in each item. `timestamp` and `content` are optional but recommended. Any additional key will be ignored by the ATOM feed (but output to JSON).
|
* To implement a new rss-bridge, [follow the specifications](CREATE_BRIDGE.md) and take a look at existing bridges for examples.
|
||||||
|
|
||||||
### Bridge guidelines
|
|
||||||
|
|
||||||
* metatags: `@name` {Name of service}, `@homepage` {URL to homepage}, `@description`, `@update` {YYYY-MM-DD}, `@maintainer` {Github username or nickname}
|
|
||||||
* scripts (eg. Javascript) must be stripped out. Make good use of `strip_tags()` and `preg_replace()`
|
|
||||||
* bridge must present data within 8 seconds (adjust iterators accordingly)
|
|
||||||
* cache timeout must be fine-tuned so that each refresh can provide 1 or 2 new elements on busy periods
|
|
||||||
* `<audio>` and `<video>` must not autoplay. Seriously.
|
|
||||||
* do everything you can to extract valid timestamps. Translate formats, use API, exploit sitemap, whatever. Free the data!
|
|
||||||
* don't create duplicates. If the website runs on WordPress, use the generic WordPress bridge if possible.
|
|
||||||
* maintain efficient and well-commented code :wink:
|
|
||||||
|
|
||||||
Rant
|
Rant
|
||||||
===
|
===
|
||||||
|
|
||||||
*Dear so-called "social" websites.*
|
*Dear so-called "social" websites.*
|
||||||
|
|
||||||
Your catchword is "share", but you don't want us to share. You want to keep us within your walled gardens. That's why you've been removing RSS links from webpages, hiding them deep on your website, or removed RSS entirely, replacing it with crippled or demented proprietary API. **FUCK YOU.**
|
Your catchword is "share", but you don't want us to share. You want to keep us within your walled gardens. That's why you've been removing RSS links from webpages, hiding them deep on your website, or removing feeds entirely, replacing them with crippled or demented proprietary APIs. **FUCK YOU.**
|
||||||
|
|
||||||
You're not social when you hamper sharing by removing RSS. You're happy to have customers creating content for your ecosystem, but you don't want this content out - a content you do not even own. Google Takeout is just a gimmick. We want our data to flow, we want RSS.
|
You're not social when you hamper sharing by removing feeds. You're happy to have customers creating content for your ecosystem, but you don't want this content out - a content you do not even own. Google Takeout is just a gimmick. We want our data to flow, we want RSS or ATOM feeds.
|
||||||
|
|
||||||
We want to share with friends, using open protocols: RSS, XMPP, whatever. Because no one wants to have *your* service with *your* applications using *your* API force-feeding them. Friends must be free to choose whatever software and service they want.
|
We want to share with friends, using open protocols: RSS, ATOM, XMPP, whatever. Because no one wants to have *your* service with *your* applications using *your* API force-feeding them. Friends must be free to choose whatever software and service they want.
|
||||||
|
|
||||||
We are rebuilding bridges you have wilfully destroyed.
|
We are rebuilding bridges you have wilfully destroyed.
|
||||||
|
|
||||||
Get your shit together: Put RSS back in.
|
Get your shit together: Put RSS/ATOM back in.
|
||||||
|
|
17
index.php
17
index.php
|
@ -24,6 +24,23 @@ if (!extension_loaded('openssl'))
|
||||||
|
|
||||||
// FIXME : beta test UA spoofing, please report any blacklisting by PHP-fopen-unfriendly websites
|
// FIXME : beta test UA spoofing, please report any blacklisting by PHP-fopen-unfriendly websites
|
||||||
ini_set('user_agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20121202 Firefox/30.0 (rss-bridge/0.1; +https://github.com/sebsauvage/rss-bridge)');
|
ini_set('user_agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20121202 Firefox/30.0 (rss-bridge/0.1; +https://github.com/sebsauvage/rss-bridge)');
|
||||||
|
// cache file purge - delete cache files older than 24 hours
|
||||||
|
$cacheTimeLimit = time() - 60*60*24 ;
|
||||||
|
$cachePath = 'cache';
|
||||||
|
if(file_exists($cachePath)) {
|
||||||
|
$cacheIterator = new RecursiveIteratorIterator(
|
||||||
|
new RecursiveDirectoryIterator($cachePath),
|
||||||
|
RecursiveIteratorIterator::CHILD_FIRST
|
||||||
|
);
|
||||||
|
foreach ($cacheIterator as $cacheFile) {
|
||||||
|
if (in_array($cacheFile->getBasename(), array('.', '..')))
|
||||||
|
continue;
|
||||||
|
elseif ($cacheFile->isFile()) {
|
||||||
|
if( filemtime($cacheFile->getPathname()) < $cacheTimeLimit )
|
||||||
|
unlink( $cacheFile->getPathname() );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// default whitelist
|
// default whitelist
|
||||||
$whitelist_file = './whitelist.txt';
|
$whitelist_file = './whitelist.txt';
|
||||||
|
|
|
@ -133,6 +133,7 @@ class HTMLSanitizer {
|
||||||
var $keptAttributes;
|
var $keptAttributes;
|
||||||
var $onlyKeepText;
|
var $onlyKeepText;
|
||||||
|
|
||||||
|
|
||||||
public static $DEFAULT_CLEAR_TAGS = ["script", "iframe"];
|
public static $DEFAULT_CLEAR_TAGS = ["script", "iframe"];
|
||||||
public static $KEPT_ATTRIBUTES = ["title", "href", "src"];
|
public static $KEPT_ATTRIBUTES = ["title", "href", "src"];
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue