forked from blallo/rss-bridge
Merge pull request #177 from teromene/new-attribute-system
Et ça continue...
This commit is contained in:
commit
b2b0ea6a44
6 changed files with 37 additions and 22 deletions
|
@ -31,10 +31,10 @@ class Gawker extends RssExpander{
|
||||||
trigger_error("If no site is provided, nothing is gonna happen", E_USER_ERROR);
|
trigger_error("If no site is provided, nothing is gonna happen", E_USER_ERROR);
|
||||||
} else {
|
} else {
|
||||||
$this->name = $param['site'];
|
$this->name = $param['site'];
|
||||||
$param['url'] = $this->toURI(strtolower($param['site']));
|
$url = $this->toURI(strtolower($param['site']));
|
||||||
}
|
}
|
||||||
// $this->message("loading feed from ".$this->getURI());
|
// $this->message("loading feed from ".$this->getURI());
|
||||||
parent::collectExpandableDatas($param, $name);
|
parent::collectExpandableDatas($param, $url);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseRSSItem($newsItem) {
|
protected function parseRSSItem($newsItem) {
|
||||||
|
@ -49,7 +49,7 @@ class Gawker extends RssExpander{
|
||||||
$articlePage = str_get_html($this->get_cached($item->uri));
|
$articlePage = str_get_html($this->get_cached($item->uri));
|
||||||
if(is_object($articlePage)) {
|
if(is_object($articlePage)) {
|
||||||
$content = $articlePage->find('.post-content', 0);
|
$content = $articlePage->find('.post-content', 0);
|
||||||
$this->defaultImageSrcTo($content, $this->getURI());
|
HTMLSanitizer::defaultImageSrcTo($content, $this->getURI());
|
||||||
$vcard = $articlePage->find('.vcard', 0);
|
$vcard = $articlePage->find('.vcard', 0);
|
||||||
if(is_object($vcard)) {
|
if(is_object($vcard)) {
|
||||||
$authorLink = $vcard->find('a', 0);
|
$authorLink = $vcard->find('a', 0);
|
||||||
|
|
|
@ -86,7 +86,7 @@ class WorldOfTanks extends HttpCachingBridgeAbstract{
|
||||||
// $this->message("loading page ".$item->uri);
|
// $this->message("loading page ".$item->uri);
|
||||||
$articlePage = str_get_html($this->get_cached($item->uri));
|
$articlePage = str_get_html($this->get_cached($item->uri));
|
||||||
$content = $articlePage->find('.l-content', 0);
|
$content = $articlePage->find('.l-content', 0);
|
||||||
$this->defaultImageSrcTo($content, WORLD_OF_TANKS);
|
HTMLSanitizer::defaultImageSrcTo($content, WORLD_OF_TANKS);
|
||||||
$item->title = $content->find('h1', 0)->innertext;
|
$item->title = $content->find('h1', 0)->innertext;
|
||||||
$item->content = $content->find('.b-content', 0)->innertext;
|
$item->content = $content->find('.b-content', 0)->innertext;
|
||||||
// $item->name = $auteur->innertext;
|
// $item->name = $auteur->innertext;
|
||||||
|
|
|
@ -11,6 +11,8 @@ TODO :
|
||||||
- implement header('X-Cached-Version: '.date(DATE_ATOM, filemtime($cachefile)));
|
- implement header('X-Cached-Version: '.date(DATE_ATOM, filemtime($cachefile)));
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
//define('PROXY_URL', 'tcp://192.168.0.0:28');
|
||||||
|
|
||||||
date_default_timezone_set('UTC');
|
date_default_timezone_set('UTC');
|
||||||
error_reporting(0);
|
error_reporting(0);
|
||||||
//ini_set('display_errors','1'); error_reporting(E_ALL); // For debugging only.
|
//ini_set('display_errors','1'); error_reporting(E_ALL); // For debugging only.
|
||||||
|
@ -21,6 +23,7 @@ if (!extension_loaded('openssl'))
|
||||||
|
|
||||||
// FIXME : beta test UA spoofing, please report any blacklisting by PHP-fopen-unfriendly websites
|
// FIXME : beta test UA spoofing, please report any blacklisting by PHP-fopen-unfriendly websites
|
||||||
ini_set('user_agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20121202 Firefox/30.0 (rss-bridge/0.1; +https://github.com/sebsauvage/rss-bridge)');
|
ini_set('user_agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20121202 Firefox/30.0 (rss-bridge/0.1; +https://github.com/sebsauvage/rss-bridge)');
|
||||||
|
|
||||||
// -------
|
// -------
|
||||||
|
|
||||||
// default whitelist
|
// default whitelist
|
||||||
|
|
|
@ -39,7 +39,7 @@ abstract class BridgeAbstract implements BridgeInterface{
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return datas store in the bridge
|
* Return datas stored in the bridge
|
||||||
* @return mixed
|
* @return mixed
|
||||||
*/
|
*/
|
||||||
public function getDatas(){
|
public function getDatas(){
|
||||||
|
@ -50,7 +50,7 @@ abstract class BridgeAbstract implements BridgeInterface{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defined datas with parameters depending choose bridge
|
* Define datas with parameters depending on the chosen bridge
|
||||||
* Note : you can defined a cache before with "setCache"
|
* Note : you can define a cache before with "setCache"
|
||||||
* @param array $param $_REQUEST, $_GET, $_POST, or array with bridge expected paramters
|
* @param array $param $_REQUEST, $_GET, $_POST, or array with bridge expected parameters
|
||||||
*/
|
*/
|
||||||
public function setDatas(array $param){
|
public function setDatas(array $param){
|
||||||
|
@ -90,16 +90,6 @@ abstract class BridgeAbstract implements BridgeInterface{
|
||||||
return $this;
|
return $this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Set default image SRC attribute to point on given server when none is provided (that's to say when image src starts with '/'
|
|
||||||
*/
|
|
||||||
public function defaultImageSrcTo($content, $server) {
|
|
||||||
foreach($content->find('img') as $image) {
|
|
||||||
if(strpos($image->src, '/')==0) {
|
|
||||||
$image->src = $server.$image->src;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -311,7 +301,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{
|
||||||
|
|
||||||
public function collectExpandableDatas(array $param, $name){
|
public function collectExpandableDatas(array $param, $name){
|
||||||
if (empty($name)) {
|
if (empty($name)) {
|
||||||
$this->returnError('There is no $param[\'url\'] for this RSS expander', 404);
|
$this->returnError('There is no $name for this RSS expander', 404);
|
||||||
}
|
}
|
||||||
// $this->message("Loading from ".$param['url']);
|
// $this->message("Loading from ".$param['url']);
|
||||||
// Notice WE DO NOT use cache here on purpose : we want a fresh view of the RSS stream each time
|
// Notice WE DO NOT use cache here on purpose : we want a fresh view of the RSS stream each time
|
||||||
|
@ -363,3 +353,20 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{
|
||||||
return $this->description;
|
return $this->description;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Proxy-aware drop-in replacement for file_get_contents().
 *
 * When the PROXY_URL constant is defined, the request is routed through that
 * proxy by setting the 'proxy' and 'request_fulluri' options of the 'http'
 * stream wrapper. Any options already present in a caller-supplied context
 * are kept; only the two proxy options are overridden.
 *
 * The optional parameters mirror file_get_contents() so that callers which
 * forward their own arguments (e.g. through call_user_func_array) no longer
 * have them silently discarded.
 *
 * @param string        $url              URL or path to read
 * @param bool          $use_include_path Forwarded to file_get_contents()
 * @param resource|null $context          Optional stream context
 * @param int|null      $offset           Optional read offset; null keeps PHP's own default
 * @param int|null      $maxlen           Optional maximum length; null means no limit
 * @return string|false The data read, or false on failure
 */
function advanced_file_get_contents($url, $use_include_path = false, $context = null, $offset = null, $maxlen = null) {

	if(defined('PROXY_URL')) {
		// Start from the caller's options (if any) so they are not lost.
		$options = is_resource($context) ? stream_context_get_options($context) : array();
		$options['http']['proxy'] = PROXY_URL;
		$options['http']['request_fulluri'] = true;
		$context = stream_context_create($options);
	}

	// Only pass the arguments that were actually supplied: the built-in
	// default for $offset differs between PHP versions, and a null $maxlen
	// must not be forwarded as an explicit length.
	$args = array($url, $use_include_path, $context);
	if($offset !== null || $maxlen !== null) {
		$args[] = ($offset !== null) ? $offset : 0;
		if($maxlen !== null) {
			$args[] = $maxlen;
		}
	}

	return call_user_func_array('file_get_contents', $args);

}
|
||||||
|
|
|
@ -91,7 +91,7 @@ CARD;
|
||||||
} else if($inputEntry['type'] == 'number') {
|
} else if($inputEntry['type'] == 'number') {
|
||||||
$card .= '<input '.$additionalInfoString.' id="' . $idArg . '" type="number" value="" placeholder="' . $inputEntry['exampleValue'] . '" name="' . $inputEntry['identifier'] . '" /><br />' . PHP_EOL;
|
$card .= '<input '.$additionalInfoString.' id="' . $idArg . '" type="number" value="" placeholder="' . $inputEntry['exampleValue'] . '" name="' . $inputEntry['identifier'] . '" /><br />' . PHP_EOL;
|
||||||
} else if($inputEntry['type'] == 'list') {
|
} else if($inputEntry['type'] == 'list') {
|
||||||
$card .= '<select '.$additionalInfoString.' id="' . $idArg . '" name="' . $inputEntry['name'] . '" >';
|
$card .= '<select '.$additionalInfoString.' id="' . $idArg . '" name="' . $inputEntry['identifier'] . '" >';
|
||||||
foreach($inputEntry['values'] as $listValues) {
|
foreach($inputEntry['values'] as $listValues) {
|
||||||
|
|
||||||
$card .= "<option $additionalInfoString value='" . $listValues['value'] . "'>" . $listValues['name'] . "</option>";
|
$card .= "<option $additionalInfoString value='" . $listValues['value'] . "'>" . $listValues['name'] . "</option>";
|
||||||
|
@ -157,9 +157,7 @@ class HTMLSanitizer {
|
||||||
$element->outertext = '';
|
$element->outertext = '';
|
||||||
} else {
|
} else {
|
||||||
foreach($element->getAllAttributes() as $attributeName => $attribute) {
|
foreach($element->getAllAttributes() as $attributeName => $attribute) {
|
||||||
|
|
||||||
if(!in_array($attributeName, $this->keptAttributes)) $element->removeAttribute($attributeName);
|
if(!in_array($attributeName, $this->keptAttributes)) $element->removeAttribute($attributeName);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -167,6 +165,13 @@ class HTMLSanitizer {
|
||||||
return $htmlContent;
|
return $htmlContent;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
/**
 * Prefix root-relative image sources with the given server.
 *
 * Every <img> found in $content whose src starts with a single '/' gets
 * $server prepended. Sources that already name a host (absolute URLs and
 * protocol-relative '//host/...' references) and sources without a leading
 * slash are left unchanged.
 *
 * @param object $content Node exposing find('img'); presumably a simple_html_dom element - TODO confirm
 * @param string $server  Server prefix to prepend (e.g. scheme + host, no trailing slash)
 */
public static function defaultImageSrcTo($content, $server) {
	foreach($content->find('img') as $image) {
		$src = $image->src;
		// The src may be missing or not a string; nothing to rewrite then.
		if(!is_string($src)) {
			continue;
		}
		// Strict comparison is required: strpos() returns false when there is
		// no '/', and a loose "false == 0" would wrongly count as a match.
		$isRootRelative = strpos($src, '/') === 0;
		// '//host/path' is a network-path reference and must not be prefixed.
		$isProtocolRelative = strpos($src, '//') === 0;
		if($isRootRelative && !$isProtocolRelative) {
			$image->src = $server.$src;
		}
	}
}
|
||||||
|
|
||||||
}
|
}
|
||||||
?>
|
?>
|
||||||
|
|
4
vendor/simplehtmldom/simple_html_dom.php
vendored
4
vendor/simplehtmldom/simple_html_dom.php
vendored
|
@ -73,7 +73,7 @@ function file_get_html($url, $use_include_path = false, $context=null, $offset =
|
||||||
// We DO force the tags to be terminated.
|
// We DO force the tags to be terminated.
|
||||||
$dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
|
$dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
|
||||||
// For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done.
|
// For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done.
|
||||||
$contents = file_get_contents($url, $use_include_path, $context, $offset);
|
$contents = advanced_file_get_contents($url, $use_include_path, $context, $offset);
|
||||||
// Paperg - use our own mechanism for getting the contents as we want to control the timeout.
|
// Paperg - use our own mechanism for getting the contents as we want to control the timeout.
|
||||||
//$contents = retrieve_url_contents($url);
|
//$contents = retrieve_url_contents($url);
|
||||||
if (empty($contents) || strlen($contents) > MAX_FILE_SIZE)
|
if (empty($contents) || strlen($contents) > MAX_FILE_SIZE)
|
||||||
|
@ -1094,7 +1094,7 @@ class simple_html_dom
|
||||||
function load_file()
|
function load_file()
|
||||||
{
|
{
|
||||||
$args = func_get_args();
|
$args = func_get_args();
|
||||||
$this->load(call_user_func_array('file_get_contents', $args), true);
|
$this->load(call_user_func_array('advanced_file_get_contents', $args), true);
|
||||||
// Throw an error if we can't properly load the dom.
|
// Throw an error if we can't properly load the dom.
|
||||||
if (($error=error_get_last())!==null) {
|
if (($error=error_get_last())!==null) {
|
||||||
$this->clear();
|
$this->clear();
|
||||||
|
|
Loading…
Reference in a new issue