forked from blallo/rss-bridge
Correction de quelques problèmes avec RSSExpander suite à la migration dans lib/Bridge.
Correction de typos. Ajout de la possibilité d'utiliser un proxy.
This commit is contained in:
parent
e1cdca6971
commit
a07839019e
6 changed files with 37 additions and 22 deletions
|
@ -31,10 +31,10 @@ class Gawker extends RssExpander{
|
|||
trigger_error("If no site is provided, nothing is gonna happen", E_USER_ERROR);
|
||||
} else {
|
||||
$this->name = $param['site'];
|
||||
$param['url'] = $this->toURI(strtolower($param['site']));
|
||||
$url = $this->toURI(strtolower($param['site']));
|
||||
}
|
||||
// $this->message("loading feed from ".$this->getURI());
|
||||
parent::collectExpandableDatas($param, $name);
|
||||
parent::collectExpandableDatas($param, $url);
|
||||
}
|
||||
|
||||
protected function parseRSSItem($newsItem) {
|
||||
|
@ -49,7 +49,7 @@ class Gawker extends RssExpander{
|
|||
$articlePage = str_get_html($this->get_cached($item->uri));
|
||||
if(is_object($articlePage)) {
|
||||
$content = $articlePage->find('.post-content', 0);
|
||||
$this->defaultImageSrcTo($content, $this->getURI());
|
||||
HTMLSanitizer::defaultImageSrcTo($content, $this->getURI());
|
||||
$vcard = $articlePage->find('.vcard', 0);
|
||||
if(is_object($vcard)) {
|
||||
$authorLink = $vcard->find('a', 0);
|
||||
|
|
|
@ -86,7 +86,7 @@ class WorldOfTanks extends HttpCachingBridgeAbstract{
|
|||
// $this->message("loading page ".$item->uri);
|
||||
$articlePage = str_get_html($this->get_cached($item->uri));
|
||||
$content = $articlePage->find('.l-content', 0);
|
||||
$this->defaultImageSrcTo($content, WORLD_OF_TANKS);
|
||||
HTMLSanitizer::defaultImageSrcTo($content, WORLD_OF_TANKS);
|
||||
$item->title = $content->find('h1', 0)->innertext;
|
||||
$item->content = $content->find('.b-content', 0)->innertext;
|
||||
// $item->name = $auteur->innertext;
|
||||
|
|
|
@ -11,6 +11,8 @@ TODO :
|
|||
- implement header('X-Cached-Version: '.date(DATE_ATOM, filemtime($cachefile)));
|
||||
*/
|
||||
|
||||
//define('PROXY_URL', 'tcp://192.168.0.0:28');
|
||||
|
||||
date_default_timezone_set('UTC');
|
||||
error_reporting(0);
|
||||
//ini_set('display_errors','1'); error_reporting(E_ALL); // For debugging only.
|
||||
|
@ -21,6 +23,7 @@ if (!extension_loaded('openssl'))
|
|||
|
||||
// FIXME : beta test UA spoofing, please report any blacklisting by PHP-fopen-unfriendly websites
|
||||
ini_set('user_agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20121202 Firefox/30.0 (rss-bridge/0.1; +https://github.com/sebsauvage/rss-bridge)');
|
||||
|
||||
// -------
|
||||
|
||||
// default whitelist
|
||||
|
|
|
@ -39,7 +39,7 @@ abstract class BridgeAbstract implements BridgeInterface{
|
|||
}
|
||||
|
||||
/**
|
||||
* Return datas store in the bridge
|
||||
* Return datas stored in the bridge
|
||||
* @return mixed
|
||||
*/
|
||||
public function getDatas(){
|
||||
|
@ -50,7 +50,7 @@ abstract class BridgeAbstract implements BridgeInterface{
|
|||
|
||||
/**
|
||||
* Define datas with parameters depending on the chosen bridge
|
||||
* Note : you can defined a cache before with "setCache"
|
||||
* Note : you can define a cache before with "setCache"
|
||||
* @param array $param $_REQUEST, $_GET, $_POST, or array with bridge expected parameters
|
||||
*/
|
||||
public function setDatas(array $param){
|
||||
|
@ -90,16 +90,6 @@ abstract class BridgeAbstract implements BridgeInterface{
|
|||
return $this;
|
||||
}
|
||||
|
||||
/**
 * Prefix root-relative image sources with the given server.
 *
 * Every <img> returned by $content->find('img') whose src starts with a
 * single '/' gets $server prepended. Sources that do not start with '/',
 * that are missing, or that are protocol-relative ('//host/...') are left
 * untouched.
 *
 * @param object $content node exposing find('img'); a non-object is ignored
 * @param string $server  prefix to prepend, e.g. 'http://example.com'
 */
public function defaultImageSrcTo($content, $server) {
    // Callers pass the result of find(..., 0) straight in; presumably that
    // can be a non-object when the selector matches nothing - TODO confirm.
    if(!is_object($content)) {
        return;
    }

    foreach($content->find('img') as $image) {
        $src = (string) $image->src;

        // strpos() yields false when '/' is absent and (false == 0) is true,
        // so the position must be compared strictly. A leading '//' already
        // names a host and must not be prefixed.
        if(strpos($src, '/') === 0 && strpos($src, '//') !== 0) {
            $image->src = $server.$src;
        }
    }
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -311,7 +301,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{
|
|||
|
||||
public function collectExpandableDatas(array $param, $name){
|
||||
if (empty($name)) {
|
||||
$this->returnError('There is no $param[\'url\'] for this RSS expander', 404);
|
||||
$this->returnError('There is no $name for this RSS expander', 404);
|
||||
}
|
||||
// $this->message("Loading from ".$param['url']);
|
||||
// Notice WE DO NOT use cache here on purpose : we want a fresh view of the RSS stream each time
|
||||
|
@ -363,3 +353,20 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{
|
|||
return $this->description;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Read a URL or file like file_get_contents(), going through PROXY_URL
 * when that constant is defined.
 *
 * The extra parameters mirror file_get_contents() so the function can be
 * used as a drop-in replacement; calling it with only $url behaves as
 * before.
 *
 * When PROXY_URL is defined, a new stream context is built from the options
 * of the caller's context (if any) with the 'http' proxy settings added on
 * top. Only the options are copied from the caller's context.
 *
 * @param string        $url              location to read
 * @param bool          $use_include_path forwarded to file_get_contents()
 * @param resource|null $context          optional stream context
 * @param int           $offset           start offset; negative values are treated as 0
 * @param int|null      $maxlen           maximum length; null or negative means no limit
 * @return string|false whatever file_get_contents() returns
 */
function advanced_file_get_contents($url, $use_include_path = false, $context = null, $offset = 0, $maxlen = null) {

    if(defined('PROXY_URL')) {
        // Start from the caller's options so they are not lost when the
        // proxy is enabled.
        $options = is_resource($context) ? stream_context_get_options($context) : array();
        if(!isset($options['http']) || !is_array($options['http'])) {
            $options['http'] = array();
        }
        $options['http']['proxy'] = PROXY_URL;
        $options['http']['request_fulluri'] = true;
        $context = stream_context_create($options);
    }

    // NOTE(review): legacy callers appear to pass -1 as a "no offset"
    // sentinel (confirm against the vendored simple_html_dom defaults);
    // newer PHP versions would read that as "from the end", so clamp it.
    $offset = max(0, (int) $offset);

    if($maxlen === null || $maxlen < 0) {
        return file_get_contents($url, $use_include_path, $context, $offset);
    }

    return file_get_contents($url, $use_include_path, $context, $offset, $maxlen);

}
|
||||
|
|
|
@ -91,7 +91,7 @@ CARD;
|
|||
} else if($inputEntry['type'] == 'number') {
|
||||
$card .= '<input '.$additionalInfoString.' id="' . $idArg . '" type="number" value="" placeholder="' . $inputEntry['exampleValue'] . '" name="' . $inputEntry['identifier'] . '" /><br />' . PHP_EOL;
|
||||
} else if($inputEntry['type'] == 'list') {
|
||||
$card .= '<select '.$additionalInfoString.' id="' . $idArg . '" name="' . $inputEntry['name'] . '" >';
|
||||
$card .= '<select '.$additionalInfoString.' id="' . $idArg . '" name="' . $inputEntry['identifier'] . '" >';
|
||||
foreach($inputEntry['values'] as $listValues) {
|
||||
|
||||
$card .= "<option $additionalInfoString value='" . $listValues['value'] . "'>" . $listValues['name'] . "</option>";
|
||||
|
@ -157,9 +157,7 @@ class HTMLSanitizer {
|
|||
$element->outertext = '';
|
||||
} else {
|
||||
foreach($element->getAllAttributes() as $attributeName => $attribute) {
|
||||
|
||||
if(!in_array($attributeName, $this->keptAttributes)) $element->removeAttribute($attributeName);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -167,6 +165,13 @@ class HTMLSanitizer {
|
|||
return $htmlContent;
|
||||
|
||||
}
|
||||
/**
 * Prefix root-relative image sources with the given server.
 *
 * Every <img> returned by $content->find('img') whose src starts with a
 * single '/' gets $server prepended. Sources that do not start with '/',
 * that are missing, or that are protocol-relative ('//host/...') are left
 * untouched.
 *
 * @param object $content node exposing find('img'); a non-object is ignored
 * @param string $server  prefix to prepend, e.g. 'http://example.com'
 */
public static function defaultImageSrcTo($content, $server) {
    // Callers pass the result of find(..., 0) straight in; presumably that
    // can be a non-object when the selector matches nothing - TODO confirm.
    if(!is_object($content)) {
        return;
    }

    foreach($content->find('img') as $image) {
        $src = (string) $image->src;

        // strpos() yields false when '/' is absent and (false == 0) is true,
        // so the position must be compared strictly. A leading '//' already
        // names a host and must not be prefixed.
        if(strpos($src, '/') === 0 && strpos($src, '//') !== 0) {
            $image->src = $server.$src;
        }
    }
}
|
||||
|
||||
}
|
||||
?>
|
||||
|
|
4
vendor/simplehtmldom/simple_html_dom.php
vendored
4
vendor/simplehtmldom/simple_html_dom.php
vendored
|
@ -73,7 +73,7 @@ function file_get_html($url, $use_include_path = false, $context=null, $offset =
|
|||
// We DO force the tags to be terminated.
|
||||
$dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
|
||||
// For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done.
|
||||
$contents = file_get_contents($url, $use_include_path, $context, $offset);
|
||||
$contents = advanced_file_get_contents($url, $use_include_path, $context, $offset);
|
||||
// Paperg - use our own mechanism for getting the contents as we want to control the timeout.
|
||||
//$contents = retrieve_url_contents($url);
|
||||
if (empty($contents) || strlen($contents) > MAX_FILE_SIZE)
|
||||
|
@ -1094,7 +1094,7 @@ class simple_html_dom
|
|||
function load_file()
|
||||
{
|
||||
$args = func_get_args();
|
||||
$this->load(call_user_func_array('file_get_contents', $args), true);
|
||||
$this->load(call_user_func_array('advanced_file_get_contents', $args), true);
|
||||
// Throw an error if we can't properly load the dom.
|
||||
if (($error=error_get_last())!==null) {
|
||||
$this->clear();
|
||||
|
|
Loading…
Reference in a new issue