Browse Source

Correction de quelques problèmes avec RSSExpander suite à la migration dans lib/Bridge.
Correction de typos.
Ajout de la possibilité d'utiliser un proxy.

Teromene 8 years ago
parent
commit
1a4a428449
6 changed files with 37 additions and 22 deletions
  1. 3 3
      bridges/Gawker.php
  2. 1 1
      bridges/WorldOfTanks.php
  3. 3 0
      index.php
  4. 20 13
      lib/Bridge.php
  5. 8 3
      lib/HTMLUtils.php
  6. 2 2
      vendor/simplehtmldom/simple_html_dom.php

+ 3 - 3
bridges/Gawker.php

@@ -31,10 +31,10 @@ class Gawker extends RssExpander{
 			trigger_error("If no site is provided, nothing is gonna happen", E_USER_ERROR);
         } else {
             $this->name = $param['site'];
-			$param['url'] = $this->toURI(strtolower($param['site']));
+			$url = $this->toURI(strtolower($param['site']));
         }
 //        $this->message("loading feed from ".$this->getURI());
-        parent::collectExpandableDatas($param, $name);
+        parent::collectExpandableDatas($param, $url);
     }
     
     protected function parseRSSItem($newsItem) {
@@ -49,7 +49,7 @@ class Gawker extends RssExpander{
             $articlePage = str_get_html($this->get_cached($item->uri));
             if(is_object($articlePage)) {
                 $content = $articlePage->find('.post-content', 0);
-                $this->defaultImageSrcTo($content, $this->getURI());
+                HTMLSanitizer::defaultImageSrcTo($content, $this->getURI());
                 $vcard = $articlePage->find('.vcard', 0);
                 if(is_object($vcard)) {
                     $authorLink = $vcard->find('a', 0);

+ 1 - 1
bridges/WorldOfTanks.php

@@ -86,7 +86,7 @@ class WorldOfTanks extends HttpCachingBridgeAbstract{
 //        $this->message("loading page ".$item->uri);
         $articlePage = str_get_html($this->get_cached($item->uri));
         $content = $articlePage->find('.l-content', 0);
-        $this->defaultImageSrcTo($content, WORLD_OF_TANKS);
+        HTMLSanitizer::defaultImageSrcTo($content, WORLD_OF_TANKS);
         $item->title = $content->find('h1', 0)->innertext;
         $item->content = $content->find('.b-content', 0)->innertext;
 //        $item->name = $auteur->innertext;

+ 3 - 0
index.php

@@ -11,6 +11,8 @@ TODO :
 - implement header('X-Cached-Version: '.date(DATE_ATOM, filemtime($cachefile)));
 */
 
+//define('PROXY_URL', 'tcp://192.168.0.0:28');
+
 date_default_timezone_set('UTC');
 error_reporting(0);
 //ini_set('display_errors','1'); error_reporting(E_ALL);  // For debugging only.
@@ -21,6 +23,7 @@ if (!extension_loaded('openssl'))
 
 // FIXME : beta test UA spoofing, please report any blacklisting by PHP-fopen-unfriendly websites
 ini_set('user_agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20121202 Firefox/30.0 (rss-bridge/0.1; +https://github.com/sebsauvage/rss-bridge)');
+
 // -------
 
 // cache file purge - delete cache files older than 24 hours

+ 20 - 13
lib/Bridge.php

@@ -39,7 +39,7 @@ abstract class BridgeAbstract implements BridgeInterface{
     }
 
     /**
-    * Return datas store in the bridge
+    * Return datas stored in the bridge
     * @return mixed
     */
     public function getDatas(){
@@ -50,7 +50,7 @@ abstract class BridgeAbstract implements BridgeInterface{
 
     /**
     * Defined datas with parameters depending choose bridge
-    * Note : you can defined a cache before with "setCache"
+    * Note : you can define a cache before with "setCache"
     * @param array $param $_REQUEST, $_GET, $_POST, or array with bridge expected paramters
     */
     public function setDatas(array $param){
@@ -90,16 +90,6 @@ abstract class BridgeAbstract implements BridgeInterface{
         return $this;
     }
 
-    /**
-     * Set default image SRC attribute to point on given server when none is provided (that's to say when image src starts with '/'
-     */
-    public function defaultImageSrcTo($content, $server) {
-        foreach($content->find('img') as $image) {
-            if(strpos($image->src, '/')==0) {
-                $image->src = $server.$image->src;
-            }
-        }
-    }
 }
 
 /**
@@ -311,7 +301,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{
 
     public function collectExpandableDatas(array $param, $name){
         if (empty($name)) {
-            $this->returnError('There is no $param[\'url\'] for this RSS expander', 404);
+            $this->returnError('There is no $name for this RSS expander', 404);
         }
 //       $this->message("Loading from ".$param['url']);
         // Notice WE DO NOT use cache here on purpose : we want a fresh view of the RSS stream each time
@@ -363,3 +353,20 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{
         return $this->description;
     }
 }
+
+/**
+ * Proxy-aware replacement for file_get_contents().
+ *
+ * When the PROXY_URL constant is defined, the request is sent through that
+ * proxy; otherwise this behaves like a plain file_get_contents() call.
+ * The optional arguments mirror file_get_contents() because the simplehtmldom
+ * call sites forward them; if the signature does not declare them, PHP
+ * silently discards them and a caller-supplied stream context is lost.
+ *
+ * @param string $url URL (or path) to read
+ * @param bool $use_include_path forwarded unchanged to file_get_contents()
+ * @param resource|null $context optional stream context; its options are kept and the proxy settings are added on top
+ * @param int $offset start offset, only forwarded when strictly positive
+ * @return string|false the content read, or false on failure
+ */
+function advanced_file_get_contents($url, $use_include_path = false, $context = null, $offset = 0) {
+
+	if(defined('PROXY_URL')) {
+		// Start from the caller's options (if any) so they are not lost,
+		// then layer the proxy settings on top of them.
+		$options = is_resource($context) ? stream_context_get_options($context) : array();
+		$options['http']['proxy'] = PROXY_URL;
+		$options['http']['request_fulluri'] = true;
+		$context = stream_context_create($options);
+	}
+
+	// Only a positive offset is forwarded: older callers use -1 to mean
+	// "no offset", which PHP >= 7.1 interprets as "seek from the end" and
+	// which cannot work on non-seekable (remote) streams.
+	if($offset > 0) {
+		return file_get_contents($url, $use_include_path, $context, $offset);
+	}
+
+	return file_get_contents($url, $use_include_path, $context);
+
+}

+ 8 - 3
lib/HTMLUtils.php

@@ -91,7 +91,7 @@ CARD;
 				} else if($inputEntry['type'] == 'number') {
 					$card .= '<input '.$additionalInfoString.' id="' . $idArg . '" type="number" value="" placeholder="' . $inputEntry['exampleValue'] . '" name="' . $inputEntry['identifier'] . '" /><br />' . PHP_EOL;
 				} else if($inputEntry['type'] == 'list') {
-					$card .= '<select '.$additionalInfoString.' id="' . $idArg . '" name="' . $inputEntry['name'] . '" >';
+					$card .= '<select '.$additionalInfoString.' id="' . $idArg . '" name="' . $inputEntry['identifier'] . '" >';
 					foreach($inputEntry['values'] as $listValues) {
 
 						$card .= "<option $additionalInfoString value='" . $listValues['value'] . "'>" . $listValues['name'] . "</option>";
@@ -157,9 +157,7 @@ class HTMLSanitizer {
 				$element->outertext = '';
 			} else {
 				foreach($element->getAllAttributes() as $attributeName => $attribute) {
-
 					if(!in_array($attributeName, $this->keptAttributes)) $element->removeAttribute($attributeName);
-
 				}
 			}
 		}
@@ -167,6 +165,13 @@ class HTMLSanitizer {
 		return $htmlContent;
 
 	}
+	/**
+	 * Prefix server-relative image sources (those starting with a single '/')
+	 * with the given server, so they resolve outside of the original site.
+	 *
+	 * @param object $content parsed HTML node exposing find('img') (presumably a simple_html_dom node; verify against callers)
+	 * @param string $server scheme and host to prepend, e.g. the bridge URI
+	 */
+	public static function defaultImageSrcTo($content, $server) {
+		foreach($content->find('img') as $image) {
+			$src = $image->src;
+			// Strict comparison is required: strpos() returns false when there is
+			// no '/' at all, and false == 0 would wrongly match every such src.
+			// Protocol-relative sources ('//host/path') already name their host,
+			// so prefixing them would produce a broken URL.
+			if(strpos($src, '/') === 0 && strpos($src, '//') !== 0) {
+				$image->src = $server.$src;
+			}
+		}
+	}
 
 }
 ?>

+ 2 - 2
vendor/simplehtmldom/simple_html_dom.php

@@ -73,7 +73,7 @@ function file_get_html($url, $use_include_path = false, $context=null, $offset =
 	// We DO force the tags to be terminated.
 	$dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
 	// For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done.
-	$contents = file_get_contents($url, $use_include_path, $context, $offset);
+	$contents = advanced_file_get_contents($url, $use_include_path, $context, $offset);
 	// Paperg - use our own mechanism for getting the contents as we want to control the timeout.
 	//$contents = retrieve_url_contents($url);
 	if (empty($contents) || strlen($contents) > MAX_FILE_SIZE)
@@ -1094,7 +1094,7 @@ class simple_html_dom
 	function load_file()
 	{
 		$args = func_get_args();
-		$this->load(call_user_func_array('file_get_contents', $args), true);
+		$this->load(call_user_func_array('advanced_file_get_contents', $args), true);
 		// Throw an error if we can't properly load the dom.
 		if (($error=error_get_last())!==null) {
 			$this->clear();