Browse Source

implement proxy feature without modifying simple_html_dom code

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
Pierre Mazière 7 years ago
parent
commit
36d39d3f59
2 changed files with 27 additions and 17 deletions
  1. 25 15
      lib/Bridge.php
  2. 2 2
      vendor/simplehtmldom/simple_html_dom.php

+ 25 - 15
lib/Bridge.php

@@ -90,6 +90,31 @@ abstract class BridgeAbstract implements BridgeInterface{
         return $this;
     }
 
+    /**
+     * Proxy-aware wrapper around the global file_get_html() helper.
+     *
+     * When the PROXY_URL constant is defined, the request is routed through
+     * that proxy: a new stream context is created if the caller supplied
+     * none, otherwise the proxy options are applied onto the caller's
+     * context. Without PROXY_URL the arguments are forwarded untouched.
+     *
+     * All parameters mirror those of the global file_get_html() and are
+     * passed through in the same order.
+     *
+     * @param string        $url     Location of the document to load.
+     * @param resource|null $context Optional stream context; receives the
+     *                               proxy options when PROXY_URL is defined.
+     * @return mixed Whatever the global file_get_html() returns for $url.
+     */
+    protected function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT){
+      if(defined('PROXY_URL')) {
+        // The options are only needed on the proxy path, so build them here.
+        $contextOptions = array(
+          'http' => array(
+            'user_agent' => ini_get('user_agent'),
+            'proxy' => PROXY_URL,
+            // Send the full URI in the request line, as HTTP proxies expect.
+            'request_fulluri' => true,
+          ),
+        );
+
+        if(is_null($context)){
+          $context = stream_context_create($contextOptions);
+        } else {
+          // Best effort: the options are applied in place on the caller's
+          // context resource; on failure that context is used as it was.
+          stream_context_set_option($context, $contextOptions);
+        }
+      }
+
+      // Note the exact spelling of $defaultBRText: PHP variable names are
+      // case-sensitive, so any other casing is an undefined variable.
+      return file_get_html($url,$use_include_path,$context,$offset,$maxLen,
+        $lowercase,$forceTagsClosed,$target_charset,$stripRN,$defaultBRText,
+        $defaultSpanText);
+    }
+
 }
 
 /**
@@ -354,19 +379,4 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{
     }
 }
 
-function advanced_file_get_contents($url) { // Fetch $url, via the PROXY_URL proxy when that constant is defined. Only $url is declared, so any extra arguments a caller passes are ignored.
-
-	if(defined('PROXY_URL')) {
-		$context = array(
-			'http' => array(
-				'proxy' => PROXY_URL,
-				'request_fulluri' => true, // send the full URI in the request line
-			),
-		);
-		$context = stream_context_create($context);
-		return file_get_contents($url, false, $context); // false: do not search the include path
-	} else {
-		return file_get_contents($url); // no proxy configured: plain fetch
-	}
 
-}

+ 2 - 2
vendor/simplehtmldom/simple_html_dom.php

@@ -73,7 +73,7 @@ function file_get_html($url, $use_include_path = false, $context=null, $offset =
 	// We DO force the tags to be terminated.
 	$dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
 	// For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done.
-	$contents = advanced_file_get_contents($url, $use_include_path, $context, $offset);
+	$contents = file_get_contents($url, $use_include_path, $context, $offset);
 	// Paperg - use our own mechanism for getting the contents as we want to control the timeout.
 	//$contents = retrieve_url_contents($url);
 	if (empty($contents) || strlen($contents) > MAX_FILE_SIZE)
@@ -1094,7 +1094,7 @@ class simple_html_dom
 	function load_file()
 	{
 		$args = func_get_args();
-		$this->load(call_user_func_array('advanced_file_get_contents', $args), true);
+		$this->load(call_user_func_array('file_get_contents', $args), true);
 		// Throw an error if we can't properly load the dom.
 		if (($error=error_get_last())!==null) {
 			$this->clear();