From 2df2623430016373c6a899784519f053f1b704d0 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 25 Mar 2018 13:11:15 +0200 Subject: [PATCH 1/8] [index] Add 'curl' extension check --- index.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/index.php b/index.php index ea21799..5827ab2 100644 --- a/index.php +++ b/index.php @@ -80,6 +80,9 @@ if(!extension_loaded('mbstring')) if(!extension_loaded('simplexml')) die('"simplexml" extension not loaded. Please check "php.ini"'); +if(!extension_loaded('curl')) + die('"curl" extension not loaded. Please check "php.ini"'); + // configuration checks if(ini_get('allow_url_fopen') !== "1") die('"allow_url_fopen" is not set to "1". Please check "php.ini'); From ee78e7613fa6ceeb7cfebf91a2b2fe0a202ad68b Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 25 Mar 2018 13:12:13 +0200 Subject: [PATCH 2/8] [contents] Replace file_get_contents by cURL cURL is a powerful library specifically designed to connect to many different types of servers with different types of protocols. For more detailed information refer to the PHP cURL manual: - http://php.net/manual/en/book.curl.php Due to this change some changes to the parameters of the getContents function were necessary (also applies to getSimpleHTMLDOM and getSimpleHTMLDOMCached): > $use_include_path removed This parameter has never been used and doesn't even make sense in this context; If set to true file_get_contents would also search for files in the include_path (specified in php.ini). > $context replaced by $header and $opts The $context parameter allowed for customization of the request in order to change how file_get_contents would acquire the data (e.g. using POST instead of GET, sending custom header, etc...) cURL also provides facilities to specify custom headers and change how it communicates with servers. cURL, however, is much more advanced. - $header is an optional parameter (empty by default). It receives an array of strings to send in the HTTP request header. 
See 'CURLOPT_HTTPHEADER': "An array of HTTP header fields to set, in the format array('Content-type: text/plain', 'Content-length: 100')" - php.net/manual/en/function.curl-setopt.php - $opts is an optional parameter (empty by default). It receives an array of options, where each option is a key-value-pair of a cURL option (CURLOPT_*) and its associated parameter. This parameter accepts any of the CURLOPT_* settings. Example (sending POST instead of GET): $opts = array( CURLOPT_POST => 1, CURLOPT_POSTFIELDS => '&action=none' ); $html = getContents($url, array(), $opts); Refer to the cURL setopt manual for more information: - php.net/manual/en/function.curl-setopt.php > $offset and $maxlen removed These options were supported by file_get_contents, but there doesn't seem to be an equivalent in cURL. Since no caller uses them they are safe to remove. Compressed data / Encoding By using cURL instead of file_get_contents RSS-Bridge no longer has to handle compressed data manually. See 'CURLOPT_ENCODING': "[...] Supported encodings are "identity", "deflate", and "gzip". If an empty string, "", is set, a header containing all supported encoding types is sent." - http://php.net/manual/en/function.curl-setopt.php Notice: By default all encoding types are accepted (""). This can be changed by setting a custom option via $opts. Example: $opts = array(CURLOPT_ENCODING => 'gzip'); $html = getContents($url, array(), $opts); Proxy The proxy implementation should still work, but there doesn't seem to be an equivalent for 'request_fulluri = true'. To my understanding this isn't an issue because cURL knows how to handle proxy communication. 
--- lib/contents.php | 80 +++++++++++++----------------------------------- 1 file changed, 21 insertions(+), 59 deletions(-) diff --git a/lib/contents.php b/lib/contents.php index ec62c8d..f9b68d2 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -1,77 +1,41 @@ array( - 'user_agent' => ini_get('user_agent'), - 'accept_encoding' => 'gzip' - ) - ); +function getContents($url, $header = array(), $opts = array()){ + $ch = curl_init($url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($ch, CURLOPT_HTTPHEADER, $header); + curl_setopt($ch, CURLOPT_USERAGENT, ini_get('user_agent')); + curl_setopt($ch, CURLOPT_ENCODING, ''); - if(defined('PROXY_URL') && !defined('NOPROXY')) { - $contextOptions['http']['proxy'] = PROXY_URL; - $contextOptions['http']['request_fulluri'] = true; - - if(is_null($context)) { - $context = stream_context_create($contextOptions); - } else { - $prevContext = $context; - if(!stream_context_set_option($context, $contextOptions)) { - $context = $prevContext; - } + if(is_array($opts)) { + foreach($opts as $key => $value) { + curl_setopt($ch, $key, $value); } } - if(is_null($maxlen)) { - $content = file_get_contents($url, $use_include_path, $context, $offset); - } else { - $content = file_get_contents($url, $use_include_path, $context, $offset, $maxlen); + if(defined('PROXY_URL') && !defined('NOPROXY')) { + curl_setopt($ch, CURLOPT_PROXY, PROXY_URL); } + $content = curl_exec($ch); + curl_close($ch); + if($content === false) debugMessage('Cant\'t download ' . $url); - // handle compressed data - foreach($http_response_header as $header) { - if(stristr($header, 'content-encoding')) { - switch(true) { - case stristr($header, 'gzip'): - $content = gzinflate(substr($content, 10, -8)); - break; - case stristr($header, 'compress'): - //TODO - case stristr($header, 'deflate'): - //TODO - case stristr($header, 'brotli'): - //TODO - returnServerError($header . 
'=> Not implemented yet'); - break; - case stristr($header, 'identity'): - break; - default: - returnServerError($header . '=> Unknown compression'); - } - } - } - return $content; } function getSimpleHTMLDOM($url, -$use_include_path = false, -$context = null, -$offset = 0, -$maxLen = null, +$header = array(), +$opts = array(), $lowercase = true, $forceTagsClosed = true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT){ - $content = getContents($url, $use_include_path, $context, $offset, $maxLen); + $content = getContents($url, $header, $opts); return str_get_html($content, $lowercase, $forceTagsClosed, @@ -89,10 +53,8 @@ $defaultSpanText = DEFAULT_SPAN_TEXT){ */ function getSimpleHTMLDOMCached($url, $duration = 86400, -$use_include_path = false, -$context = null, -$offset = 0, -$maxLen = null, +$header = array(), +$opts = array(), $lowercase = true, $forceTagsClosed = true, $target_charset = DEFAULT_TARGET_CHARSET, @@ -116,7 +78,7 @@ $defaultSpanText = DEFAULT_SPAN_TEXT){ && (!defined('DEBUG') || DEBUG !== true)) { // Contents within duration $content = $cache->loadData(); } else { // Content not within duration - $content = getContents($url, $use_include_path, $context, $offset, $maxLen); + $content = getContents($url, $header, $opts); if($content !== false) { $cache->saveData($content); } From 6caca4946bf4593e4c6d254e871d2783286eeb37 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 25 Mar 2018 14:01:35 +0200 Subject: [PATCH 3/8] bridges: Fix bridges with custom headers and options This commit fixes bridges which called getContents, getSimpleHTMLDOM or getSimpleHTMLDOMCached with custom settings. 
--- bridges/Arte7Bridge.php | 8 +++----- bridges/FacebookBridge.php | 36 +++++++++++------------------------- bridges/VkBridge.php | 14 ++------------ bridges/YoutubeBridge.php | 4 ++-- 4 files changed, 18 insertions(+), 44 deletions(-) diff --git a/bridges/Arte7Bridge.php b/bridges/Arte7Bridge.php index 1162d17..16952dc 100644 --- a/bridges/Arte7Bridge.php +++ b/bridges/Arte7Bridge.php @@ -64,13 +64,11 @@ class Arte7Bridge extends BridgeAbstract { . $lang . ($category != null ? '&category.code=' . $category : ''); - $context = array( - 'http' => array( - 'header' => 'Authorization: Bearer '. self::API_TOKEN - ) + $header = array( + 'Authorization: Bearer ' . self::API_TOKEN ); - $input = getContents($url, false, stream_context_create($context)) or die('Could not request ARTE.'); + $input = getContents($url, $header) or die('Could not request ARTE.'); $input_json = json_decode($input, true); foreach($input_json['videos'] as $element) { diff --git a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index b0b4324..7d6b249 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -96,17 +96,15 @@ class FacebookBridge extends BridgeAbstract { $captcha_action = $_SESSION['captcha_action']; $captcha_fields = $_SESSION['captcha_fields']; $captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $_POST['captcha_response']); - $http_options = array( - 'http' => array( - 'method' => 'POST', - 'user_agent' => ini_get('user_agent'), - 'header' => array("Content-type: - application/x-www-form-urlencoded\r\nReferer: $captcha_action\r\nCookie: noscript=1\r\n"), - 'content' => http_build_query($captcha_fields) - ), + + $header = array("Content-type: +application/x-www-form-urlencoded\r\nReferer: $captcha_action\r\nCookie: noscript=1\r\n"); + $opts = array( + CURLOPT_POST => 1, + CURLOPT_POSTFIELDS => http_build_query($captcha_fields) ); - $context = stream_context_create($http_options); - $html = getContents($captcha_action, false, 
$context); + + $html = getContents($captcha_action, $header, $opts); if($html === false) { returnServerError('Failed to submit captcha response back to Facebook'); @@ -120,15 +118,7 @@ class FacebookBridge extends BridgeAbstract { //Retrieve page contents if(is_null($html)) { - $http_options = array( - 'http' => array( - 'method' => 'GET', - 'user_agent' => ini_get('user_agent'), - 'header' => 'Accept-Language: ' . getEnv('HTTP_ACCEPT_LANGUAGE') . "\r\n" - ) - ); - - $context = stream_context_create($http_options); + $header = array('Accept-Language: ' . getEnv('HTTP_ACCEPT_LANGUAGE') . "\r\n"); // First character cannot be a forward slash if(strpos($this->getInput('u'), "/") === 0) { @@ -136,14 +126,10 @@ class FacebookBridge extends BridgeAbstract { } if(!strpos($this->getInput('u'), "/")) { - $html = getSimpleHTMLDOM(self::URI . urlencode($this->getInput('u')) . '?_fb_noscript=1', - false, - $context) + $html = getSimpleHTMLDOM(self::URI . urlencode($this->getInput('u')) . '?_fb_noscript=1', $header) or returnServerError('No results for this query.'); } else { - $html = getSimpleHTMLDOM(self::URI . 'pages/' . $this->getInput('u') . '?_fb_noscript=1', - false, - $context) + $html = getSimpleHTMLDOM(self::URI . 'pages/' . $this->getInput('u') . 
'?_fb_noscript=1', $header) or returnServerError('No results for this query.'); } } diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php index 4eba961..d8099a5 100644 --- a/bridges/VkBridge.php +++ b/bridges/VkBridge.php @@ -109,19 +109,9 @@ class VkBridge extends BridgeAbstract { ini_set('user-agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0'); - $opts = array( - 'http' => array( - 'method' => "GET", - 'user_agent' => ini_get('user_agent'), - 'accept_encoding' => 'gzip', - 'header' => "Accept-language: en\r\n - Cookie: remixlang=3\r\n" - ) - ); + $header = array("Accept-language: en\r\nCookie: remixlang=3\r\n"); - $context = stream_context_create($opts); - - return getContents($this->getURI(), false, $context); + return getContents($this->getURI(), $header); } diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 8ec1f14..5923686 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -141,8 +141,8 @@ class YoutubeBridge extends BridgeAbstract { private function ytGetSimpleHTMLDOM($url){ return getSimpleHTMLDOM($url, $use_include_path = false, - $context = null, - $offset = 0, + $header = array(), + $opts = array(), $maxLen = null, $lowercase = true, $forceTagsClosed = true, From 5bd9c1611dd961ee7d7c32a5948de8babff89bac Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 25 Mar 2018 15:28:00 +0200 Subject: [PATCH 4/8] [contents] Limit cURL protocols to HTTP and HTTPS --- lib/contents.php | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/contents.php b/lib/contents.php index f9b68d2..7fbda2d 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -6,6 +6,7 @@ function getContents($url, $header = array(), $opts = array()){ curl_setopt($ch, CURLOPT_HTTPHEADER, $header); curl_setopt($ch, CURLOPT_USERAGENT, ini_get('user_agent')); curl_setopt($ch, CURLOPT_ENCODING, ''); + curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS); if(is_array($opts)) { foreach($opts as 
$key => $value) { From bc28c5da8e93ad38bc54726abea51462522f6158 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 25 Mar 2018 15:48:30 +0200 Subject: [PATCH 5/8] [contents] Set CURLOPT_HTTPHEADER only if the provided array contains data --- lib/contents.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/contents.php b/lib/contents.php index 7fbda2d..591e38e 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -3,7 +3,10 @@ function getContents($url, $header = array(), $opts = array()){ $ch = curl_init($url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($ch, CURLOPT_HTTPHEADER, $header); + + if(is_array($header) && count($header) !== 0) + curl_setopt($ch, CURLOPT_HTTPHEADER, $header); + curl_setopt($ch, CURLOPT_USERAGENT, ini_get('user_agent')); curl_setopt($ch, CURLOPT_ENCODING, ''); curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS); From 4b3efed7ec8304b842ba65acc6538a40c71fe71a Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 25 Mar 2018 16:13:00 +0200 Subject: [PATCH 6/8] [YoutubeBridge] Fix too many parameters when using HTML mode --- bridges/YoutubeBridge.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 5923686..694e85e 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -140,10 +140,8 @@ class YoutubeBridge extends BridgeAbstract { private function ytGetSimpleHTMLDOM($url){ return getSimpleHTMLDOM($url, - $use_include_path = false, $header = array(), $opts = array(), - $maxLen = null, $lowercase = true, $forceTagsClosed = true, $target_charset = DEFAULT_TARGET_CHARSET, From ecfc220b100cc73b70702cbbaaafa08c04b0d8c3 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 25 Mar 2018 16:20:25 +0200 Subject: [PATCH 7/8] [KernelBugTrackerBridge] Fix too many parameters requesting HTML DOM --- bridges/KernelBugTrackerBridge.php | 2 -- 1 file changed, 
2 deletions(-) diff --git a/bridges/KernelBugTrackerBridge.php b/bridges/KernelBugTrackerBridge.php index 567ee50..f3135af 100644 --- a/bridges/KernelBugTrackerBridge.php +++ b/bridges/KernelBugTrackerBridge.php @@ -45,9 +45,7 @@ class KernelBugTrackerBridge extends BridgeAbstract { // We use the print preview page for simplicity $html = getSimpleHTMLDOMCached($this->getURI() . '&format=multiple', 86400, - false, null, - 0, null, true, true, From 45c3dcb636e9e63ab17a405491a234b74df7af9b Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Fri, 6 Apr 2018 20:24:43 +0200 Subject: [PATCH 8/8] [VkBridge] Simplify header specification --- bridges/VkBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php index d8099a5..e152956 100644 --- a/bridges/VkBridge.php +++ b/bridges/VkBridge.php @@ -109,7 +109,7 @@ class VkBridge extends BridgeAbstract { ini_set('user-agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0'); - $header = array("Accept-language: en\r\nCookie: remixlang=3\r\n"); + $header = array('Accept-language: en', 'Cookie: remixlang=3'); return getContents($this->getURI(), $header); }