diff --git a/lib/RssBridge.php b/lib/RssBridge.php index b570076..1d66e73 100644 --- a/lib/RssBridge.php +++ b/lib/RssBridge.php @@ -34,6 +34,17 @@ if(!file_exists($vendorLibSimpleHtmlDom)) { } require_once $vendorLibSimpleHtmlDom; +$vendorLibPhpUrlJoin = __DIR__ . PATH_VENDOR . '/php-urljoin/src/urljoin.php'; +if(!file_exists($vendorLibPhpUrlJoin)) { + throw new \HttpException('"php-urljoin" library is missing. + Get it from https://github.com/fluffy-critter/php-urljoin and place the script "urljoin.php" in ' + . substr(PATH_VENDOR, 4) + . '/php-urljoin/src/', + 500); +} +require_once $vendorLibPhpUrlJoin; + + /* Example use require_once __DIR__ . '/lib/RssBridge.php'; diff --git a/lib/html.php b/lib/html.php index 3214eef..5098f6c 100644 --- a/lib/html.php +++ b/lib/html.php @@ -42,18 +42,11 @@ function backgroundToImg($htmlContent) { function defaultLinkTo($content, $server){ foreach($content->find('img') as $image) { - if(strpos($image->src, 'http') === false - && strpos($image->src, '//') === false - && strpos($image->src, 'data:') === false) - $image->src = $server . $image->src; + $image->src = urljoin($server, $image->src); } foreach($content->find('a') as $anchor) { - if(strpos($anchor->href, 'http') === false - && strpos($anchor->href, '//') === false - && strpos($anchor->href, '#') !== 0 - && strpos($anchor->href, '?') !== 0) - $anchor->href = $server . $anchor->href; + $anchor->href = urljoin($server, $anchor->href); } return $content; diff --git a/vendor/php-urljoin/src/urljoin.php b/vendor/php-urljoin/src/urljoin.php new file mode 100644 index 0000000..4f62f90 --- /dev/null +++ b/vendor/php-urljoin/src/urljoin.php @@ -0,0 +1,131 @@ + 'path/to/file.ext' + if (substr($prel['path'], 0, 2) === './') { + $prel['path'] = substr($prel['path'], 2); + } + + if (array_key_exists('path', $pbase)) { + $dir = preg_replace('@/[^/]*$@', '', $pbase['path']); + $merged['path'] = $dir . '/' . $prel['path']; + } else { + $merged['path'] = '/' . $prel['path']; + } + + } + + if(array_key_exists('path', $merged)) { + // Get the path components, and remove the initial empty one + $pathParts = explode('/', $merged['path']); + array_shift($pathParts); + + $path = []; + $prevPart = ''; + foreach ($pathParts as $part) { + if ($part == '..' && count($path) > 0) { + // Cancel out the parent directory (if there's a parent to cancel) + $parent = array_pop($path); + // But if it was also a parent directory, leave it in + if ($parent == '..') { + array_push($path, $parent); + array_push($path, $part); + } + } else if ($prevPart != '' || ($part != '.' && $part != '')) { + // Don't include empty or current-directory components + if ($part == '.') { + $part = ''; + } + array_push($path, $part); + } + $prevPart = $part; + } + $merged['path'] = '/' . implode('/', $path); + } + + $ret = ''; + if (isset($merged['scheme'])) { + $ret .= $merged['scheme'] . ':'; + } + + if (isset($merged['scheme']) || isset($merged['host'])) { + $ret .= '//'; + } + + if (isset($prel['host'])) { + $hostSource = $prel; + } else { + $hostSource = $pbase; + } + + // username, password, and port are associated with the hostname, not merged + if (isset($hostSource['host'])) { + if (isset($hostSource['user'])) { + $ret .= $hostSource['user']; + if (isset($hostSource['pass'])) { + $ret .= ':' . $hostSource['pass']; + } + $ret .= '@'; + } + $ret .= $hostSource['host']; + if (isset($hostSource['port'])) { + $ret .= ':' . $hostSource['port']; + } + } + + if (isset($merged['path'])) { + $ret .= $merged['path']; + } + + if (isset($prel['query'])) { + $ret .= '?' . $prel['query']; + } + + if (isset($prel['fragment'])) { + $ret .= '#' . $prel['fragment']; + } + + return $ret; +}