From 747be1384944c48420e946c1b09d3fc7996a2fd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sun, 25 Sep 2016 23:58:52 +0200 Subject: [PATCH] [core] extract HTMLUtils methods to make them functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Methods displayBridgeCard, sanitize, defaultImageSrcTo are now functions in lib/html.php getHelperButtonsFormat and getFormHeader are now anonymous functions defined in displayBridgeCard Signed-off-by: Pierre Mazière --- bridges/AcrimedBridge.php | 5 +- bridges/CourrierInternationalBridge.php | 4 +- bridges/WorldOfTanksBridge.php | 2 +- index.php | 4 +- lib/HTMLUtils.php | 307 ------------------------ lib/RssBridge.php | 2 +- lib/html.php | 292 ++++++++++++++++++++++ 7 files changed, 299 insertions(+), 317 deletions(-) delete mode 100644 lib/HTMLUtils.php create mode 100644 lib/html.php diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index f614f64..2d08b6b 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -13,10 +13,9 @@ class AcrimedBridge extends FeedExpander { protected function parseItem($newsItem){ $item = parent::parseItem($newsItem); - $hs = new HTMLSanitizer(); $articlePage = getSimpleHTMLDOM($newsItem->link); - $article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext); - $article = HTMLSanitizer::defaultImageSrcTo($article, static::URI); + $article = sanitize($articlePage->find('article.article1', 0)->innertext); + $article = defaultImageSrcTo($article, static::URI); $item['content'] = $article; return $item; diff --git a/bridges/CourrierInternationalBridge.php b/bridges/CourrierInternationalBridge.php index d2e1d74..4e2d86e 100644 --- a/bridges/CourrierInternationalBridge.php +++ b/bridges/CourrierInternationalBridge.php @@ -27,9 +27,7 @@ class CourrierInternationalBridge extends BridgeAbstract{ $page = getSimpleHTMLDOM($item['uri']); - $cleaner = new HTMLSanitizer(); - - 
$item['content'] = $cleaner->sanitize($page->find("div.article-text")[0]); + $item['content'] = sanitize($page->find("div.article-text")[0]); $item['title'] = strip_tags($article->find(".title")[0]); $dateTime = date_parse($page->find("time")[0]); diff --git a/bridges/WorldOfTanksBridge.php b/bridges/WorldOfTanksBridge.php index 256a6ca..2cd86c4 100644 --- a/bridges/WorldOfTanksBridge.php +++ b/bridges/WorldOfTanksBridge.php @@ -59,7 +59,7 @@ class WorldOfTanksBridge extends BridgeAbstract { debugMessage("loading page ".$item['uri']); $articlePage = getSimpleHTMLDOMCached($item['uri']); $content = $articlePage->find('.l-content', 0); - HTMLSanitizer::defaultImageSrcTo($content, self::URI); + defaultImageSrcTo($content, self::URI); $item['title'] = $content->find('h1', 0)->innertext; $item['content'] = $content->find('.b-content', 0)->innertext; $item['timestamp'] = $content->find('.b-statistic_time', 0)->getAttribute("data-timestamp"); diff --git a/index.php b/index.php index 6249f5b..c8ef051 100644 --- a/index.php +++ b/index.php @@ -188,11 +188,11 @@ $formats = Format::searchInformation(); $bridgeList = Bridge::listBridges(); foreach($bridgeList as $bridgeName){ if(Bridge::isWhitelisted($whitelist_selection, $bridgeName)){ - echo HTMLUtils::displayBridgeCard($bridgeName, $formats); + echo displayBridgeCard($bridgeName, $formats); $activeFoundBridgeCount++; } elseif($showInactive) { // inactive bridges - $inactiveBridges .= HTMLUtils::displayBridgeCard($bridgeName, $formats, false) . PHP_EOL; + $inactiveBridges .= displayBridgeCard($bridgeName, $formats, false) . PHP_EOL; } } echo $inactiveBridges; diff --git a/lib/HTMLUtils.php b/lib/HTMLUtils.php deleted file mode 100644 index 750b15b..0000000 --- a/lib/HTMLUtils.php +++ /dev/null @@ -1,307 +0,0 @@ -' . $bridgeClass::NAME . ''; - $description = $bridgeClass::DESCRIPTION; - - $card = << -

{$name}

-

- {$description} -

- - -CARD; - - // If we don't have any parameter for the bridge, we print a generic form to load it. - if(count($bridgeClass::PARAMETERS) == 0){ - - $card .= HTMLUtils::getFormHeader($bridgeName); - - if($isActive){ - if(defined('PROXY_URL') && PROXY_BYBRIDGE){ - $idArg = 'arg-' - . urlencode($bridgeName) - . '-' - . urlencode('proxyoff') - . '-' - . urlencode('_noproxy'); - - $card .= '' - . PHP_EOL; - - $card .= '
' - . PHP_EOL; - } - - $card .= HTMLUtils::getHelperButtonsFormat($formats); - } else { - $card .= 'Inactive'; - } - - $card .= '' . PHP_EOL; - } - - $hasGlobalParameter = array_key_exists('global', $bridgeClass::PARAMETERS); - - if($hasGlobalParameter){ - $globalParameters = $bridgeClass::PARAMETERS['global']; - } - - foreach($bridgeClass::PARAMETERS as $parameterName => $parameter){ - if(!is_numeric($parameterName) && $parameterName == 'global') - continue; - - if($hasGlobalParameter) - $parameter = array_merge($parameter, $globalParameters); - - if(!is_numeric($parameterName)) - $card .= '
' . $parameterName . '
' . PHP_EOL; - - $card .= HTMLUtils::getFormHeader($bridgeName); - - foreach($parameter as $id => $inputEntry){ - $additionalInfoString = ''; - - if(isset($inputEntry['required']) && $inputEntry['required'] === true) - $additionalInfoString .= ' required'; - - if(isset($inputEntry['pattern'])) - $additionalInfoString .= ' pattern="' . $inputEntry['pattern'] . '"'; - - if(isset($inputEntry['title'])) - $additionalInfoString .= ' title="' . $inputEntry['title'] . '"'; - - if(!isset($inputEntry['exampleValue'])) - $inputEntry['exampleValue'] = ''; - - if(!isset($inputEntry['defaultValue'])) - $inputEntry['defaultValue'] = ''; - - $idArg = 'arg-' - . urlencode($bridgeName) - . '-' - . urlencode($parameterName) - . '-' - . urlencode($id); - - $card .= '' - . PHP_EOL; - - if(!isset($inputEntry['type']) || $inputEntry['type'] == 'text'){ - $card .= '
' - . PHP_EOL; - } elseif($inputEntry['type'] == 'number'){ - $card .= '
' - . PHP_EOL; - } else if($inputEntry['type'] == 'list'){ - $card .= '
'; - } elseif($inputEntry['type'] == 'checkbox'){ - if($inputEntry['defaultValue'] === 'checked') - $card .= '
' - . PHP_EOL; - else - $card .= '
' - . PHP_EOL; - } - } - - if($isActive){ - if(defined('PROXY_URL') && PROXY_BYBRIDGE){ - $idArg = 'arg-' - . urlencode($bridgeName) - . '-' - . urlencode('proxyoff') - . '-' - . urlencode('_noproxy'); - - $card .= '' - . PHP_EOL; - - $card .= '
' - . PHP_EOL; - } - $card .= HTMLUtils::getHelperButtonsFormat($formats); - } else { - $card .= 'Inactive'; - } - $card .= '' . PHP_EOL; - } - - $card .= ''; - $card .= '

' . $bridgeClass::MAINTAINER . '

'; - $card .= ''; - - return $card; - } - - private static function getHelperButtonsFormat($formats){ - $buttons = ''; - foreach($formats as $name){ - $buttons .= '' - . PHP_EOL; - } - - return $buttons; - } - - private static function getFormHeader($bridge){ - return << - - -EOD; - } -} - -class HTMLSanitizer { - - var $tagsToRemove; - var $keptAttributes; - var $onlyKeepText; - - public static $DEFAULT_CLEAR_TAGS = ["script", "iframe", "input", "form"]; - public static $KEPT_ATTRIBUTES = ["title", "href", "src"]; - public static $ONLY_TEXT = []; - - public function __construct($tags_to_remove = null - , $kept_attributes = null - , $only_keep_text = null){ - $this->tagsToRemove = is_null($tags_to_remove) ? HTMLSanitizer::$DEFAULT_CLEAR_TAGS : $tags_to_remove; - $this->keptAttributes = is_null($kept_attributes) ? HTMLSanitizer::$KEPT_ATTRIBUTES : $kept_attributes; - $this->onlyKeepText = is_null($only_keep_text) ? HTMLSanitizer::$ONLY_TEXT : $only_keep_text; - } - - public function sanitize($textToSanitize){ - $htmlContent = str_get_html($textToSanitize); - - foreach($htmlContent->find('*[!b38fd2b1fe7f4747d6b1c1254ccd055e]') as $element){ - if(in_array($element->tag, $this->onlyKeepText)){ - $element->outertext = $element->plaintext; - } elseif(in_array($element->tag, $this->tagsToRemove)){ - $element->outertext = ''; - } else { - foreach($element->getAllAttributes() as $attributeName => $attribute){ - if(!in_array($attributeName, $this->keptAttributes)) - $element->removeAttribute($attributeName); - } - } - } - - return $htmlContent; - } - - public static function defaultImageSrcTo($content, $server){ - foreach($content->find('img') as $image){ - if(is_null(strpos($image->src, "http")) - && is_null(strpos($image->src, "//")) - && is_null(strpos($image->src, "data:"))) - $image->src = $server . 
$image->src; - } - return $content; - } -} diff --git a/lib/RssBridge.php b/lib/RssBridge.php index a0b5a42..86ad644 100644 --- a/lib/RssBridge.php +++ b/lib/RssBridge.php @@ -15,7 +15,7 @@ require __DIR__ . '/BridgeAbstract.php'; require __DIR__ . '/FeedExpander.php'; require __DIR__ . '/Cache.php'; require __DIR__ . '/CacheAbstract.php'; -require __DIR__ . '/HTMLUtils.php'; +require __DIR__ . '/html.php'; require __DIR__ . '/error.php'; require __DIR__ . '/contents.php'; diff --git a/lib/html.php b/lib/html.php new file mode 100644 index 0000000..e329102 --- /dev/null +++ b/lib/html.php @@ -0,0 +1,292 @@ +' + . $name + . '' + . PHP_EOL; + } + + return $buttons; + }; + + $getFormHeader = function($bridge){ + return << + + +EOD; + }; + + $bridgeElement = Bridge::create($bridgeName); + $bridgeClass = $bridgeName . 'Bridge'; + + if($bridgeElement == false) + return ""; + + $name = '' . $bridgeClass::NAME . ''; + $description = $bridgeClass::DESCRIPTION; + + $card = << +

{$name}

+

+ {$description} +

+ + +CARD; + + // If we don't have any parameter for the bridge, we print a generic form to load it. + if(count($bridgeClass::PARAMETERS) == 0){ + + $card .= $getFormHeader($bridgeName); + + if($isActive){ + if(defined('PROXY_URL') && PROXY_BYBRIDGE){ + $idArg = 'arg-' + . urlencode($bridgeName) + . '-' + . urlencode('proxyoff') + . '-' + . urlencode('_noproxy'); + + $card .= '' + . PHP_EOL; + + $card .= '
' + . PHP_EOL; + } + + $card .= $getHelperButtonsFormat($formats); + } else { + $card .= 'Inactive'; + } + + $card .= '' . PHP_EOL; + } + + $hasGlobalParameter = array_key_exists('global', $bridgeClass::PARAMETERS); + + if($hasGlobalParameter){ + $globalParameters = $bridgeClass::PARAMETERS['global']; + } + + foreach($bridgeClass::PARAMETERS as $parameterName => $parameter){ + if(!is_numeric($parameterName) && $parameterName == 'global') + continue; + + if($hasGlobalParameter) + $parameter = array_merge($parameter, $globalParameters); + + if(!is_numeric($parameterName)) + $card .= '
' . $parameterName . '
' . PHP_EOL; + + $card .= $getFormHeader($bridgeName); + + foreach($parameter as $id => $inputEntry){ + $additionalInfoString = ''; + + if(isset($inputEntry['required']) && $inputEntry['required'] === true) + $additionalInfoString .= ' required'; + + if(isset($inputEntry['pattern'])) + $additionalInfoString .= ' pattern="' . $inputEntry['pattern'] . '"'; + + if(isset($inputEntry['title'])) + $additionalInfoString .= ' title="' . $inputEntry['title'] . '"'; + + if(!isset($inputEntry['exampleValue'])) + $inputEntry['exampleValue'] = ''; + + if(!isset($inputEntry['defaultValue'])) + $inputEntry['defaultValue'] = ''; + + $idArg = 'arg-' + . urlencode($bridgeName) + . '-' + . urlencode($parameterName) + . '-' + . urlencode($id); + + $card .= '' + . PHP_EOL; + + if(!isset($inputEntry['type']) || $inputEntry['type'] == 'text'){ + $card .= '
' + . PHP_EOL; + } elseif($inputEntry['type'] == 'number'){ + $card .= '
' + . PHP_EOL; + } else if($inputEntry['type'] == 'list'){ + $card .= '
'; + } elseif($inputEntry['type'] == 'checkbox'){ + if($inputEntry['defaultValue'] === 'checked') + $card .= '
' + . PHP_EOL; + else + $card .= '
' + . PHP_EOL; + } + } + + if($isActive){ + if(defined('PROXY_URL') && PROXY_BYBRIDGE){ + $idArg = 'arg-' + . urlencode($bridgeName) + . '-' + . urlencode('proxyoff') + . '-' + . urlencode('_noproxy'); + + $card .= '' + . PHP_EOL; + + $card .= '
' + . PHP_EOL; + } + $card .= $getHelperButtonsFormat($formats); + } else { + $card .= 'Inactive'; + } + $card .= '' . PHP_EOL; + } + + $card .= ''; + $card .= '

' . $bridgeClass::MAINTAINER . '

'; + $card .= ''; + + return $card; +} + +function sanitize($textToSanitize + ,$removedTags=array('script','iframe','input','form') + ,$keptAttributes=array('title','href','src') + ,$keptText=array()){ + $htmlContent = str_get_html($textToSanitize); + + foreach($htmlContent->find('*[!b38fd2b1fe7f4747d6b1c1254ccd055e]') as $element){ + if(in_array($element->tag, $keptText)){ + $element->outertext = $element->plaintext; + } elseif(in_array($element->tag, $removedTags)){ + $element->outertext = ''; + } else { + foreach($element->getAllAttributes() as $attributeName => $attribute){ + if(!in_array($attributeName, $keptAttributes)) + $element->removeAttribute($attributeName); + } + } + } + + return $htmlContent; +} + +function defaultImageSrcTo($content, $server){ + foreach($content->find('img') as $image){ + if(is_null(strpos($image->src, "http")) + && is_null(strpos($image->src, "//")) + && is_null(strpos($image->src, "data:"))) + $image->src = $server . $image->src; + } + return $content; +} + +?>