af_readability: force UTF-8 preamble on HTML document load. No idea why, but it seems to work better even for non-Unicode sites.

This commit is contained in:
Andrew Dolgov 2017-01-28 14:24:48 +03:00
parent 24c7e4132d
commit f45a1152bb

View file

@ -162,7 +162,7 @@ class Af_Readability extends Plugin {
if ($tmp && mb_strlen($tmp) < 1024 * 500) { if ($tmp && mb_strlen($tmp) < 1024 * 500) {
$tmpdoc = new DOMDocument("1.0", "UTF-8"); $tmpdoc = new DOMDocument("1.0", "UTF-8");
if (!$tmpdoc->loadHTML($tmp)) if (!$tmpdoc->loadHTML('<?xml encoding="utf-8" ?>\n' . $tmp))
return false; return false;
if (strtolower($tmpdoc->encoding) != 'utf-8') { if (strtolower($tmpdoc->encoding) != 'utf-8') {