strip utf8mb4 characters in enclosures on mysql

This commit is contained in:
Andrew Dolgov 2018-01-29 23:24:11 +03:00
parent e038374608
commit 102a01354b

20
classes/rssutils.php Normal file → Executable file
View file

@ -16,6 +16,11 @@ class RSSUtils {
return sha1(implode(",", $pluginhost->get_plugin_names()) . $tmp); return sha1(implode(",", $pluginhost->get_plugin_names()) . $tmp);
} }
// Strips utf8mb4 characters (i.e. emoji) for mysql
static function strip_utf8mb4($str) {
return preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $str);
}
static function update_feedbrowser_cache() { static function update_feedbrowser_cache() {
$pdo = Db::pdo(); $pdo = Db::pdo();
@ -760,10 +765,9 @@ class RSSUtils {
// Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077 // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
if (DB_TYPE == "mysql") { if (DB_TYPE == "mysql") {
foreach ($article as $k => $v) { foreach ($article as $k => $v) {
// i guess we'll have to take the risk of 4byte unicode labels & tags here // i guess we'll have to take the risk of 4byte unicode labels & tags here
if (is_string($article[$k])) { if (is_string($article[$k])) {
$article[$k] = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $v); $article[$k] = RSSUtils::strip_utf8mb4($v);
} }
} }
} }
@ -1043,7 +1047,17 @@ class RSSUtils {
$e_item = array( $e_item = array(
rewrite_relative_url($site_url, $e->link), rewrite_relative_url($site_url, $e->link),
$e->type, $e->length, $e->title, $e->width, $e->height); $e->type, $e->length, $e->title, $e->width, $e->height);
array_push($enclosures, $e_item);
// Yet another episode of "mysql utf8_general_ci is gimped"
if (DB_TYPE == "mysql") {
for ($i = 0; $i < count($e_item); $i++) {
if (is_string($e_item[$i])) {
$e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]);
}
}
}
array_push($enclosures, $e_item);
} }
} }