From 102a01354b4f872a12feba3c9e875d9b0cd9dc36 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 29 Jan 2018 23:24:11 +0300 Subject: [PATCH] strip utf8mb4 characters in enclosures on mysql --- classes/rssutils.php | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) mode change 100644 => 100755 classes/rssutils.php diff --git a/classes/rssutils.php b/classes/rssutils.php old mode 100644 new mode 100755 index e659f096..52ac6806 --- a/classes/rssutils.php +++ b/classes/rssutils.php @@ -16,6 +16,11 @@ class RSSUtils { return sha1(implode(",", $pluginhost->get_plugin_names()) . $tmp); } + // Strips utf8mb4 characters (i.e. emoji) for mysql + static function strip_utf8mb4($str) { + return preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $str); + } + static function update_feedbrowser_cache() { $pdo = Db::pdo(); @@ -760,10 +765,9 @@ class RSSUtils { // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077 if (DB_TYPE == "mysql") { foreach ($article as $k => $v) { - // i guess we'll have to take the risk of 4byte unicode labels & tags here if (is_string($article[$k])) { - $article[$k] = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $v); + $article[$k] = RSSUtils::strip_utf8mb4($v); } } } @@ -1043,7 +1047,17 @@ class RSSUtils { $e_item = array( rewrite_relative_url($site_url, $e->link), $e->type, $e->length, $e->title, $e->width, $e->height); - array_push($enclosures, $e_item); + + // Yet another episode of "mysql utf8_general_ci is gimped" + if (DB_TYPE == "mysql") { + for ($i = 0; $i < count($e_item); $i++) { + if (is_string($e_item[$i])) { + $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]); + } + } + } + + array_push($enclosures, $e_item); } }