From e8cc26e79c701590af14612f6da6757d51a05965 Mon Sep 17 00:00:00 2001 From: Igor Wiedler Date: Sat, 24 Apr 2010 15:42:29 +0200 Subject: [PATCH 1/5] [ticket/9669] Add native Normalizer support PHP 5.3 includes the previous pecl extension "intl" which has a native UTF-8 normalizer. PHPBB3-9669 --- phpBB/includes/utf/utf_tools.php | 109 ++++++++++++++++++++++--------- 1 file changed, 77 insertions(+), 32 deletions(-) diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php index cac5b4e744..8fa91a4c5b 100644 --- a/phpBB/includes/utf/utf_tools.php +++ b/phpBB/includes/utf/utf_tools.php @@ -1712,49 +1712,94 @@ function utf8_case_fold_nfc($text, $option = 'full') return $text; } -/** -* A wrapper function for the normalizer which takes care of including the class if required and modifies the passed strings -* to be in NFC (Normalization Form Composition). -* -* @param mixed $strings a string or an array of strings to normalize -* @return mixed the normalized content, preserving array keys if array given. -*/ -function utf8_normalize_nfc($strings) +if (!extension_loaded('intl')) { - if (empty($strings)) + /** + * A wrapper function for the normalizer which takes care of including the class if required and modifies the passed strings + * to be in NFC (Normalization Form Composition). + * + * @param mixed $strings a string or an array of strings to normalize + * @return mixed the normalized content, preserving array keys if array given. + */ + function utf8_normalize_nfc($strings) { - return $strings; - } - - if (!class_exists('utf_normalizer')) - { - global $phpbb_root_path, $phpEx; - include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx); - } - - if (!is_array($strings)) - { - utf_normalizer::nfc($strings); - } - else if (is_array($strings)) - { - foreach ($strings as $key => $string) + if (empty($strings)) { - if (is_array($string)) + return $strings; + } + + if (!class_exists('utf_normalizer')) + { + global $phpbb_root_path, $phpEx; + include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx); + } + + if (!is_array($strings)) + { + utf_normalizer::nfc($strings); + } + else if (is_array($strings)) + { + foreach ($strings as $key => $string) { - foreach ($string as $_key => $_string) + if (is_array($string)) { - utf_normalizer::nfc($strings[$key][$_key]); + foreach ($string as $_key => $_string) + { + utf_normalizer::nfc($strings[$key][$_key]); + } + } + else + { + utf_normalizer::nfc($strings[$key]); } } - else + } + + return $strings; + } +} +else +{ + /** + * wrapper around PHP's native normalizer from intl + * previously a PECL extension, included in the core since PHP 5.3.0 + * http://php.net/manual/en/normalizer.normalize.php + * + * @param mixed $strings a string or an array of strings to normalize + * @return mixed the normalized content, preserving array keys if array given. + */ + function utf8_normalize_nfc($strings) + { + if (empty($strings)) + { + return $strings; + } + + if (!is_array($strings)) + { + $strings = Normalizer::normalize($strings); + } + if (is_array($strings)) + { + foreach ($strings as $key => $string) { - utf_normalizer::nfc($strings[$key]); + if (is_array($string)) + { + foreach ($string as $_key => $_string) + { + $strings[$key][$_key] = Normalizer::normalize($strings[$key][$_key]); + } + } + else + { + $strings[$key] = Normalizer::normalize($strings[$key]); + } } } - } - return $strings; + return $strings; + } } /** From 66c116f7deb7adcd8928026aeceb536c72f5e86c Mon Sep 17 00:00:00 2001 From: Igor Wiedler Date: Mon, 26 Apr 2010 20:46:09 +0200 Subject: [PATCH 2/5] [ticket/9669] Add isNormalized checks for performance Since isNormalized is less expensive than normalize[1] and normalization will be applied repeatedly in most cases[2], it's more efficient to check for isNormalized. [1] http://area51.phpbb.com/phpBB/viewtopic.php?f=81&t=32718&p=208005#p208005 [2] phpBB 3.0 has a call to utf8_normalize_nfc wrapped around any multibyte request_var call. PHPBB3-9669 --- phpBB/includes/utf/utf_tools.php | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php index 8fa91a4c5b..4cc0486e91 100644 --- a/phpBB/includes/utf/utf_tools.php +++ b/phpBB/includes/utf/utf_tools.php @@ -1778,9 +1778,13 @@ else if (!is_array($strings)) { - $strings = Normalizer::normalize($strings); + if (Normalizer::isNormalized($strings)) + { + return $strings; + } + return Normalizer::normalize($strings); } - if (is_array($strings)) + else { foreach ($strings as $key => $string) { @@ -1788,11 +1792,19 @@ else { foreach ($string as $_key => $_string) { + if (Normalizer::isNormalized($strings[$key][$_key])) + { + continue; + } $strings[$key][$_key] = Normalizer::normalize($strings[$key][$_key]); } } else { + if (Normalizer::isNormalized($strings[$key])) + { + continue; + } $strings[$key] = Normalizer::normalize($strings[$key]); } } From 6df35c2cb6895fedee6a85645b369bff52c25670 Mon Sep 17 00:00:00 2001 From: Igor Wiedler Date: Tue, 22 Jun 2010 14:10:34 +0200 Subject: [PATCH 3/5] [ticket/9669] Make sure normalize_nfc returns string Normalizer::normalize can return null, cast to a string. PHPBB3-9669 --- phpBB/includes/utf/utf_tools.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php index 4cc0486e91..34f9633677 100644 --- a/phpBB/includes/utf/utf_tools.php +++ b/phpBB/includes/utf/utf_tools.php @@ -1782,7 +1782,7 @@ else { return $strings; } - return Normalizer::normalize($strings); + return (string) Normalizer::normalize($strings); } else { @@ -1796,7 +1796,7 @@ else { continue; } - $strings[$key][$_key] = Normalizer::normalize($strings[$key][$_key]); + $strings[$key][$_key] = (string) Normalizer::normalize($strings[$key][$_key]); } } else @@ -1805,7 +1805,7 @@ else { continue; } - $strings[$key] = Normalizer::normalize($strings[$key]); + $strings[$key] = (string) Normalizer::normalize($strings[$key]); } } } From 6681b2417a3f87a666c816658a2daab8a464e233 Mon Sep 17 00:00:00 2001 From: Igor Wiedler Date: Sun, 6 Mar 2011 21:39:42 +0100 Subject: [PATCH 4/5] [ticket/9669] Switch if/else to make the if positive PHPBB3-9669 --- phpBB/includes/utf/utf_tools.php | 96 ++++++++++++++++---------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php index 34f9633677..3ee121a179 100644 --- a/phpBB/includes/utf/utf_tools.php +++ b/phpBB/includes/utf/utf_tools.php @@ -1712,54 +1712,7 @@ function utf8_case_fold_nfc($text, $option = 'full') return $text; } -if (!extension_loaded('intl')) -{ - /** - * A wrapper function for the normalizer which takes care of including the class if required and modifies the passed strings - * to be in NFC (Normalization Form Composition). - * - * @param mixed $strings a string or an array of strings to normalize - * @return mixed the normalized content, preserving array keys if array given. - */ - function utf8_normalize_nfc($strings) - { - if (empty($strings)) - { - return $strings; - } - - if (!class_exists('utf_normalizer')) - { - global $phpbb_root_path, $phpEx; - include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx); - } - - if (!is_array($strings)) - { - utf_normalizer::nfc($strings); - } - else if (is_array($strings)) - { - foreach ($strings as $key => $string) - { - if (is_array($string)) - { - foreach ($string as $_key => $_string) - { - utf_normalizer::nfc($strings[$key][$_key]); - } - } - else - { - utf_normalizer::nfc($strings[$key]); - } - } - } - - return $strings; - } -} -else +if (extension_loaded('intl')) { /** * wrapper around PHP's native normalizer from intl @@ -1813,6 +1766,53 @@ else return $strings; } } +else +{ + /** + * A wrapper function for the normalizer which takes care of including the class if + * required and modifies the passed strings to be in NFC (Normalization Form Composition). + * + * @param mixed $strings a string or an array of strings to normalize + * @return mixed the normalized content, preserving array keys if array given. + */ + function utf8_normalize_nfc($strings) + { + if (empty($strings)) + { + return $strings; + } + + if (!class_exists('utf_normalizer')) + { + global $phpbb_root_path, $phpEx; + include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx); + } + + if (!is_array($strings)) + { + utf_normalizer::nfc($strings); + } + else if (is_array($strings)) + { + foreach ($strings as $key => $string) + { + if (is_array($string)) + { + foreach ($string as $_key => $_string) + { + utf_normalizer::nfc($strings[$key][$_key]); + } + } + else + { + utf_normalizer::nfc($strings[$key]); + } + } + } + + return $strings; + } +} /** * This function is used to generate a "clean" version of a string. From 90cc061fc0c799cbf07b20ca83c3c9b554bda0f9 Mon Sep 17 00:00:00 2001 From: Igor Wiedler Date: Mon, 7 Mar 2011 09:49:58 +0100 Subject: [PATCH 5/5] [ticket/9669] Replace spaces with tabs PHPBB3-9669 --- phpBB/includes/utf/utf_tools.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phpBB/includes/utf/utf_tools.php b/phpBB/includes/utf/utf_tools.php index 3ee121a179..65d40c0fd3 100644 --- a/phpBB/includes/utf/utf_tools.php +++ b/phpBB/includes/utf/utf_tools.php @@ -1768,7 +1768,7 @@ if (extension_loaded('intl')) } else { - /** + /** * A wrapper function for the normalizer which takes care of including the class if * required and modifies the passed strings to be in NFC (Normalization Form Composition). *