Merge pull request #1880 from nicolas-grekas/develop

[3.2] Rely on Intl and mbstring, use patchwork/utf8 as fallback * nicolas-grekas/develop: [feature/patchwork-utf8] Rely on mbstring, use patchwork/utf8 as fallback [feature/patchwork-utf8] Remove utf8_str_replace [feature/patchwork-utf8] Normalize with intl, use patchwork/utf8 as fallback
2025-06-28 22:28:51 +00:00 · 2014-11-11 13:07:54 +01:00 · 2014-11-11 13:07:54 +01:00 · 16de388437
commit 16de388437
parent 4535740aa2 d1a879a56c
21 changed files with 180 additions and 3452 deletions
--- a/phpBB/composer.json
+++ b/phpBB/composer.json
@ -27,6 +27,7 @@
 	"require": {
 		"php": ">=5.3.3",
 		"lusitanian/oauth": "0.2.*",
 		"patchwork/utf8": "1.1.*",
 		"symfony/config": "2.5.*",
 		"symfony/console": "2.5.*",
 		"symfony/dependency-injection": "2.5.*",
--- a/phpBB/composer.lock
+++ b/phpBB/composer.lock
@ -68,6 +68,62 @@
            ],
            "time": "2013-08-29 21:40:04"
        },
        {
            "name": "patchwork/utf8",
            "version": "v1.1.26",
            "source": {
                "type": "git",
                "url": "https://github.com/tchwork/utf8.git",
                "reference": "6b8e46603b49ee87ad6bceb314da94cc04ffcdce"
            },
            "dist": {
                "type": "zip",
                "url": "https://api.github.com/repos/tchwork/utf8/zipball/6b8e46603b49ee87ad6bceb314da94cc04ffcdce",
                "reference": "6b8e46603b49ee87ad6bceb314da94cc04ffcdce",
                "shasum": ""
            },
            "require": {
                "lib-pcre": ">=7.3",
                "php": ">=5.3.0"
            },
            "suggest": {
                "ext-iconv": "Use iconv for best performance",
                "ext-intl": "Use Intl for best performance",
                "ext-mbstring": "Use Mbstring for best performance"
            },
            "type": "library",
            "extra": {
                "branch-alias": {
                    "dev-master": "1.1-dev"
                }
            },
            "autoload": {
                "psr-0": {
                    "Patchwork": "class/",
                    "Normalizer": "class/"
                }
            },
            "notification-url": "https://packagist.org/downloads/",
            "license": [
                "(Apache-2.0 or GPL-2.0)"
            ],
            "authors": [
                {
                    "name": "Nicolas Grekas",
                    "email": "p@tchwork.com"
                }
            ],
            "description": "Portable and performant UTF-8, Unicode and Grapheme Clusters for PHP",
            "homepage": "https://github.com/tchwork/utf8",
            "keywords": [
                "grapheme",
                "i18n",
                "unicode",
                "utf-8",
                "utf8"
            ],
            "time": "2014-11-08 10:13:25"
        },
        {
            "name": "psr/log",
            "version": "1.0.0",
--- a/phpBB/develop/generate_utf_tables.php
+++ b/phpBB/develop/generate_utf_tables.php
@ -32,262 +32,11 @@ $phpbb_root_path = '../';
 $phpEx = substr(strrchr(__FILE__, '.'), 1);
 echo "Checking for required files\n";
 download('http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt');
 download('http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt');
 download('http://www.unicode.org/Public/UNIDATA/UnicodeData.txt');
 echo "\n";
 require_once($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
 $file_contents = array();
 /**
-* Generate some Hangul/Jamo stuff
+* Generate the files needed by the search indexer
 */
 echo "\nGenerating Hangul and Jamo tables\n";
 for ($i = 0; $i < UNICODE_HANGUL_LCOUNT; ++$i)
 {
 	$utf_char = cp_to_utf(UNICODE_HANGUL_LBASE + $i);
 	$file_contents['utf_normalizer_common']['utf_jamo_index'][$utf_char] = $i * UNICODE_HANGUL_VCOUNT * UNICODE_HANGUL_TCOUNT + UNICODE_HANGUL_SBASE;
 	$file_contents['utf_normalizer_common']['utf_jamo_type'][$utf_char] = UNICODE_JAMO_L;
 }
 for ($i = 0; $i < UNICODE_HANGUL_VCOUNT; ++$i)
 {
 	$utf_char = cp_to_utf(UNICODE_HANGUL_VBASE + $i);
 	$file_contents['utf_normalizer_common']['utf_jamo_index'][$utf_char] = $i * UNICODE_HANGUL_TCOUNT;
 	$file_contents['utf_normalizer_common']['utf_jamo_type'][$utf_char] = UNICODE_JAMO_V;
 }
 for ($i = 0; $i < UNICODE_HANGUL_TCOUNT; ++$i)
 {
 	$utf_char = cp_to_utf(UNICODE_HANGUL_TBASE + $i);
 	$file_contents['utf_normalizer_common']['utf_jamo_index'][$utf_char] = $i;
 	$file_contents['utf_normalizer_common']['utf_jamo_type'][$utf_char] = UNICODE_JAMO_T;
 }
 /**
 * Load the CompositionExclusions table
 *
 * Fills $exclude with one entry per excluded codepoint (codepoint => 1).
 * Only lines starting with a hexadecimal digit are data lines; everything
 * else (comments, blank lines) is ignored. A data line holds either a
 * single codepoint or a range written as "XXXX..YYYY", where both ends
 * belong to the range.
 */
 echo "Loading CompositionExclusion\n";
 $fp = fopen('CompositionExclusions.txt', 'rt');
 if (!$fp)
 {
 	// Without this the read loop below would never see a valid handle
 	die("Cannot open CompositionExclusions.txt for reading\n");
 }
 $exclude = array();
 // Loop on fgets() itself so that the failed read at EOF never reaches $line[0]
 while (($line = fgets($fp, 1024)) !== false)
 {
 	// Data lines start with a hex digit, skip anything else
 	if ($line === '' || strpos('0123456789ABCDEFabcdef', $line[0]) === false)
 	{
 		continue;
 	}
 	// The codepoint field ends at the first blank, line break or comment marker
 	$cp = strtok($line, " \t\r\n#");
 	$pos = strpos($cp, '..');
 	if ($pos !== false)
 	{
 		$start = hexdec(substr($cp, 0, $pos));
 		$end = hexdec(substr($cp, $pos + 2));
 		// Ranges are inclusive: the last codepoint must be excluded as well
 		for ($i = $start; $i <= $end; ++$i)
 		{
 			$exclude[$i] = 1;
 		}
 	}
 	else
 	{
 		$exclude[hexdec($cp)] = 1;
 	}
 }
 fclose($fp);
 /**
 * Load QuickCheck tables
 */
 echo "Generating QuickCheck tables\n";
 $fp = fopen('DerivedNormalizationProps.txt', 'rt');
 while (!feof($fp))
 {
 	$line = fgets($fp, 1024);
 	if (!strpos(' 0123456789ABCDEFabcdef', $line[0]))
 	{
 		continue;
 	}
 	$p = array_map('trim', explode(';', strtok($line, '#')));
 	/**
 	* Capture only NFC_QC, NFKC_QC
 	*/
 	if (!preg_match('#^NFK?C_QC$#', $p[1]))
 	{
 		continue;
 	}
 	if ($pos = strpos($p[0], '..'))
 	{
 		$start = hexdec(substr($p[0], 0, $pos));
 		$end = hexdec(substr($p[0], $pos + 2));
 	}
 	else
 	{
 		$start = $end = hexdec($p[0]);
 	}
 	if ($start >= UTF8_HANGUL_FIRST && $end <= UTF8_HANGUL_LAST)
 	{
 		/**
 		* We do not store Hangul syllables in the array
 		*/
 		continue;
 	}
 	if ($p[2] == 'M')
 	{
 		$val = UNICODE_QC_MAYBE;
 	}
 	else
 	{
 		$val = UNICODE_QC_NO;
 	}
 	if ($p[1] == 'NFKC_QC')
 	{
 		$file = 'utf_nfkc_qc';
 	}
 	else
 	{
 		$file = 'utf_nfc_qc';
 	}
 	for ($i = $start; $i <= $end; ++$i)
 	{
 		/**
 		* The vars have the same name as the file: $utf_nfc_qc is in utf_nfc_qc.php
 		*/
 		$file_contents[$file][$file][cp_to_utf($i)] = $val;
 	}
 }
 fclose($fp);
 /**
 * Do mappings
 */
 echo "Loading Unicode decomposition mappings\n";
 $fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt');
 $map = array();
 while (!feof($fp))
 {
 	$p = explode(';', fgets($fp, 1024));
 	$cp = hexdec($p[0]);
 	if (!empty($p[3]))
 	{
 		/**
 		* Store combining class > 0
 		*/
 		$file_contents['utf_normalizer_common']['utf_combining_class'][cp_to_utf($cp)] = (int) $p[3];
 	}
 	if (!isset($p[5]) || !preg_match_all('#[0-9A-F]+#', strip_tags($p[5]), $m))
 	{
 		continue;
 	}
 	if (strpos($p[5], '>'))
 	{
 		$map['NFKD'][$cp] = implode(' ', array_map('hexdec', $m[0]));
 	}
 	else
 	{
 		$map['NFD'][$cp] = $map['NFKD'][$cp] = implode(' ', array_map('hexdec', $m[0]));
 	}
 }
 fclose($fp);
 /**
 * Build the canonical composition table
 */
 echo "Generating the Canonical Composition table\n";
 foreach ($map['NFD'] as $cp => $decomp_seq)
 {
 	if (!strpos($decomp_seq, ' ') || isset($exclude[$cp]))
 	{
 		/**
 		* Singletons are excluded from canonical composition
 		*/
 		continue;
 	}
 	$utf_seq = implode('', array_map('cp_to_utf', explode(' ', $decomp_seq)));
 	if (!isset($file_contents['utf_canonical_comp']['utf_canonical_comp'][$utf_seq]))
 	{
 		$file_contents['utf_canonical_comp']['utf_canonical_comp'][$utf_seq] = cp_to_utf($cp);
 	}
 }
 /**
 * Decompose the NF[K]D mappings recursively and prepare the file contents
 */
 echo "Generating the Canonical and Compatibility Decomposition tables\n\n";
 foreach ($map as $type => $decomp_map)
 {
 	foreach ($decomp_map as $cp => $decomp_seq)
 	{
 		$decomp_map[$cp] = decompose($decomp_map, $decomp_seq);
 	}
 	unset($decomp_seq);
 	if ($type == 'NFKD')
 	{
 		$file = 'utf_compatibility_decomp';
 		$var = 'utf_compatibility_decomp';
 	}
 	else
 	{
 		$file = 'utf_canonical_decomp';
 		$var = 'utf_canonical_decomp';
 	}
 	/**
 	* Generate the corresponding file
 	*/
 	foreach ($decomp_map as $cp => $decomp_seq)
 	{
 		$file_contents[$file][$var][cp_to_utf($cp)] = implode('', array_map('cp_to_utf', explode(' ', $decomp_seq)));
 	}
 }
 /**
 * Generate and/or alter the files
 */
 foreach ($file_contents as $file => $contents)
 {
 	/**
 	* Generate a new file
 	*/
 	echo "Writing to $file.$phpEx\n";
 	if (!$fp = fopen($phpbb_root_path . 'includes/utf/data/' . $file . '.' . $phpEx, 'wb'))
 	{
 		trigger_error('Cannot open ' . $file . ' for write');
 	}
 	fwrite($fp, '<?php');
 	foreach ($contents as $var => $val)
 	{
 		fwrite($fp, "\n\$GLOBALS[" . my_var_export($var) . ']=' . my_var_export($val) . ";");
 	}
 	fclose($fp);
 }
 echo "\n*** UTF-8 normalization tables done\n\n";
 /**
 * Now we'll generate the files needed by the search indexer
 */
 echo "Generating search indexer tables\n";
@ -424,32 +173,6 @@ die("\nAll done!\n");
 //                             Internal functions                             //
 ////////////////////////////////////////////////////////////////////////////////
 /**
 * Decompose a sequence recursively
 *
 * Every codepoint of the sequence that has an entry in the mapping is
 * replaced by the (fully decomposed) sequence it maps to; codepoints
 * without an entry are kept as they are.
 *
 * @param	array	$decomp_map	Decomposition mapping, passed by reference
 * @param	string	$decomp_seq	Decomposition sequence as decimal codepoints separated with a space
 * @return	string				Decomposition sequence, fully decomposed
 */
 function decompose(&$decomp_map, $decomp_seq)
 {
 	$expanded = array();
 	$codepoints = explode(' ', $decomp_seq);
 	foreach ($codepoints as $codepoint)
 	{
 		// Mapped codepoints are expanded depth-first, the others pass through
 		$expanded[] = isset($decomp_map[$codepoint])
 			? decompose($decomp_map, $decomp_map[$codepoint])
 			: $codepoint;
 	}
 	return implode(' ', $expanded);
 }
 /**
 * Return a parsable string representation of a variable
 *
@ -537,17 +260,6 @@ function hex_to_utf($hex)
 	return cp_to_utf(hexdec($hex));
 }
 /**
 * Build a UTF-8 string out of a list of hexadecimal codepoints
 *
 * Each space-separated item is converted with hex_to_utf() and the
 * results are concatenated in order.
 *
 * @param	string	$seq		Sequence of codepoints, separated with a space
 * @return	string				UTF-8 string
 */
 function hexseq_to_utf($seq)
 {
 	$utf = '';
 	foreach (explode(' ', $seq) as $hex)
 	{
 		$utf .= hex_to_utf($hex);
 	}
 	return $utf;
 }
 /**
 * Convert a codepoint to a UTF-8 char
 *
--- a/phpBB/develop/unicode_testing.php
+++ b/phpBB/develop/unicode_testing.php
@ -81,38 +81,3 @@ function utf8_to_unicode_callback($m)
 {
 	return '\u' . str_pad(base_convert(utf8_ord($m[0]), 10, 16), 4, '0', STR_PAD_LEFT) . '';
 }
 /**
 * Normalize a string, or every element of an array of strings, to NFKC
 *
 * The utf_normalizer class is included on first use. Values that PHP
 * considers empty are handed back without being touched.
 *
 * @param	mixed	$strings	a string or an array of strings to normalize
 * @return	mixed				the normalized content, preserving array keys if array given.
 */
 function utf8_normalize_nfkc($strings)
 {
 	// Nothing to normalize
 	if (empty($strings))
 	{
 		return $strings;
 	}
 	// Pull in the normalizer only when it has not been loaded yet
 	if (!class_exists('utf_normalizer'))
 	{
 		global $phpbb_root_path, $phpEx;
 		include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
 	}
 	if (is_array($strings))
 	{
 		// The normalizer works in place, so address each element by its key
 		foreach (array_keys($strings) as $key)
 		{
 			utf_normalizer::nfkc($strings[$key]);
 		}
 		return $strings;
 	}
 	utf_normalizer::nfkc($strings);
 	return $strings;
 }
--- a/phpBB/develop/utf_normalizer_test.php
+++ b/phpBB/develop/utf_normalizer_test.php
@ -1,394 +0,0 @@
 <?php
 /**
 *
 * This file is part of the phpBB Forum Software package.
 *
 * @copyright (c) phpBB Limited <https://www.phpbb.com>
 * @license GNU General Public License, version 2 (GPL-2.0)
 *
 * For full copyright and license information, please see
 * the docs/CREDITS.txt file.
 *
 */
 if (php_sapi_name() != 'cli')
 {
 	die("This program must be run from the command line.\n");
 }
 //
 // Security message:
 //
 // This script is potentially dangerous.
 // Remove or comment the next line (die(".... ) to enable this script.
 // Do NOT FORGET to either remove this script or disable it after you have used it.
 //
 die("Please read the first lines of this script for instructions on how to enable it");
 set_time_limit(0);
 error_reporting(E_ALL);
 define('IN_PHPBB', true);
 $phpbb_root_path = '../';
 $phpEx = substr(strrchr(__FILE__, '.'), 1);
 /**
 * Let's download some files we need
 */
 download('http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt');
 download('http://www.unicode.org/Public/UNIDATA/UnicodeData.txt');
 /**
 * Those are the tests we run
 */
 $test_suite = array(
 	/**
 	* NFC
 	*   c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
 	*   c4 ==  NFC(c4) ==  NFC(c5)
 	*/
 	'NFC'	=>	array(
 		'c2'	=>	array('c1', 'c2', 'c3'),
 		'c4'	=>	array('c4', 'c5')
 	),
 	/**
 	* NFD
 	*   c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
 	*   c5 ==  NFD(c4) ==  NFD(c5)
 	*/
 	'NFD'	=>	array(
 		'c3'	=>	array('c1', 'c2', 'c3'),
 		'c5'	=>	array('c4', 'c5')
 	),
 	/**
 	* NFKC
 	*   c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
 	*/
 	'NFKC'	=>	array(
 		'c4'	=>	array('c1', 'c2', 'c3', 'c4', 'c5')
 	),
 	/**
 	* NFKD
 	*   c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
 	*/
 	'NFKD'	=>	array(
 		'c5'	=>	array('c1', 'c2', 'c3', 'c4', 'c5')
 	)
 );
 require_once($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
 $i = $n = 0;
 $failed = false;
 $tested_chars = array();
 $fp = fopen($phpbb_root_path . 'develop/NormalizationTest.txt', 'rb');
 if (!$fp)
 {
 	die("Can't open develop/NormalizationTest.txt for reading\n");
 }
 /**
 * Run every data line of NormalizationTest.txt through the test suite.
 *
 * Lines starting with "@" open a new part of the file and are echoed,
 * lines not starting with a hex digit are skipped. A data line holds the
 * columns c1..c5; for each normalization form, each input column listed
 * in $test_suite is normalized and compared with the expected column.
 */
 // Loop on fgets() itself so that the failed read at EOF never reaches $line[0]
 while (($line = fgets($fp)) !== false)
 {
 	++$n;
 	if ($line[0] == '@')
 	{
 		if ($i)
 		{
 			echo "done\n";
 		}
 		$i = 0;
 		echo "\n", substr($line, 1), "\n\n";
 		continue;
 	}
 	if (!strpos(' 0123456789ABCDEF', $line[0]))
 	{
 		continue;
 	}
 	// Progress indicator, one tick every 100 data lines
 	if (++$i % 100 == 0)
 	{
 		echo $i, ' ';
 	}
 	list($c1, $c2, $c3, $c4, $c5) = explode(';', $line);
 	if (!strpos($c1, ' '))
 	{
 		/**
 		* We are currently testing a single character, we add it to the list of
 		* characters we have processed so that we can exclude it when testing
 		* for invariants
 		*/
 		$tested_chars[$c1] = 1;
 	}
 	foreach ($test_suite as $form => $serie)
 	{
 		foreach ($serie as $expected => $tests)
 		{
 			// $expected and $test hold column names ("c1".."c5"), resolved as variable variables
 			$hex_expected = ${$expected};
 			$utf_expected = hexseq_to_utf($hex_expected);
 			foreach ($tests as $test)
 			{
 				// Normalize the input column under test, not the expected value
 				$utf_result = hexseq_to_utf(${$test});
 				// The normalizer modifies its argument in place, which requires a
 				// direct call: call_user_func() cannot pass arguments by reference
 				utf_normalizer::$form($utf_result);
 				if (strcmp($utf_expected, $utf_result))
 				{
 					$failed = true;
 					$hex_result = utf_to_hexseq($utf_result);
 					echo "\nFAILED $expected == $form($test) ($hex_expected != $hex_result)";
 				}
 			}
 		}
 		if ($failed)
 		{
 			die("\n\nFailed at line $n\n");
 		}
 	}
 }
 fclose($fp);
 /**
 * Test for invariants
 */
 echo "\n\nTesting for invariants...\n\n";
 $fp = fopen($phpbb_root_path . 'develop/UnicodeData.txt', 'rt');
 $n = 0;
 while (!feof($fp))
 {
 	if (++$n % 100 == 0)
 	{
 		echo $n, ' ';
 	}
 	$line = fgets($fp, 1024);
 	if (!$pos = strpos($line, ';'))
 	{
 		continue;
 	}
 	$hex_tested = $hex_expected = substr($line, 0, $pos);
 	if (isset($tested_chars[$hex_tested]))
 	{
 		continue;
 	}
 	$utf_expected = hex_to_utf($hex_expected);
 	if ($utf_expected >= UTF8_SURROGATE_FIRST
 	 && $utf_expected <= UTF8_SURROGATE_LAST)
 	{
 		/**
 		* Surrogates are illegal on their own, we expect the normalizer
 		* to return a replacement char
 		*/
 		$utf_expected = UTF8_REPLACEMENT;
 		$hex_expected = utf_to_hexseq($utf_expected);
 	}
 	foreach (array('nfc', 'nfkc', 'nfd', 'nfkd') as $form)
 	{
 		$utf_result = $utf_expected;
 		utf_normalizer::$form($utf_result);
 		$hex_result = utf_to_hexseq($utf_result);
 //		echo "$form($utf_expected) == $utf_result\n";
 		if (strcmp($utf_expected, $utf_result))
 		{
 			$failed = 1;
 			echo "\nFAILED $hex_expected == $form($hex_tested) ($hex_expected != $hex_result)";
 		}
 	}
 	if ($failed)
 	{
 		die("\n\nFailed at line $n\n");
 	}
 }
 fclose($fp);
 die("\n\nALL TESTS PASSED SUCCESSFULLY\n");
 /**
 * Fetch a remote file into the develop/ dir, unless it is already there
 *
 * The amount of data written so far is echoed while copying. The script
 * is terminated when the source cannot be opened or the destination
 * cannot be created.
 *
 * @param	string	$url		URL of the file to download
 * @return	null
 */
 function download($url)
 {
 	global $phpbb_root_path;
 	$filename = basename($url);
 	$destination = $phpbb_root_path . 'develop/' . $filename;
 	// A copy from a previous run is reused as is
 	if (file_exists($destination))
 	{
 		return;
 	}
 	echo 'Downloading from ', $url, ' ';
 	$source = fopen($url, 'rb');
 	if (!$source)
 	{
 		die("Can't download from $url\nPlease download it yourself and put it in the develop/ dir, kthxbai");
 	}
 	$target = fopen($destination, 'wb');
 	if (!$target)
 	{
 		die("Can't open develop/" . $filename . " for output... please check your permissions or something");
 	}
 	$written = 0;
 	$progress = '';
 	while (!feof($source))
 	{
 		$written += fwrite($target, fread($source, 32768));
 		// Backspace over the previous counter before printing the new one
 		echo str_repeat("\x08", strlen($progress));
 		$progress = ($written >> 10) . ' KiB';
 		echo $progress;
 	}
 	fclose($source);
 	fclose($target);
 	echo "\n";
 }
 /**
 * Convert a UTF string to a sequence of codepoints in hexadecimal
 *
 * The string is split into characters according to the high nibble of
 * each lead byte; each character is converted with utf_to_cp() and
 * written as uppercase hexadecimal, left-padded with zeros to at least
 * four digits.
 *
 * @param	string	$str	UTF string
 * @return	string			Unicode codepoints in hex, separated with a space
 */
 function utf_to_hexseq($str)
 {
 	$hex_codepoints = array();
 	$length = strlen($str);
 	for ($pos = 0; $pos < $length; $pos += $width)
 	{
 		// The high nibble of the lead byte tells how many bytes the character takes
 		$lead = ord($str[$pos]) & 0xF0;
 		if ($lead === 0xC0 || $lead === 0xD0)
 		{
 			$width = 2;
 		}
 		else if ($lead === 0xE0)
 		{
 			$width = 3;
 		}
 		else if ($lead === 0xF0)
 		{
 			$width = 4;
 		}
 		else
 		{
 			$width = 1;
 		}
 		$codepoint = utf_to_cp(substr($str, $pos, $width));
 		$hex_codepoints[] = str_pad(strtoupper(dechex($codepoint)), 4, '0', STR_PAD_LEFT);
 	}
 	return implode(' ', $hex_codepoints);
 }
 /**
 * Convert a UTF-8 char to its codepoint
 *
 * The payload bits of the lead byte are taken first, then six bits are
 * appended for every following byte. The script is terminated if the
 * char is not 1 to 4 bytes long.
 *
 * @param	string	$utf_char	UTF-8 char
 * @return	integer				Unicode codepoint
 */
 function utf_to_cp($utf_char)
 {
 	$length = strlen($utf_char);
 	if ($length < 1 || $length > 4)
 	{
 		die('UTF-8 chars can only be 1-4 bytes long');
 	}
 	// Payload mask of the lead byte, indexed by the length of the char
 	$lead_masks = array(1 => 0xFF, 2 => 0x1F, 3 => 0x0F, 4 => 0x07);
 	$codepoint = ord($utf_char[0]) & $lead_masks[$length];
 	for ($i = 1; $i < $length; ++$i)
 	{
 		$codepoint = ($codepoint << 6) | (ord($utf_char[$i]) & 0x3F);
 	}
 	return $codepoint;
 }
 /**
 * Return the UTF-8 string described by a sequence of hexadecimal codepoints
 *
 * The sequence is split on spaces, each item is converted with
 * hex_to_utf() and the resulting chars are glued together in order.
 *
 * @param	string	$seq		Sequence of codepoints, separated with a space
 * @return	string				UTF-8 string
 */
 function hexseq_to_utf($seq)
 {
 	$utf_string = '';
 	$hex_codepoints = explode(' ', $seq);
 	foreach ($hex_codepoints as $hex_codepoint)
 	{
 		$utf_string .= hex_to_utf($hex_codepoint);
 	}
 	return $utf_string;
 }
 /**
 * Convert a codepoint in hexadecimal to a UTF-8 char
 *
 * The hexadecimal string is turned into a number with hexdec() and then
 * handed to cp_to_utf().
 *
 * @param	string	$hex		Codepoint, in hexadecimal
 * @return	string				UTF-8 char
 */
 function hex_to_utf($hex)
 {
 	$codepoint = hexdec($hex);
 	return cp_to_utf($codepoint);
 }
 /**
 * Convert a codepoint to a UTF-8 char
 *
 * Codepoints up to 0x7F give one byte, up to 0x7FF two bytes, up to
 * 0xFFFF three bytes and anything above four bytes.
 *
 * @param	integer	$cp			Unicode codepoint
 * @return	string				UTF-8 string
 */
 function cp_to_utf($cp)
 {
 	if ($cp <= 0x7F)
 	{
 		return chr($cp);
 	}
 	// Every multi-byte form ends with the lowest six bits
 	$last = chr(0x80 | ($cp & 0x3F));
 	if ($cp <= 0x7FF)
 	{
 		return chr(0xC0 | ($cp >> 6)) . $last;
 	}
 	// Three and four byte forms share the next six bits as well
 	$second_last = chr(0x80 | (($cp >> 6) & 0x3F));
 	if ($cp <= 0xFFFF)
 	{
 		return chr(0xE0 | ($cp >> 12)) . $second_last . $last;
 	}
 	return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . $second_last . $last;
 }
--- a/phpBB/includes/utf/data/utf_canonical_comp.php
+++ b/phpBB/includes/utf/data/utf_canonical_comp.php
--- a/phpBB/includes/utf/data/utf_canonical_decomp.php
+++ b/phpBB/includes/utf/data/utf_canonical_decomp.php
--- a/phpBB/includes/utf/data/utf_compatibility_decomp.php
+++ b/phpBB/includes/utf/data/utf_compatibility_decomp.php
--- a/phpBB/includes/utf/data/utf_nfc_qc.php
+++ b/phpBB/includes/utf/data/utf_nfc_qc.php
--- a/phpBB/includes/utf/data/utf_nfkc_qc.php
+++ b/phpBB/includes/utf/data/utf_nfkc_qc.php
--- a/phpBB/includes/utf/data/utf_normalizer_common.php
+++ b/phpBB/includes/utf/data/utf_normalizer_common.php
@ -1,4 +0,0 @@
 <?php
 $GLOBALS['utf_jamo_index']=array('ᄀ'=>44032,'ᄁ'=>44620,'ᄂ'=>45208,'ᄃ'=>45796,'ᄄ'=>46384,'ᄅ'=>46972,'ᄆ'=>47560,'ᄇ'=>48148,'ᄈ'=>48736,'ᄉ'=>49324,'ᄊ'=>49912,'ᄋ'=>50500,'ᄌ'=>51088,'ᄍ'=>51676,'ᄎ'=>52264,'ᄏ'=>52852,'ᄐ'=>53440,'ᄑ'=>54028,'ᄒ'=>54616,'ᅡ'=>0,'ᅢ'=>28,'ᅣ'=>56,'ᅤ'=>84,'ᅥ'=>112,'ᅦ'=>140,'ᅧ'=>168,'ᅨ'=>196,'ᅩ'=>224,'ᅪ'=>252,'ᅫ'=>280,'ᅬ'=>308,'ᅭ'=>336,'ᅮ'=>364,'ᅯ'=>392,'ᅰ'=>420,'ᅱ'=>448,'ᅲ'=>476,'ᅳ'=>504,'ᅴ'=>532,'ᅵ'=>560,'ᆧ'=>0,'ᆨ'=>1,'ᆩ'=>2,'ᆪ'=>3,'ᆫ'=>4,'ᆬ'=>5,'ᆭ'=>6,'ᆮ'=>7,'ᆯ'=>8,'ᆰ'=>9,'ᆱ'=>10,'ᆲ'=>11,'ᆳ'=>12,'ᆴ'=>13,'ᆵ'=>14,'ᆶ'=>15,'ᆷ'=>16,'ᆸ'=>17,'ᆹ'=>18,'ᆺ'=>19,'ᆻ'=>20,'ᆼ'=>21,'ᆽ'=>22,'ᆾ'=>23,'ᆿ'=>24,'ᇀ'=>25,'ᇁ'=>26,'ᇂ'=>27);
 $GLOBALS['utf_jamo_type']=array('ᄀ'=>0,'ᄁ'=>0,'ᄂ'=>0,'ᄃ'=>0,'ᄄ'=>0,'ᄅ'=>0,'ᄆ'=>0,'ᄇ'=>0,'ᄈ'=>0,'ᄉ'=>0,'ᄊ'=>0,'ᄋ'=>0,'ᄌ'=>0,'ᄍ'=>0,'ᄎ'=>0,'ᄏ'=>0,'ᄐ'=>0,'ᄑ'=>0,'ᄒ'=>0,'ᅡ'=>1,'ᅢ'=>1,'ᅣ'=>1,'ᅤ'=>1,'ᅥ'=>1,'ᅦ'=>1,'ᅧ'=>1,'ᅨ'=>1,'ᅩ'=>1,'ᅪ'=>1,'ᅫ'=>1,'ᅬ'=>1,'ᅭ'=>1,'ᅮ'=>1,'ᅯ'=>1,'ᅰ'=>1,'ᅱ'=>1,'ᅲ'=>1,'ᅳ'=>1,'ᅴ'=>1,'ᅵ'=>1,'ᆧ'=>2,'ᆨ'=>2,'ᆩ'=>2,'ᆪ'=>2,'ᆫ'=>2,'ᆬ'=>2,'ᆭ'=>2,'ᆮ'=>2,'ᆯ'=>2,'ᆰ'=>2,'ᆱ'=>2,'ᆲ'=>2,'ᆳ'=>2,'ᆴ'=>2,'ᆵ'=>2,'ᆶ'=>2,'ᆷ'=>2,'ᆸ'=>2,'ᆹ'=>2,'ᆺ'=>2,'ᆻ'=>2,'ᆼ'=>2,'ᆽ'=>2,'ᆾ'=>2,'ᆿ'=>2,'ᇀ'=>2,'ᇁ'=>2,'ᇂ'=>2);
 $GLOBALS['utf_combining_class']=array('̀'=>230,'́'=>230,'̂'=>230,'̃'=>230,'̄'=>230,'̅'=>230,'̆'=>230,'̇'=>230,'̈'=>230,'̉'=>230,'̊'=>230,'̋'=>230,'̌'=>230,'̍'=>230,'̎'=>230,'̏'=>230,'̐'=>230,'̑'=>230,'̒'=>230,'̓'=>230,'̔'=>230,'̕'=>232,'̖'=>220,'̗'=>220,'̘'=>220,'̙'=>220,'̚'=>232,'̛'=>216,'̜'=>220,'̝'=>220,'̞'=>220,'̟'=>220,'̠'=>220,'̡'=>202,'̢'=>202,'̣'=>220,'̤'=>220,'̥'=>220,'̦'=>220,'̧'=>202,'̨'=>202,'̩'=>220,'̪'=>220,'̫'=>220,'̬'=>220,'̭'=>220,'̮'=>220,'̯'=>220,'̰'=>220,'̱'=>220,'̲'=>220,'̳'=>220,'̴'=>1,'̵'=>1,'̶'=>1,'̷'=>1,'̸'=>1,'̹'=>220,'̺'=>220,'̻'=>220,'̼'=>220,'̽'=>230,'̾'=>230,'̿'=>230,'̀'=>230,'́'=>230,'͂'=>230,'̓'=>230,'̈́'=>230,'ͅ'=>240,'͆'=>230,'͇'=>220,'͈'=>220,'͉'=>220,'͊'=>230,'͋'=>230,'͌'=>230,'͍'=>220,'͎'=>220,'͐'=>230,'͑'=>230,'͒'=>230,'͓'=>220,'͔'=>220,'͕'=>220,'͖'=>220,'͗'=>230,'͘'=>232,'͙'=>220,'͚'=>220,'͛'=>230,'͜'=>233,'͝'=>234,'͞'=>234,'͟'=>233,'͠'=>234,'͡'=>234,'͢'=>233,'ͣ'=>230,'ͤ'=>230,'ͥ'=>230,'ͦ'=>230,'ͧ'=>230,'ͨ'=>230,'ͩ'=>230,'ͪ'=>230,'ͫ'=>230,'ͬ'=>230,'ͭ'=>230,'ͮ'=>230,'ͯ'=>230,'҃'=>230,'҄'=>230,'҅'=>230,'҆'=>230,'֑'=>220,'֒'=>230,'֓'=>230,'֔'=>230,'֕'=>230,'֖'=>220,'֗'=>230,'֘'=>230,'֙'=>230,'֚'=>222,'֛'=>220,'֜'=>230,'֝'=>230,'֞'=>230,'֟'=>230,'֠'=>230,'֡'=>230,'֢'=>220,'֣'=>220,'֤'=>220,'֥'=>220,'֦'=>220,'֧'=>220,'֨'=>230,'֩'=>230,'֪'=>220,'֫'=>230,'֬'=>230,'֭'=>222,'֮'=>228,'֯'=>230,'ְ'=>10,'ֱ'=>11,'ֲ'=>12,'ֳ'=>13,'ִ'=>14,'ֵ'=>15,'ֶ'=>16,'ַ'=>17,'ָ'=>18,'ֹ'=>19,'ֺ'=>19,'ֻ'=>20,'ּ'=>21,'ֽ'=>22,'ֿ'=>23,'ׁ'=>24,'ׂ'=>25,'ׄ'=>230,'ׅ'=>220,'ׇ'=>18,'ؐ'=>230,'ؑ'=>230,'ؒ'=>230,'ؓ'=>230,'ؔ'=>230,'ؕ'=>230,'ً'=>27,'ٌ'=>28,'ٍ'=>29,'َ'=>30,'ُ'=>31,'ِ'=>32,'ّ'=>33,'ْ'=>34,'ٓ'=>230,'ٔ'=>230,'ٕ'=>220,'ٖ'=>220,'ٗ'=>230,'٘'=>230,'ٙ'=>230,'ٚ'=>230,'ٛ'=>230,'ٜ'=>220,'ٝ'=>230,'ٞ'=>230,'ٰ'=>35,'ۖ'=>230,'ۗ'=>230,'ۘ'=>230,'ۙ'=>230,'ۚ'=>230,'ۛ'=>230,'ۜ'=>230,'۟'=>230,'۠'=>230,'ۡ'=>230,'ۢ'=>230,'ۣ'=>220,'ۤ'=>230,'ۧ'=>230,'ۨ'=>230,'۪'=>220,'۫'=>230,'۬'=>230,'ۭ'=>220,'ܑ'=>36,'ܰ'=>230,'ܱ'=>220,'ܲ'=>230,'ܳ'=>230,'ܴ'=>220,'ܵ'=>230,'ܶ'=>230,'ܷ'=>220,'ܸ'=>220,'
ܹ'=>220,'ܺ'=>230,'ܻ'=>220,'ܼ'=>220,'ܽ'=>230,'ܾ'=>220,'ܿ'=>230,'݀'=>230,'݁'=>230,'݂'=>220,'݃'=>230,'݄'=>220,'݅'=>230,'݆'=>220,'݇'=>230,'݈'=>220,'݉'=>230,'݊'=>230,'߫'=>230,'߬'=>230,'߭'=>230,'߮'=>230,'߯'=>230,'߰'=>230,'߱'=>230,'߲'=>220,'߳'=>230,'़'=>7,'्'=>9,'॑'=>230,'॒'=>220,'॓'=>230,'॔'=>230,'়'=>7,'্'=>9,'਼'=>7,'੍'=>9,'઼'=>7,'્'=>9,'଼'=>7,'୍'=>9,'்'=>9,'్'=>9,'ౕ'=>84,'ౖ'=>91,'಼'=>7,'್'=>9,'്'=>9,'්'=>9,'ุ'=>103,'ู'=>103,'ฺ'=>9,'่'=>107,'้'=>107,'๊'=>107,'๋'=>107,'ຸ'=>118,'ູ'=>118,'່'=>122,'້'=>122,'໊'=>122,'໋'=>122,'༘'=>220,'༙'=>220,'༵'=>220,'༷'=>220,'༹'=>216,'ཱ'=>129,'ི'=>130,'ུ'=>132,'ེ'=>130,'ཻ'=>130,'ོ'=>130,'ཽ'=>130,'ྀ'=>130,'ྂ'=>230,'ྃ'=>230,'྄'=>9,'྆'=>230,'྇'=>230,'࿆'=>220,'့'=>7,'္'=>9,'፟'=>230,'᜔'=>9,'᜴'=>9,'្'=>9,'៝'=>230,'ᢩ'=>228,'᤹'=>222,'᤺'=>230,'᤻'=>220,'ᨗ'=>230,'ᨘ'=>220,'᬴'=>7,'᭄'=>9,'᭫'=>230,'᭬'=>220,'᭭'=>230,'᭮'=>230,'᭯'=>230,'᭰'=>230,'᭱'=>230,'᭲'=>230,'᭳'=>230,'᷀'=>230,'᷁'=>230,'᷂'=>220,'᷃'=>230,'᷄'=>230,'᷅'=>230,'᷆'=>230,'᷇'=>230,'᷈'=>230,'᷉'=>230,'᷊'=>220,'᷾'=>230,'᷿'=>220,'⃐'=>230,'⃑'=>230,'⃒'=>1,'⃓'=>1,'⃔'=>230,'⃕'=>230,'⃖'=>230,'⃗'=>230,'⃘'=>1,'⃙'=>1,'⃚'=>1,'⃛'=>230,'⃜'=>230,'⃡'=>230,'⃥'=>1,'⃦'=>1,'⃧'=>230,'⃨'=>220,'⃩'=>230,'⃪'=>1,'⃫'=>1,'⃬'=>220,'⃭'=>220,'⃮'=>220,'⃯'=>220,'〪'=>218,'〫'=>228,'〬'=>232,'〭'=>222,'〮'=>224,'〯'=>224,'゙'=>8,'゚'=>8,'꠆'=>9,'ﬞ'=>26,'︠'=>230,'︡'=>230,'︢'=>230,'︣'=>230,'𐨍'=>220,'𐨏'=>230,'𐨸'=>230,'𐨹'=>1,'𐨺'=>220,'𐨿'=>9,'𝅥'=>216,'𝅦'=>216,'𝅧'=>1,'𝅨'=>1,'𝅩'=>1,'𝅭'=>226,'𝅮'=>216,'𝅯'=>216,'𝅰'=>216,'𝅱'=>216,'𝅲'=>216,'𝅻'=>220,'𝅼'=>220,'𝅽'=>220,'𝅾'=>220,'𝅿'=>220,'𝆀'=>220,'𝆁'=>220,'𝆂'=>220,'𝆅'=>230,'𝆆'=>230,'𝆇'=>230,'𝆈'=>230,'𝆉'=>230,'𝆊'=>220,'𝆋'=>220,'𝆪'=>230,'𝆫'=>230,'𝆬'=>230,'𝆭'=>230,'𝉂'=>230,'𝉃'=>230,'𝉄'=>230);
--- a/phpBB/includes/utf/utf_normalizer.php
+++ b/phpBB/includes/utf/utf_normalizer.php
--- a/phpBB/includes/utf/utf_tools.php
+++ b/phpBB/includes/utf/utf_tools.php
@ -21,6 +21,13 @@ if (!defined('IN_PHPBB'))
 // Enforce ASCII only string handling
 setlocale(LC_CTYPE, 'C');
 /**
 * Setup the UTF-8 portability layer
 */
 Patchwork\Utf8\Bootup::initUtf8Encode();
 Patchwork\Utf8\Bootup::initMbstring();
 Patchwork\Utf8\Bootup::initIntl();
 /**
 * UTF-8 tools
 *
@ -29,544 +36,85 @@ setlocale(LC_CTYPE, 'C');
 *
 */
-if (!extension_loaded('xml'))
+/**
 * UTF-8 aware alternative to strrpos
 * @ignore
 */
 function utf8_strrpos($str,	$needle, $offset = null)
 {
-	/**
+	// Emulate behaviour of strrpos rather than raising warning
-	* Implementation of PHP's native utf8_encode for people without XML support
+	if (empty($str))
 	* This function exploits some nice things that ISO-8859-1 and UTF-8 have in common
 	*
 	* @param string $str ISO-8859-1 encoded data
 	* @return string UTF-8 encoded data
 	*/
 	function utf8_encode($str)
 	{
-		$out = '';
+		return false;
 		for ($i = 0, $len = strlen($str); $i < $len; $i++)
 		{
 			$letter = $str[$i];
 			$num = ord($letter);
 			if ($num < 0x80)
 			{
 				$out .= $letter;
 			}
 			else if ($num < 0xC0)
 			{
 				$out .= "\xC2" . $letter;
 			}
 			else
 			{
 				$out .= "\xC3" . chr($num - 64);
 			}
 		}
 		return $out;
 	}
-	/**
+	if (is_null($offset))
 	* Implementation of PHP's native utf8_decode for people without XML support
 	*
 	* @param string $str UTF-8 encoded data
 	* @return string ISO-8859-1 encoded data
 	*/
 	function utf8_decode($str)
 	{
-		$pos = 0;
+		return mb_strrpos($str, $needle);
-		$len = strlen($str);
+	}
-		$ret = '';
+	else
-
+	{
-		while ($pos < $len)
+		return mb_strrpos($str, $needle, $offset);
 		{
 			$ord = ord($str[$pos]) & 0xF0;
 			if ($ord === 0xC0 || $ord === 0xD0)
 			{
 				$charval = ((ord($str[$pos]) & 0x1F) << 6) | (ord($str[$pos + 1]) & 0x3F);
 				$pos += 2;
 				$ret .= (($charval < 256) ? chr($charval) : '?');
 			}
 			else if ($ord === 0xE0)
 			{
 				$ret .= '?';
 				$pos += 3;
 			}
 			else if ($ord === 0xF0)
 			{
 				$ret .= '?';
 				$pos += 4;
 			}
 			else
 			{
 				$ret .= $str[$pos];
 				++$pos;
 			}
 		}
 		return $ret;
 	}
 }
-// mbstring is old and has it's functions around for older versions of PHP.
+/**
-// if mbstring is not loaded, we go into native mode.
+* UTF-8 aware alternative to strpos
-if (extension_loaded('mbstring'))
+* @ignore
 */
 function utf8_strpos($str, $needle, $offset = null)
 {
-	mb_internal_encoding('UTF-8');
+	if (is_null($offset))
 	/**
 	* UTF-8 aware alternative to strrpos
 	* Find position of last occurrence of a char in a string
 	*/
 	/**
 	* UTF-8 aware alternative to strrpos
 	* @ignore
 	*/
 	function utf8_strrpos($str,	$needle, $offset = null)
 	{
-		// Emulate behaviour of strrpos rather than raising warning
+		return mb_strpos($str, $needle);
 		if (empty($str))
 		{
 			return false;
 		}
 		if (is_null($offset))
 		{
 			return mb_strrpos($str, $needle);
 		}
 		else
 		{
 			return mb_strrpos($str, $needle, $offset);
 		}
 	}
-
+	else
 	/**
 	* UTF-8 aware alternative to strpos
 	* @ignore
 	*/
 	function utf8_strpos($str, $needle, $offset = null)
 	{
-		if (is_null($offset))
+		return mb_strpos($str, $needle, $offset);
 		{
 			return mb_strpos($str, $needle);
 		}
 		else
 		{
 			return mb_strpos($str, $needle, $offset);
 		}
 	}
 	/**
 	* UTF-8 aware alternative to strtolower
 	* @ignore
 	*/
 	function utf8_strtolower($str)
 	{
 		return mb_strtolower($str);
 	}
 	/**
 	* UTF-8 aware alternative to strtoupper
 	* @ignore
 	*/
 	function utf8_strtoupper($str)
 	{
 		return mb_strtoupper($str);
 	}
 	/**
 	* UTF-8 aware alternative to substr
 	* @ignore
 	*/
 	function utf8_substr($str, $offset, $length = null)
 	{
 		if (is_null($length))
 		{
 			return mb_substr($str, $offset);
 		}
 		else
 		{
 			return mb_substr($str, $offset, $length);
 		}
 	}
 	/**
 	* Return the length (in characters) of a UTF-8 string
 	* @ignore
 	*/
 	function utf8_strlen($text)
 	{
 		return mb_strlen($text, 'utf-8');
 	}
 }
-else
+
 /**
 * UTF-8 aware alternative to strtolower
 * @ignore
 */
 function utf8_strtolower($str)
 {
-	/**
+	return mb_strtolower($str);
-	* UTF-8 aware alternative to strrpos
+}
-	* Find position of last occurrence of a char in a string
+
-	*
+/**
-	* @author Harry Fuecks
+* UTF-8 aware alternative to strtoupper
-	* @param string $str haystack
+* @ignore
-	* @param string $needle needle
+*/
-	* @param integer $offset (optional) offset (from left)
+function utf8_strtoupper($str)
-	* @return mixed integer position or FALSE on failure
+{
-	*/
+	return mb_strtoupper($str);
-	function utf8_strrpos($str,	$needle, $offset = null)
+}
 /**
 * UTF-8 aware alternative to substr
 * @ignore
 */
 function utf8_substr($str, $offset, $length = null)
 {
 	if (is_null($length))
 	{
-		if (is_null($offset))
+		return mb_substr($str, $offset);
 		{
 			$ar	= explode($needle, $str);
 			if (sizeof($ar) > 1)
 			{
 				// Pop off the end of the string where the last	match was made
 				array_pop($ar);
 				$str = join($needle, $ar);
 				return utf8_strlen($str);
 			}
 			return false;
 		}
 		else
 		{
 			if (!is_int($offset))
 			{
 				trigger_error('utf8_strrpos	expects	parameter 3	to be long', E_USER_ERROR);
 				return false;
 			}
 			$str = utf8_substr($str, $offset);
 			if (false !== ($pos = utf8_strrpos($str, $needle)))
 			{
 				return $pos	+ $offset;
 			}
 			return false;
 		}
 	}
-
+	else
 	/**
 	* UTF-8 aware alternative to strpos
 	* Find position of first occurrence of a string
 	*
 	* @author Harry Fuecks
 	* @param string $str haystack
 	* @param string $needle needle
 	* @param integer $offset offset in characters (from left)
 	* @return mixed integer position or FALSE on failure
 	*/
 	function utf8_strpos($str, $needle, $offset = null)
 	{
-		if (is_null($offset))
+		return mb_substr($str, $offset, $length);
 		{
 			$ar = explode($needle, $str);
 			if (sizeof($ar) > 1)
 			{
 				return utf8_strlen($ar[0]);
 			}
 			return false;
 		}
 		else
 		{
 			if (!is_int($offset))
 			{
 				trigger_error('utf8_strpos:  Offset must  be an integer', E_USER_ERROR);
 				return false;
 			}
 			$str = utf8_substr($str, $offset);
 			if (false !== ($pos = utf8_strpos($str, $needle)))
 			{
 				return $pos + $offset;
 			}
 			return false;
 		}
 	}
 }
-	/**
+/**
-	* UTF-8 aware alternative to strtolower
+* Return the length (in characters) of a UTF-8 string
-	* Make a string lowercase
+* @ignore
-	* Note: The concept of a characters "case" only exists is some alphabets
+*/
-	* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
+function utf8_strlen($text)
-	* not exist in the Chinese alphabet, for example. See Unicode Standard
+{
-	* Annex #21: Case Mappings
+	return mb_strlen($text, 'utf-8');
 	*
 	* @param string
 	* @return string string in lowercase
 	*/
 	function utf8_strtolower($string)
 	{
 		// Lowercases a UTF-8 string. ASCII letters are folded with a fixed byte
 		// map; other characters are folded through the lookup table below.
 		// Anything that is neither A-Z nor a key of the table is returned as is.
 		static $utf8_upper_to_lower = array(
 			"\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
 			"\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
 			"\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
 			"\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
 			"\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
 			"\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
 			"\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
 			"\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
 			"\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
 			"\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
 			"\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
 			"\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
 			"\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
 			"\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
 			"\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
 			"\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
 			"\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
 			"\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
 			"\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
 			"\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
 			"\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
 			"\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
 			"\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
 			"\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
 			"\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
 			"\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
 			"\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
 			"\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
 			"\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
 			"\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
 			"\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
 			"\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
 			"\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
 			"\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
 			"\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
 			"\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
 			"\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
 			"\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
 			"\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
 			"\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
 			"\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
 			"\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
 			"\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
 			"\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
 			"\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
 			"\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
 		);
 		// strtolower() is locale-sensitive before PHP 8.2: under a single-byte
 		// locale it may rewrite bytes >= 0x80 and thereby break multi-byte
 		// sequences. Folding A-Z with a fixed byte map only ever touches ASCII.
 		$ascii_folded = strtr($string, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
 		return strtr($ascii_folded, $utf8_upper_to_lower);
 	}
 	/**
 	* UTF-8 aware alternative to strtoupper
 	* Make a string uppercase
 	* Note: The concept of a character's "case" only exists in some alphabets
 	* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 	* not exist in the Chinese alphabet, for example. See Unicode Standard
 	* Annex #21: Case Mappings
 	*
 	* ASCII letters are folded with a fixed byte map, other characters through
 	* a static lookup table; anything that is in neither is returned as is.
 	*
 	* @param string $string UTF-8 string
 	* @return string string in uppercase
 	*/
 	function utf8_strtoupper($string)
 	{
 		static $utf8_lower_to_upper = array(
 			"\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
 			"\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
 			"\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
 			"\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
 			"\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
 			"\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
 			"\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
 			"\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
 			"\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
 			"\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
 			"\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
 			"\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
 			"\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
 			"\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
 			"\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
 			"\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
 			"\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
 			"\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
 			"\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
 			"\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
 			"\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
 			"\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
 			"\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
 			"\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
 			"\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
 			"\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
 			"\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
 			"\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
 			"\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
 			"\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
 			"\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
 			"\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
 			"\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
 			"\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
 			"\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
 			"\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
 			"\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
 			"\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
 			"\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
 			"\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
 			"\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
 			"\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
 			"\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
 			"\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
 			"\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
 			"\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
 		);
 		// strtoupper() is locale-sensitive before PHP 8.2: under a single-byte
 		// locale it may rewrite bytes >= 0x80 and thereby break multi-byte
 		// sequences. Folding a-z with a fixed byte map only ever touches ASCII.
 		$ascii_folded = strtr($string, 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
 		return strtr($ascii_folded, $utf8_lower_to_upper);
 	}
 	/**
 	* UTF-8 aware alternative to substr
 	* Return part of a string given character offset (and optionally length)
 	*
 	* Note arguments: compared to substr - if offset or length are
 	* not integers, this version will not complain but rather massages them
 	* into an integer.
 	*
 	* Note on returned values: substr documentation states false can be
 	* returned in some cases (e.g. offset > string length)
 	* mb_substr never returns false, it will return an empty string instead.
 	* This adopts the mb_substr approach
 	*
 	* Note on implementation: PCRE only supports repetitions of less than
 	* 65536, in order to accept up to MAXINT values for offset and length,
 	* we'll repeat a group of 65535 characters when needed.
 	*
 	* Note on implementation: calculating the number of characters in the
 	* string is a relatively expensive operation, so we only carry it out when
 	* necessary. It isn't necessary for +ve offsets and no specified length
 	*
 	* @author Chris Smith<chris@jalakai.co.uk>
 	* @param string $str
 	* @param integer $offset number of UTF-8 characters offset (from left)
 	* @param integer $length (optional) length in UTF-8 characters from offset
 	* @return string the requested part, or an empty string when the request
 	*                is out of range or the pattern does not match
 	*/
 	function utf8_substr($str, $offset, $length = NULL)
 	{
 		// generates E_NOTICE
 		// for PHP4 objects, but not PHP5 objects
 		$str = (string) $str;
 		$offset = (int) $offset;
 		// keep NULL as "no length given"; anything else is forced to an integer
 		if (!is_null($length))
 		{
 			$length = (int) $length;
 		}
 		// handle trivial cases: zero length, or a negative length that
 		// reaches further back than the negative offset
 		if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
 		{
 			return '';
 		}
 		// normalise negative offsets (we could use a tail
 		// anchored pattern, but they are horribly slow!)
 		if ($offset < 0)
 		{
 			// the character count is needed here to turn the offset into one
 			// counted from the left; it is reused below via isset($strlen)
 			$strlen = utf8_strlen($str);
 			$offset = $strlen + $offset;
 			// an offset before the start of the string is clamped to the start
 			if ($offset < 0)
 			{
 				$offset = 0;
 			}
 		}
 		// $op: pattern that skips the offset, $lp: pattern that captures the result
 		$op = '';
 		$lp = '';
 		// establish a pattern for offset, a
 		// non-captured group equal in length to offset
 		if ($offset > 0)
 		{
 			// split the offset into full 65535-character groups plus a remainder
 			$ox = (int) ($offset / 65535);
 			$oy = $offset % 65535;
 			if ($ox)
 			{
 				$op = '(?:.{65535}){' . $ox . '}';
 			}
 			$op = '^(?:' . $op . '.{' . $oy . '})';
 		}
 		else
 		{
 			// offset == 0; just anchor the pattern
 			$op = '^';
 		}
 		// establish a pattern for length
 		if (is_null($length))
 		{
 			// the rest of the string
 			$lp = '(.*)$';
 		}
 		else
 		{
 			if (!isset($strlen))
 			{
 				// not computed yet because the offset was not negative
 				$strlen = utf8_strlen($str);
 			}
 			// another trivial case
 			if ($offset > $strlen)
 			{
 				return '';
 			}
 			if ($length > 0)
 			{
 				// reduce any length that would
 				// go past the end of the string
 				$length = min($strlen - $offset, $length);
 				$lx = (int) ($length / 65535);
 				$ly = $length % 65535;
 				// positive length requires a captured group
 				// of length characters
 				if ($lx)
 				{
 					$lp = '(?:.{65535}){' . $lx . '}';
 				}
 				$lp = '(' . $lp . '.{'. $ly . '})';
 			}
 			else if ($length < 0)
 			{
 				// more characters to drop from the tail than remain after the offset
 				if ($length < ($offset - $strlen))
 				{
 					return '';
 				}
 				$lx = (int) ((-$length) / 65535);
 				$ly = (-$length) % 65535;
 				// negative length requires ... capture everything
 				// except a group of  -length characters
 				// anchored at the tail-end of the string
 				if ($lx)
 				{
 					$lp = '(?:.{65535}){' . $lx . '}';
 				}
 				$lp = '(.*)(?:' . $lp . '.{' . $ly . '})$';
 			}
 		}
 		// "u" matches on UTF-8 characters instead of bytes, "s" lets "." match newlines;
 		// both "no match" and a preg_match() failure end up as an empty string
 		if (!preg_match('#' . $op . $lp . '#us', $str, $match))
 		{
 			return '';
 		}
 		return $match[1];
 	}
 	/**
 	* Count the characters of a UTF-8 string
 	*
 	* @param	string	$text		UTF-8 string
 	* @return	integer				Number of characters in the given string
 	*/
 	function utf8_strlen($text)
 	{
 		// utf8_decode() emits one byte per character (anything it cannot map
 		// becomes "?"), so the byte length of its result is the character count
 		$single_byte = utf8_decode($text);
 		return strlen($single_byte);
 	}
 }
 /**
@ -867,7 +415,6 @@ function utf8_recode($string, $encoding)
 	// Trigger an error?! Fow now just give bad data :-(
 	trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
 	//return $string; // use utf_normalizer::cleanup() ?
 }
 /**
@ -1611,14 +1158,8 @@ function utf8_case_fold_nfkc($text, $option = 'full')
 	// do the case fold
 	$text = utf8_case_fold($text, $option);
 	if (!class_exists('utf_normalizer'))
 	{
 		global $phpbb_root_path, $phpEx;
 		include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
 	}
 	// convert to NFKC
-	utf_normalizer::nfkc($text);
+	Normalizer::normalize($text, Normalizer::NFKC);
 	// FC_NFKC_Closure, http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
 	$text = strtr($text, $fc_nfkc_closure);
@ -1714,106 +1255,56 @@ function utf8_case_fold_nfc($text, $option = 'full')
 	return $text;
 }
-if (extension_loaded('intl'))
+/**
 * wrapper around PHP's native normalizer from intl
 * previously a PECL extension, included in the core since PHP 5.3.0
 * http://php.net/manual/en/normalizer.normalize.php
 *
 * @param	mixed	$strings	a string or an array of strings to normalize
 * @return	mixed				the normalized content, preserving array keys if array given.
 */
 function utf8_normalize_nfc($strings)
 {
-	/**
+	if (empty($strings))
 	* wrapper around PHP's native normalizer from intl
 	* previously a PECL extension, included in the core since PHP 5.3.0
 	* http://php.net/manual/en/normalizer.normalize.php
 	*
 	* @param	mixed	$strings	a string or an array of strings to normalize
 	* @return	mixed				the normalized content, preserving array keys if array given.
 	*/
 	function utf8_normalize_nfc($strings)
 	{
-		if (empty($strings))
+		return $strings;
 	}
 	if (!is_array($strings))
 	{
 		if (Normalizer::isNormalized($strings))
 		{
 			return $strings;
 		}
-
+		return (string) Normalizer::normalize($strings);
-		if (!is_array($strings))
+	}
 	else
 	{
 		foreach ($strings as $key => $string)
 		{
-			if (Normalizer::isNormalized($strings))
+			if (is_array($string))
 			{
-				return $strings;
+				foreach ($string as $_key => $_string)
 			}
 			return (string) Normalizer::normalize($strings);
 		}
 		else
 		{
 			foreach ($strings as $key => $string)
 			{
 				if (is_array($string))
 				{
-					foreach ($string as $_key => $_string)
+					if (Normalizer::isNormalized($strings[$key][$_key]))
 					{
 						if (Normalizer::isNormalized($strings[$key][$_key]))
 						{
 							continue;
 						}
 						$strings[$key][$_key] = (string) Normalizer::normalize($strings[$key][$_key]);
 					}
 				}
 				else
 				{
 					if (Normalizer::isNormalized($strings[$key]))
 					{
 						continue;
 					}
-					$strings[$key] = (string) Normalizer::normalize($strings[$key]);
+					$strings[$key][$_key] = (string) Normalizer::normalize($strings[$key][$_key]);
 				}
 			}
-		}
+			else
 		return $strings;
 	}
 }
 else
 {
 	/**
 	* A wrapper function for the normalizer which takes care of including the class if
 	* required and modifies the passed strings to be in NFC (Normalization Form Composition).
 	*
 	* @param	mixed	$strings	a string or an array of strings to normalize
 	* @return	mixed				the normalized content, preserving array keys if array given.
 	*/
 	function utf8_normalize_nfc($strings)
 	{
 		if (empty($strings))
 		{
 			return $strings;
 		}
 		if (!class_exists('utf_normalizer'))
 		{
 			global $phpbb_root_path, $phpEx;
 			include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
 		}
 		if (!is_array($strings))
 		{
 			utf_normalizer::nfc($strings);
 		}
 		else if (is_array($strings))
 		{
 			foreach ($strings as $key => $string)
 			{
-				if (is_array($string))
+				if (Normalizer::isNormalized($strings[$key]))
 				{
-					foreach ($string as $_key => $_string)
+					continue;
 					{
 						utf_normalizer::nfc($strings[$key][$_key]);
 					}
 				}
 				else
 				{
 					utf_normalizer::nfc($strings[$key]);
 				}
 				$strings[$key] = (string) Normalizer::normalize($strings[$key]);
 			}
 		}
 		return $strings;
 	}
 	return $strings;
 }
 /**
@ -1959,50 +1450,3 @@ function utf8_basename($filename)
 	return $filename;
 }
 /**
 * str_replace() counterpart that locates matches by UTF-8 character position
 *
 * @param string|array $search The value(s) to search for
 * @param string|array $replace The replacement string(s), matched to $search by index
 * @param string $subject The target string
 * @return string The resultant string
 */
 function utf8_str_replace($search, $replace, $subject)
 {
 	if (!is_array($search))
 	{
 		$search = array($search);
 		// a single needle cannot take a list of replacements
 		if (is_array($replace))
 		{
 			$replace = (string) $replace;
 			trigger_error('Array to string conversion', E_USER_NOTICE);
 		}
 	}
 	$needle_count = sizeof($search);
 	// give every needle its own replacement: repeat a scalar, pad a short list with ''
 	$replace = is_array($replace) ? array_pad($replace, $needle_count, '') : array_fill(0, $needle_count, $replace);
 	for ($idx = 0; $idx < $needle_count; ++$idx)
 	{
 		$needle_chars = utf8_strlen($search[$idx]);
 		$substitute_chars = utf8_strlen($replace[$idx]);
 		// after each hit the scan resumes right behind the inserted replacement
 		for ($offset = 0; ($found_at = utf8_strpos($subject, $search[$idx], $offset)) !== false; $offset = $found_at + $substitute_chars)
 		{
 			$head = utf8_substr($subject, 0, $found_at);
 			$tail = utf8_substr($subject, $found_at + $needle_chars);
 			$subject = $head . $replace[$idx] . $tail;
 		}
 	}
 	return $subject;
 }
--- a/phpBB/install/data/confusables.php
+++ b/phpBB/install/data/confusables.php
@ -633,14 +633,8 @@ function utf8_new_case_fold_nfkc($text, $option = 'full')
 	// do the case fold
 	$text = utf8_new_case_fold($text, $option);
 	if (!class_exists('utf_normalizer'))
 	{
 		global $phpbb_root_path, $phpEx;
 		include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
 	}
 	// convert to NFKC
-	utf_new_normalizer::nfkc($text);
+	$text = Normalizer::normalize($text, Normalizer::NFKC);
 	// FC_NFKC_Closure, http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
 	$text = strtr($text, $fc_nfkc_closure);
--- a/phpBB/install/data/new_normalizer.php
+++ b/phpBB/install/data/new_normalizer.php
@ -1,197 +0,0 @@
 <?php
 /**
 *
 * This file is part of the phpBB Forum Software package.
 *
 * @copyright (c) phpBB Limited <https://www.phpbb.com>
 * @license GNU General Public License, version 2 (GPL-2.0)
 *
 * For full copyright and license information, please see
 * the docs/CREDITS.txt file.
 *
 */
 /**
 * @ignore
 */
 if (!defined('IN_PHPBB'))
 {
 	exit;
 }
 /**
 * Bring a string, or every string in an array nested up to two levels deep,
 * into NFC (Normalization Form Composition) via utf_new_normalizer::nfc().
 *
 * @param	mixed	$strings	a string or an array of strings to normalize
 * @return	mixed				the normalized content, preserving array keys if array given.
 */
 function utf8_new_normalize_nfc($strings)
 {
 	// nothing to do for empty input; hand it back untouched
 	if (empty($strings))
 	{
 		return $strings;
 	}
 	// scalar input: normalize in place (nfc() takes its argument by reference)
 	if (!is_array($strings))
 	{
 		utf_new_normalizer::nfc($strings);
 		return $strings;
 	}
 	foreach (array_keys($strings) as $key)
 	{
 		if (!is_array($strings[$key]))
 		{
 			utf_new_normalizer::nfc($strings[$key]);
 			continue;
 		}
 		// second level: normalize each entry of the nested array
 		foreach (array_keys($strings[$key]) as $_key)
 		{
 			utf_new_normalizer::nfc($strings[$key][$_key]);
 		}
 	}
 	return $strings;
 }
 /**
 * Static helpers that validate UTF-8 strings and normalize them in place.
 *
 * Every method is called statically (utf_new_normalizer::nfc($str)), so all of
 * them are declared static; calling a non-static method statically is a fatal
 * error on PHP 8.
 */
 class utf_new_normalizer
 {
 	/**
 	* Validate, cleanup and normalize a string
 	*
 	* The ultimate convenience function! Clean up invalid UTF-8 sequences,
 	* and convert to Normal Form C, canonical composition.
 	*
 	* @param	string	&$str	The dirty string, cleaned up in place
 	* @return	void
 	*/
 	public static function cleanup(&$str)
 	{
 		// The string below is the list of all authorized characters, sorted by frequency in latin text
 		$pos = strspn($str, "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x0D");
 		$len = strlen($str);
 		if ($pos == $len)
 		{
 			// ASCII strings with no special chars return immediately
 			return;
 		}
 		// Load the NFC quick-check and canonical decomposition tables on first use
 		if (!isset($GLOBALS['utf_nfc_qc']))
 		{
 			global $phpbb_root_path, $phpEx;
 			include($phpbb_root_path . 'includes/utf/data/utf_nfc_qc.' . $phpEx);
 		}
 		if (!isset($GLOBALS['utf_canonical_decomp']))
 		{
 			global $phpbb_root_path, $phpEx;
 			include($phpbb_root_path . 'includes/utf/data/utf_canonical_decomp.' . $phpEx);
 		}
 		// Replace any byte in the range 0x00..0x1F, except for \r, \n and \t
 		// We replace those characters with a 0xFF byte, which is illegal in UTF-8 and will in turn be replaced with a UTF replacement char
 		$str = strtr(
 			$str,
 			"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
 			"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
 		);
 		$str = utf_new_normalizer::recompose($str, $pos, $len, $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_canonical_decomp']);
 	}
 	/**
 	* Validate and normalize a UTF string to NFC
 	*
 	* @param	string	&$str	Unchecked UTF string, normalized in place
 	* @return	void
 	*/
 	public static function nfc(&$str)
 	{
 		$pos = strspn($str, UTF8_ASCII_RANGE);
 		$len = strlen($str);
 		if ($pos == $len)
 		{
 			// ASCII strings return immediately
 			return;
 		}
 		// Load the NFC quick-check and canonical decomposition tables on first use
 		if (!isset($GLOBALS['utf_nfc_qc']))
 		{
 			global $phpbb_root_path, $phpEx;
 			include($phpbb_root_path . 'includes/utf/data/utf_nfc_qc.' . $phpEx);
 		}
 		if (!isset($GLOBALS['utf_canonical_decomp']))
 		{
 			global $phpbb_root_path, $phpEx;
 			include($phpbb_root_path . 'includes/utf/data/utf_canonical_decomp.' . $phpEx);
 		}
 		$str = utf_new_normalizer::recompose($str, $pos, $len, $GLOBALS['utf_nfc_qc'], $GLOBALS['utf_canonical_decomp']);
 	}
 	/**
 	* Validate and normalize a UTF string to NFKC
 	*
 	* @param	string	&$str	Unchecked UTF string, normalized in place
 	* @return	void
 	*/
 	public static function nfkc(&$str)
 	{
 		$pos = strspn($str, UTF8_ASCII_RANGE);
 		$len = strlen($str);
 		if ($pos == $len)
 		{
 			// ASCII strings return immediately
 			return;
 		}
 		// Load the NFKC quick-check and compatibility decomposition tables on first use
 		if (!isset($GLOBALS['utf_nfkc_qc']))
 		{
 			global $phpbb_root_path, $phpEx;
 			include($phpbb_root_path . 'includes/utf/data/utf_nfkc_qc.' . $phpEx);
 		}
 		if (!isset($GLOBALS['utf_compatibility_decomp']))
 		{
 			global $phpbb_root_path, $phpEx;
 			include($phpbb_root_path . 'includes/utf/data/utf_compatibility_decomp.' . $phpEx);
 		}
 		$str = utf_new_normalizer::recompose($str, $pos, $len, $GLOBALS['utf_nfkc_qc'], $GLOBALS['utf_compatibility_decomp']);
 	}
 	/**
 	* Recompose a UTF string
 	*
 	* Makes sure the canonical composition table is loaded, then delegates to
 	* utf_normalizer::recompose(). Intended for internal use, but kept public
 	* so existing callers keep working.
 	*
 	* @param	string	$str			Unchecked UTF string
 	* @param	integer	$pos			Position of the first UTF char (in bytes)
 	* @param	integer	$len			Length of the string (in bytes)
 	* @param	array	&$qc			Quick-check array, passed by reference but never modified
 	* @param	array	&$decomp_map	Decomposition mapping, passed by reference but never modified
 	* @return	string					The string, validated and recomposed
 	*
 	* @access	private
 	*/
 	public static function recompose($str, $pos, $len, &$qc, &$decomp_map)
 	{
 		global $utf_canonical_comp;
 		// Load the canonical composition table
 		if (!isset($utf_canonical_comp))
 		{
 			global $phpbb_root_path, $phpEx;
 			include($phpbb_root_path . 'includes/utf/data/utf_canonical_comp.' . $phpEx);
 		}
 		return utf_normalizer::recompose($str, $pos, $len, $qc, $decomp_map);
 	}
 }
--- a/phpBB/install/database_update.php
+++ b/phpBB/install/database_update.php
@ -74,7 +74,6 @@ require($phpbb_root_path . 'includes/functions.' . $phpEx);
 require($phpbb_root_path . 'includes/functions_content.' . $phpEx);
 require($phpbb_root_path . 'includes/constants.' . $phpEx);
 include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
 require($phpbb_root_path . 'includes/utf/utf_tools.' . $phpEx);
 // Set PHP error handler to ours
--- a/phpBB/install/index.php
+++ b/phpBB/install/index.php
@ -102,7 +102,6 @@ phpbb_require_updated('includes/functions.' . $phpEx);
 phpbb_require_updated('includes/functions_content.' . $phpEx, true);
 phpbb_include_updated('includes/functions_admin.' . $phpEx);
 phpbb_include_updated('includes/utf/utf_normalizer.' . $phpEx);
 phpbb_include_updated('includes/utf/utf_tools.' . $phpEx);
 phpbb_require_updated('includes/functions_install.' . $phpEx);
--- a/phpBB/phpbb/db/driver/driver.php
+++ b/phpBB/phpbb/db/driver/driver.php
@ -363,8 +363,8 @@ abstract class driver implements driver_interface
 	*/
 	function sql_like_expression($expression)
 	{
-		$expression = utf8_str_replace(array('_', '%'), array("\_", "\%"), $expression);
+		$expression = str_replace(array('_', '%'), array("\_", "\%"), $expression);
-		$expression = utf8_str_replace(array(chr(0) . "\_", chr(0) . "\%"), array('_', '%'), $expression);
+		$expression = str_replace(array(chr(0) . "\_", chr(0) . "\%"), array('_', '%'), $expression);
 		return $this->_sql_like_expression('LIKE \'' . $this->sql_escape($expression) . '\'');
 	}
@ -374,8 +374,8 @@ abstract class driver implements driver_interface
 	*/
 	function sql_not_like_expression($expression)
 	{
-		$expression = utf8_str_replace(array('_', '%'), array("\_", "\%"), $expression);
+		$expression = str_replace(array('_', '%'), array("\_", "\%"), $expression);
-		$expression = utf8_str_replace(array(chr(0) . "\_", chr(0) . "\%"), array('_', '%'), $expression);
+		$expression = str_replace(array(chr(0) . "\_", chr(0) . "\%"), array('_', '%'), $expression);
 		return $this->_sql_not_like_expression('NOT LIKE \'' . $this->sql_escape($expression) . '\'');
 	}
--- a/phpBB/phpbb/search/fulltext_native.php
+++ b/phpBB/phpbb/search/fulltext_native.php
@ -18,6 +18,13 @@ namespace phpbb\search;
 */
 class fulltext_native extends \phpbb\search\base
 {
 	const UTF8_HANGUL_FIRST = "\xEA\xB0\x80";
 	const UTF8_HANGUL_LAST = "\xED\x9E\xA3";
 	const UTF8_CJK_FIRST = "\xE4\xB8\x80";
 	const UTF8_CJK_LAST = "\xE9\xBE\xBB";
 	const UTF8_CJK_B_FIRST = "\xF0\xA0\x80\x80";
 	const UTF8_CJK_B_LAST = "\xF0\xAA\x9B\x96";
 	/**
 	 * Associative array holding index stats
 	 * @var array
@ -93,7 +100,7 @@ class fulltext_native extends \phpbb\search\base
 	protected $user;
 	/**
-	* Initialises the fulltext_native search backend with min/max word length and makes sure the UTF-8 normalizer is loaded
+	* Initialises the fulltext_native search backend with min/max word length
 	*
 	* @param	boolean|string	&$error	is passed by reference and should either be set to false on success or an error message on failure
 	*/
@ -110,10 +117,6 @@ class fulltext_native extends \phpbb\search\base
 		/**
 		* Load the UTF tools
 		*/
 		if (!class_exists('utf_normalizer'))
 		{
 			include($this->phpbb_root_path . 'includes/utf/utf_normalizer.' . $this->php_ext);
 		}
 		if (!function_exists('utf8_decode_ncr'))
 		{
 			include($this->phpbb_root_path . 'includes/utf/utf_tools.' . $this->php_ext);
@ -1175,9 +1178,9 @@ class fulltext_native extends \phpbb\search\base
 				* Note: this could be optimized. If the codepoint is lower than Hangul's range
 				* we know that it will also be lower than CJK ranges
 				*/
-				if ((strncmp($word, UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, UTF8_HANGUL_LAST, 3) > 0)
+				if ((strncmp($word, self::UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, self::UTF8_HANGUL_LAST, 3) > 0)
-					&& (strncmp($word, UTF8_CJK_FIRST, 3) < 0 || strncmp($word, UTF8_CJK_LAST, 3) > 0)
+					&& (strncmp($word, self::UTF8_CJK_FIRST, 3) < 0 || strncmp($word, self::UTF8_CJK_LAST, 3) > 0)
-					&& (strncmp($word, UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, UTF8_CJK_B_LAST, 4) > 0))
+					&& (strncmp($word, self::UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, self::UTF8_CJK_B_LAST, 4) > 0))
 				{
 					$word = strtok(' ');
 					continue;
@ -1544,8 +1547,6 @@ class fulltext_native extends \phpbb\search\base
 	* @param	string	$allowed_chars	String of special chars to allow
 	* @param	string	$encoding		Text encoding
 	* @return	string					Cleaned up text, only alphanumeric chars are left
 	*
 	* @todo \normalizer::cleanup being able to be used?
 	*/
 	protected function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
 	{
@ -1572,12 +1573,9 @@ class fulltext_native extends \phpbb\search\base
 		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);
 		/**
-		* Load the UTF-8 normalizer
+		* Normalize to NFC
 		*
 		* If we use it more widely, an instance of that class should be held in a
 		* global variable instead
 		*/
-		\utf_normalizer::nfc($text);
+		$text = \Normalizer::normalize($text);
 		/**
 		* The first thing we do is:
@ -1670,9 +1668,9 @@ class fulltext_native extends \phpbb\search\base
 			$utf_char = substr($text, $pos, $utf_len);
 			$pos += $utf_len;
-			if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
+			if (($utf_char >= self::UTF8_HANGUL_FIRST && $utf_char <= self::UTF8_HANGUL_LAST)
-				|| ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
+				|| ($utf_char >= self::UTF8_CJK_FIRST && $utf_char <= self::UTF8_CJK_LAST)
-				|| ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST))
+				|| ($utf_char >= self::UTF8_CJK_B_FIRST && $utf_char <= self::UTF8_CJK_B_LAST))
 			{
 				/**
 				* All characters within these ranges are valid
--- a/tests/RUNNING_TESTS.md
+++ b/tests/RUNNING_TESTS.md
@ -120,8 +120,9 @@ directory (above phpBB):
 Slow tests
 --------------
-Certain tests, such as the UTF-8 normalizer or the DNS tests tend to be slow.
+Certain tests, such as the DNS tests, tend to be slow.
-Thus these tests are in the `slow` group, which is excluded by default. If you
+Thus these tests are in the `slow` group, which is excluded by default. You can
 enable slow tests by copying the phpunit.xml.all file to phpunit.xml. If you
 only want the slow tests, run:
    $ phpBB/vendor/bin/phpunit --group slow
--- a/tests/utf/normalizer_test.php
+++ b/tests/utf/normalizer_test.php
@ -1,327 +0,0 @@
 <?php
 /**
 *
 * This file is part of the phpBB Forum Software package.
 *
 * @copyright (c) phpBB Limited <https://www.phpbb.com>
 * @license GNU General Public License, version 2 (GPL-2.0)
 *
 * For full copyright and license information, please see
 * the docs/CREDITS.txt file.
 *
 */
 require_once dirname(__FILE__) . '/../../phpBB/includes/utf/utf_normalizer.php';
 /**
 * Conformance tests for utf_normalizer, driven by the Unicode data files
 * NormalizationTest.txt and UnicodeData.txt.
 *
 * The data files are downloaded once into the tests/tmp directory; when they
 * are not available the tests are skipped instead of looping on an invalid
 * file handle.
 *
 * @group slow
 */
 class phpbb_utf_normalizer_test extends phpbb_test_case
 {
 	/**
 	* Directory holding the downloaded Unicode data files
 	*/
 	static private $data_dir;
 	/**
 	* Fetch the Unicode data files needed by the tests, unless already present
 	*/
 	static public function setUpBeforeClass()
 	{
 		self::$data_dir = dirname(__FILE__) . '/../tmp';
 		self::download('http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt', self::$data_dir);
 		self::download('http://www.unicode.org/Public/UNIDATA/UnicodeData.txt', self::$data_dir);
 	}
 	/**
 	* Check every data line of NormalizationTest.txt against all four forms
 	*
 	* @return	array	Hex codepoints (as keys) of the single-character c1
 	*					entries that were processed, consumed by test_invariants()
 	*/
 	public function test_normalizer()
 	{
 		$test_suite = array(
 			/**
 			* NFC
 			*   c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
 			*   c4 ==  NFC(c4) ==  NFC(c5)
 			*/
 			'NFC'	=>	array(
 				'c2'	=>	array('c1', 'c2', 'c3'),
 				'c4'	=>	array('c4', 'c5')
 			),
 			/**
 			* NFD
 			*   c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
 			*   c5 ==  NFD(c4) ==  NFD(c5)
 			*/
 			'NFD'	=>	array(
 				'c3'	=>	array('c1', 'c2', 'c3'),
 				'c5'	=>	array('c4', 'c5')
 			),
 			/**
 			* NFKC
 			*   c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
 			*/
 			'NFKC'	=>	array(
 				'c4'	=>	array('c1', 'c2', 'c3', 'c4', 'c5')
 			),
 			/**
 			* NFKD
 			*   c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
 			*/
 			'NFKD'	=>	array(
 				'c5'	=>	array('c1', 'c2', 'c3', 'c4', 'c5')
 			)
 		);
 		$column_names = array('c1', 'c2', 'c3', 'c4', 'c5');
 		$tested_chars = array();
 		$fp = $this->open_data_file('NormalizationTest.txt');
 		// Looping on fgets() rather than feof() stops cleanly at end of file
 		while (($line = fgets($fp)) !== false)
 		{
 			// Data lines start with an uppercase hex digit; anything else
 			// ("@" part headers, comments, blank lines) is skipped
 			if (strpos('0123456789ABCDEF', $line[0]) === false)
 			{
 				continue;
 			}
 			$fields = explode(';', $line);
 			if (count($fields) < 5)
 			{
 				continue;
 			}
 			// Anything after the fifth field is the trailing comment
 			$columns = array_combine($column_names, array_slice($fields, 0, 5));
 			if (strpos($columns['c1'], ' ') === false)
 			{
 				/**
 				* We are currently testing a single character, we add it to the list of
 				* characters we have processed so that we can exclude it when testing
 				* for invariants
 				*/
 				$tested_chars[$columns['c1']] = 1;
 			}
 			foreach ($test_suite as $form => $serie)
 			{
 				foreach ($serie as $expected => $tests)
 				{
 					$hex_expected = $columns[$expected];
 					$utf_expected = $this->hexseq_to_utf($hex_expected);
 					foreach ($tests as $test)
 					{
 						// Normalize the tested column itself, not the expected value
 						$utf_result = $this->normalize_to_form($form, $this->hexseq_to_utf($columns[$test]));
 						$hex_result = $this->utf_to_hexseq($utf_result);
 						$this->assertEquals($utf_expected, $utf_result, "$expected == $form($test) ($hex_expected != $hex_result)");
 					}
 				}
 			}
 		}
 		fclose($fp);
 		return $tested_chars;
 	}
 	/**
 	* Check that characters absent from NormalizationTest.txt are left untouched
 	* by every normalization form (surrogates excepted, see below)
 	*
 	* @depends test_normalizer
 	*
 	* @param	array	$tested_chars	Hex codepoints (as keys) already covered
 	*									by test_normalizer()
 	*/
 	public function test_invariants(array $tested_chars)
 	{
 		$fp = $this->open_data_file('UnicodeData.txt');
 		while (($line = fgets($fp, 1024)) !== false)
 		{
 			// The codepoint is the first ";"-separated field of the line
 			if (!$pos = strpos($line, ';'))
 			{
 				continue;
 			}
 			$hex_tested = $hex_expected = substr($line, 0, $pos);
 			if (isset($tested_chars[$hex_tested]))
 			{
 				continue;
 			}
 			$utf_tested = $utf_expected = $this->hex_to_utf($hex_tested);
 			if ($utf_expected >= UTF8_SURROGATE_FIRST
 			 && $utf_expected <= UTF8_SURROGATE_LAST)
 			{
 				/**
 				* Surrogates are illegal on their own, we expect the normalizer
 				* to return a replacement char
 				*/
 				$utf_expected = UTF8_REPLACEMENT;
 				$hex_expected = $this->utf_to_hexseq($utf_expected);
 			}
 			foreach (array('nfc', 'nfkc', 'nfd', 'nfkd') as $form)
 			{
 				// Feed the tested character to the normalizer, so the surrogate
 				// replacement above is really exercised
 				$utf_result = $this->normalize_to_form($form, $utf_tested);
 				$hex_result = $this->utf_to_hexseq($utf_result);
 				$this->assertEquals($utf_expected, $utf_result, "$hex_expected == $form($hex_tested) ($hex_expected != $hex_result)");
 			}
 		}
 		fclose($fp);
 	}
 	/**
 	* Open one of the downloaded data files for reading
 	*
 	* The current test is marked as skipped when the file cannot be read,
 	* e.g. because the download in setUpBeforeClass() failed.
 	*
 	* @param	string	$filename	File name inside the data directory
 	* @return	resource			Read handle on the file
 	*/
 	protected function open_data_file($filename)
 	{
 		$path = self::$data_dir . '/' . $filename;
 		$fp = is_readable($path) ? fopen($path, 'rb') : false;
 		if (!$fp)
 		{
 			$this->markTestSkipped("Unicode data file $path is not available");
 		}
 		return $fp;
 	}
 	/**
 	* Run a string through one of the utf_normalizer forms
 	*
 	* The normalizer methods are called with their argument passed by
 	* reference; this wrapper hands the outcome back as a return value.
 	*
 	* @param	string	$form	Normalizer method name (nfc, nfd, nfkc or nfkd)
 	* @param	string	$utf	UTF-8 string to normalize
 	* @return	string			The string after the normalizer call
 	*/
 	protected function normalize_to_form($form, $utf)
 	{
 		call_user_func_array(array('utf_normalizer', $form), array(&$utf));
 		return $utf;
 	}
 	/**
 	* Convert a UTF string to a sequence of codepoints in hexadecimal
 	*
 	* @param	string	$str	UTF string
 	* @return	string			Space-separated codepoints in uppercase hex,
 	*							each padded to at least 4 digits
 	*/
 	protected function utf_to_hexseq($str)
 	{
 		$pos = 0;
 		$len = strlen($str);
 		$ret = array();
 		while ($pos < $len)
 		{
 			$c = $str[$pos];
 			// The high nibble of the lead byte gives the sequence length
 			switch ($c & "\xF0")
 			{
 				case "\xC0":
 				case "\xD0":
 					$utf_len = 2;
 					break;
 				case "\xE0":
 					$utf_len = 3;
 					break;
 				case "\xF0":
 					$utf_len = 4;
 					break;
 				default:
 					$utf_len = 1;
 			}
 			$utf_char = substr($str, $pos, $utf_len);
 			$pos += $utf_len;
 			$ret[] = sprintf('%04X', $this->utf_to_cp($utf_char));
 		}
 		return implode(' ', $ret);
 	}
 	/**
 	* Convert a UTF-8 char to its codepoint
 	*
 	* @param	string	$utf_char	UTF-8 char
 	* @return	integer				Unicode codepoint
 	* @throws	RuntimeException	When the char is not 1 to 4 bytes long
 	*/
 	protected function utf_to_cp($utf_char)
 	{
 		switch (strlen($utf_char))
 		{
 			case 1:
 				return ord($utf_char);
 			case 2:
 				return ((ord($utf_char[0]) & 0x1F) << 6) | (ord($utf_char[1]) & 0x3F);
 			case 3:
 				return ((ord($utf_char[0]) & 0x0F) << 12) | ((ord($utf_char[1]) & 0x3F) << 6) | (ord($utf_char[2]) & 0x3F);
 			case 4:
 				return ((ord($utf_char[0]) & 0x07) << 18) | ((ord($utf_char[1]) & 0x3F) << 12) | ((ord($utf_char[2]) & 0x3F) << 6) | (ord($utf_char[3]) & 0x3F);
 			default:
 				throw new RuntimeException('UTF-8 chars can only be 1-4 bytes long');
 		}
 	}
 	/**
 	* Return a UTF string formed from a sequence of codepoints in hexadecimal
 	*
 	* @param	string	$seq		Sequence of codepoints, separated with a space
 	* @return	string				UTF-8 string
 	*/
 	protected function hexseq_to_utf($seq)
 	{
 		return implode('', array_map(array($this, 'hex_to_utf'), explode(' ', $seq)));
 	}
 	/**
 	* Convert a codepoint in hexadecimal to a UTF-8 char
 	*
 	* @param	string	$hex		Codepoint, in hexadecimal
 	* @return	string				UTF-8 char
 	*/
 	protected function hex_to_utf($hex)
 	{
 		return $this->cp_to_utf(hexdec($hex));
 	}
 	/**
 	* Convert a codepoint to a UTF-8 char
 	*
 	* @param	integer	$cp			Unicode codepoint
 	* @return	string				UTF-8 string
 	*/
 	protected function cp_to_utf($cp)
 	{
 		if ($cp > 0xFFFF)
 		{
 			return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
 		}
 		else if ($cp > 0x7FF)
 		{
 			return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
 		}
 		else if ($cp > 0x7F)
 		{
 			return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
 		}
 		else
 		{
 			return chr($cp);
 		}
 	}
 	/**
 	* Download a file in chunks, unless the target already exists
 	*
 	* Failures are reported on standard output and are not fatal. A partially
 	* written target is removed so that a later run downloads it again.
 	*
 	* @param	string	$url	URL of the file to fetch
 	* @param	string	$to		Directory the file is stored in, under its base name
 	*/
 	static protected function download($url, $to)
 	{
 		$target = $to . '/' . basename($url);
 		if (file_exists($target))
 		{
 			return;
 		}
 		if (!$fpr = fopen($url, 'rb'))
 		{
 			echo "Failed to download $url\n";
 			return;
 		}
 		if (!$fpw = fopen($target, 'wb'))
 		{
 			// Do not leak the already opened source handle
 			fclose($fpr);
 			echo "Failed to open $target for writing\n";
 			return;
 		}
 		$chunk = 32768;
 		$complete = true;
 		while (!feof($fpr))
 		{
 			$data = fread($fpr, $chunk);
 			if ($data === false || fwrite($fpw, $data) === false)
 			{
 				$complete = false;
 				break;
 			}
 		}
 		fclose($fpr);
 		fclose($fpw);
 		if (!$complete)
 		{
 			// A truncated file would be mistaken for a finished download later
 			unlink($target);
 			echo "Failed to download $url\n";
 		}
 	}
 }