mirror of
https://github.com/phpbb/phpbb.git
synced 2025-06-27 21:58:52 +00:00
#7308 git-svn-id: file:///svn/phpbb/trunk@6952 89ea8834-ac86-4346-8a33-228a782c2dd0
This commit is contained in:
parent
49470a87c3
commit
6ec09e6f74
2 changed files with 47 additions and 58 deletions
|
@ -929,6 +929,40 @@ function cp1251($string)
|
|||
return strtr($string, $transform);
|
||||
}
|
||||
|
||||
/**
* Convert a Windows-1252 (cp1252) encoded byte string to UTF-8.
*
* The input is first run through utf8_encode(), which treats it as
* ISO-8859-1. That leaves the bytes 0x80-0x9F as two-byte sequences
* "\xC2\x80".."\xC2\x9F"; the table below then rewrites those sequences
* to the UTF-8 form of the characters cp1252 assigns to those positions.
* Sequences with no entry in the table (0x81, 0x8D, 0x8F, 0x90, 0x9D)
* are passed through as utf8_encode() produced them.
*
* NOTE(review): utf8_encode() is deprecated as of PHP 8.2 - revisit if
* this code has to run on newer PHP versions.
*
* @param string $string cp1252 encoded text
* @return string The same text encoded as UTF-8
*/
function cp1252($string)
{
	// key: utf8_encode() output for a 0x80-0x9F byte, value: UTF-8 of the cp1252 character
	static $c1_to_cp1252 = array(
		"\xC2\x80" => "\xE2\x82\xAC", // U+20AC
		"\xC2\x82" => "\xE2\x80\x9A", // U+201A
		"\xC2\x83" => "\xC6\x92", // U+0192
		"\xC2\x84" => "\xE2\x80\x9E", // U+201E
		"\xC2\x85" => "\xE2\x80\xA6", // U+2026
		"\xC2\x86" => "\xE2\x80\xA0", // U+2020
		"\xC2\x87" => "\xE2\x80\xA1", // U+2021
		"\xC2\x88" => "\xCB\x86", // U+02C6
		"\xC2\x89" => "\xE2\x80\xB0", // U+2030
		"\xC2\x8A" => "\xC5\xA0", // U+0160
		"\xC2\x8B" => "\xE2\x80\xB9", // U+2039
		"\xC2\x8C" => "\xC5\x92", // U+0152
		"\xC2\x8E" => "\xC5\xBD", // U+017D
		"\xC2\x91" => "\xE2\x80\x98", // U+2018
		"\xC2\x92" => "\xE2\x80\x99", // U+2019
		"\xC2\x93" => "\xE2\x80\x9C", // U+201C
		"\xC2\x94" => "\xE2\x80\x9D", // U+201D
		"\xC2\x95" => "\xE2\x80\xA2", // U+2022
		"\xC2\x96" => "\xE2\x80\x93", // U+2013
		"\xC2\x97" => "\xE2\x80\x94", // U+2014
		"\xC2\x98" => "\xCB\x9C", // U+02DC
		"\xC2\x99" => "\xE2\x84\xA2", // U+2122
		"\xC2\x9A" => "\xC5\xA1", // U+0161
		"\xC2\x9B" => "\xE2\x80\xBA", // U+203A
		"\xC2\x9C" => "\xC5\x93", // U+0153
		"\xC2\x9E" => "\xC5\xBE", // U+017E
		"\xC2\x9F" => "\xC5\xB8" // U+0178
	);

	// Step 1: interpret the bytes as ISO-8859-1 and encode them as UTF-8
	$latin1_as_utf8 = utf8_encode($string);

	// Step 2: swap the C1 control sequences for the real cp1252 characters
	return strtr($latin1_as_utf8, $c1_to_cp1252);
}
|
||||
|
||||
function cp1254($string)
|
||||
{
|
||||
static $tranform = array(
|
||||
|
|
|
@ -6,6 +6,9 @@
|
|||
* @copyright (c) 2006 phpBB Group
|
||||
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
|
||||
*
|
||||
* @todo make sure the replacements are called correctly
|
||||
* already done: strtolower, strtoupper, ucfirst, str_split, strrpos, strlen (hopefully!), strpos, substr, htmlspecialchars
|
||||
* remaining: strspn, chr, ord
|
||||
*/
|
||||
|
||||
/**
|
||||
|
@ -700,15 +703,15 @@ function utf8_recode($string, $encoding)
|
|||
{
|
||||
$encoding = strtolower($encoding);
|
||||
|
||||
if ($encoding == 'utf-8' || !is_string($string) || !isset($string[0]))
|
||||
if ($encoding == 'utf-8' || !is_string($string) || empty($string))
|
||||
{
|
||||
return $string;
|
||||
}
|
||||
|
||||
// start with something simple
|
||||
// we force iso-8859-1 to be cp1252
|
||||
if ($encoding == 'iso-8859-1')
|
||||
{
|
||||
return utf8_encode($string);
|
||||
$encoding = 'cp1252';
|
||||
}
|
||||
|
||||
// First, try iconv()
|
||||
|
@ -790,6 +793,7 @@ function utf8_recode($string, $encoding)
|
|||
break;
|
||||
case '1250':
|
||||
case '1251':
|
||||
case '1252':
|
||||
case '1254':
|
||||
case '1255':
|
||||
case '1256':
|
||||
|
@ -1103,6 +1107,9 @@ function utf8_clean_string($text)
|
|||
utf_normalizer::nfc($text);
|
||||
|
||||
static $homographs = array(
|
||||
"\x08" => '', // BACKSPACE => empty string
|
||||
"\x09" => "\x20", // CHARACTER TABULATION => SPACE
|
||||
"\x11" => "\x20", // Device Controls => SPACE
|
||||
"\xC2\xA1" => "\x69", // EXCLAMATION MARK, INVERTED => LATIN SMALL LETTER I
|
||||
"\xC2\xAD" => '', // HYPHEN, SOFT => empty string
|
||||
"\xC4\x90" => "\xC3\x90", // LATIN CAPITAL LETTER D WITH STROKE => LATIN CAPITAL LETTER ETH
|
||||
|
@ -1172,6 +1179,7 @@ function utf8_clean_string($text)
|
|||
"\xE1\xB4\xA8" => "\xD0\xBF", // GREEK LETTER SMALL CAPITAL PI => CYRILLIC SMALL LETTER PE
|
||||
"\xE1\xB4\xA9" => "\xE1\xB4\x98", // GREEK LETTER SMALL CAPITAL RHO => LATIN LETTER SMALL CAPITAL P
|
||||
"\xE1\xB4\xAB" => "\xD0\xBB", // CYRILLIC LETTER SMALL CAPITAL EL => CYRILLIC SMALL LETTER EL
|
||||
"\xE2\x80\x81" => "\x20", // EM QUAD => SPACE
|
||||
"\xE2\x8D\xB3" => "\xC9\xA9", // APL FUNCTIONAL SYMBOL IOTA => LATIN SMALL LETTER IOTA
|
||||
"\xE2\x8D\xB4" => "\xCF\x81", // APL FUNCTIONAL SYMBOL RHO => GREEK SMALL LETTER RHO
|
||||
"\xE2\x8D\xB5" => "\xCF\x89", // APL FUNCcTIONAL SYMBOL OMEGA => GREEK SMALL LETTER OMEGA
|
||||
|
@ -1182,63 +1190,10 @@ function utf8_clean_string($text)
|
|||
"\xF0\x90\x8F\x93" => "\xF0\x90\x8E\x93", // OLD PERSIAN NUMBER TEN => UGARITIC LETTER AIN
|
||||
"\xF0\x90\x92\xA0" => "\xF0\x90\x92\x86", // OSMANYA DIGIT ZERO => OSMANYA LETTER DEEL
|
||||
"\xF0\x92\x80\xB8" => "\xF0\x90\x8E\x9A", // CUNEIFORM SIGN ASH => UGARITIC LETTER TO
|
||||
|
||||
"\xC2\xA0" => "\x20", // NO-BREAK SPACE
|
||||
"\xE1\x9A\x80" => "\x20", // OGHAM SPACE MARK
|
||||
"\xE2\x80\x80" => "\x20", // EN QUAD
|
||||
"\xE2\x80\x81" => "\x20", // EM QUAD
|
||||
"\xE2\x80\x82" => "\x20", // EN SPACE
|
||||
"\xE2\x80\x83" => "\x20", // EM SPACE
|
||||
"\xE2\x80\x84" => "\x20", // THREE-PER-EM SPACE
|
||||
"\xE2\x80\x85" => "\x20", // FOUR-PER-EM SPACE
|
||||
"\xE2\x80\x86" => "\x20", // SIX-PER-EM SPACE
|
||||
"\xE2\x80\x87" => "\x20", // FIGURE SPACE
|
||||
"\xE2\x80\x88" => "\x20", // PUNCTUATION SPACE
|
||||
"\xE2\x80\x89" => "\x20", // THIN SPACE
|
||||
"\xE2\x80\x8A" => "\x20", // HAIR SPACE
|
||||
"\xE2\x80\xAF" => "\x20", // NARROW NO-BREAK SPACE
|
||||
"\xE2\x81\x9F" => "\x20", // MEDIUM MATHEMATICAL SPACE
|
||||
"\xE3\x80\x80" => "\x20", // IDEOGRAPHIC SPACE
|
||||
|
||||
"\xDB\x9D" => '', // ARABIC END OF AYAH
|
||||
"\xDC\x8F" => '', // SYRIAC ABBREVIATION MARK
|
||||
"\xE1\xA0\x86" => '', // MONGOLIAN TODO SOFT HYPHEN
|
||||
"\xE1\xA0\x8E" => '', // MONGOLIAN VOWEL SEPARATOR
|
||||
"\xE2\x80\x8B" => '', // ZERO WIDTH SPACE
|
||||
"\xE2\x80\x8C" => '', // ZERO WIDTH NON-JOINER
|
||||
"\xE2\x80\x8D" => '', // ZERO WIDTH JOINER
|
||||
"\xE2\x80\xA8" => '', // LINE SEPARATOR
|
||||
"\xE2\x80\xA9" => '', // PARAGRAPH SEPARATOR
|
||||
"\xE2\x81\xA0" => '', // WORD JOINER
|
||||
"\xE2\x81\xA1" => '', // FUNCTION APPLICATION
|
||||
"\xE2\x81\xA2" => '', // INVISIBLE TIMES
|
||||
"\xE2\x81\xA3" => '', // INVISIBLE SEPARATOR
|
||||
"\xE2\x81\xAA" => '', // [CONTROL CHARACTERS]
|
||||
"\xE2\x81\xAB" => '', // [CONTROL CHARACTERS]
|
||||
"\xE2\x81\xAC" => '', // [CONTROL CHARACTERS]
|
||||
"\xE2\x81\xAD" => '', // [CONTROL CHARACTERS]
|
||||
"\xE2\x81\xAE" => '', // [CONTROL CHARACTERS]
|
||||
"\xE2\x81\xAF" => '', // [CONTROL CHARACTERS]
|
||||
"\xEF\xBB\xBF" => '', // ZERO WIDTH NO-BREAK SPACE
|
||||
"\xEF\xBF\xB9" => '', // [CONTROL CHARACTERS]
|
||||
"\xEF\xBF\xBA" => '', // [CONTROL CHARACTERS]
|
||||
"\xEF\xBF\xBB" => '', // [CONTROL CHARACTERS]
|
||||
"\xEF\xBF\xBC" => '', // [CONTROL CHARACTERS]
|
||||
"\xF0\x9D\x85\xB3" => '', // [MUSICAL CONTROL CHARACTERS]
|
||||
"\xF0\x9D\x85\xB4" => '', // [MUSICAL CONTROL CHARACTERS]
|
||||
"\xF0\x9D\x85\xB5" => '', // [MUSICAL CONTROL CHARACTERS]
|
||||
"\xF0\x9D\x85\xB6" => '', // [MUSICAL CONTROL CHARACTERS]
|
||||
"\xF0\x9D\x85\xB7" => '', // [MUSICAL CONTROL CHARACTERS]
|
||||
"\xF0\x9D\x85\xB8" => '', // [MUSICAL CONTROL CHARACTERS]
|
||||
"\xF0\x9D\x85\xB9" => '', // [MUSICAL CONTROL CHARACTERS]
|
||||
"\xF0\x9D\x85\xBA" => '', // [MUSICAL CONTROL CHARACTERS]
|
||||
);
|
||||
|
||||
$text = strtr($text, $homographs);
|
||||
|
||||
// Other control characters
|
||||
$text = preg_replace('#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#', '', $text);
|
||||
|
||||
return $text;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue