diff --git a/phpBB/includes/functions.php b/phpBB/includes/functions.php
index 12e4ebc597..cbd39891c9 100644
--- a/phpBB/includes/functions.php
+++ b/phpBB/includes/functions.php
@@ -3245,30 +3245,18 @@ function get_preg_expression($mode)
 * Generate regexp for naughty words censoring
 * Depends on whether installed PHP version supports unicode properties
 *
-* @param string $word word template to be replaced
+* @param string $word word template to be replaced
+* @param bool $use_unicode whether or not to take advantage of PCRE supporting unicode
 *
 * @return string $preg_expr regex to use with word censor
 */
-function get_censor_preg_expression($word)
+function get_censor_preg_expression($word, $use_unicode = true)
 {
-	static $unicode = null;
+	// Unescape the asterisk to simplify further conversions
+	$word = str_replace('\*', '*', preg_quote($word, '#'));
 
-	if (empty($word))
+	if ($use_unicode && pcre_utf8_support())
 	{
-		return '';
-	}
-
-	// Check whether PHP version supports unicode properties
-	if (is_null($unicode))
-	{
-		$unicode = pcre_utf8_support();
-	}
-
-	if ($unicode)
-	{
-		// Unescape the asterisk to simplify further conversions
-		$word = str_replace('\*', '*', preg_quote($word, '#'));
-
 		// Replace asterisk(s) inside the pattern, at the start and at the end of it with regexes
 		$word = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*+(?=[\p{Nd}\p{L}_])#iu', '#^\*+#', '#\*+$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $word);
 
@@ -3277,7 +3265,11 @@ function get_censor_preg_expression($word)
 	}
 	else
 	{
-		$preg_expr = '#(?assertRegExp($regex, $subject);
+	}
+
+	/**
+	 * @dataProvider censor_test_data
+	 */
+	public function test_censor_no_unicode($pattern, $subject)
+	{
+		$regex = get_censor_preg_expression($pattern, false);
 		$this->assertRegExp($regex, $subject);
 	}