mirror of
https://github.com/phpbb/phpbb.git
synced 2025-06-08 04:18:52 +00:00
Merge branch 'ticket/rxu/9933' into develop-olympus
* ticket/rxu/9933: [ticket/9933] Create unit test for word censor regular expression. [ticket/9933] Move word censor regex into separate function in functions.php [ticket/9933] Wrong handling consecutive multiple asterisks in word censor
This commit is contained in:
commit
a469e80493
4 changed files with 86 additions and 18 deletions
|
@ -95,6 +95,9 @@ class acp_words
|
|||
trigger_error($user->lang['ENTER_WORD'] . adm_back_link($this->u_action), E_USER_WARNING);
|
||||
}
|
||||
|
||||
// Replace multiple consecutive asterisks with single one as those are not needed
|
||||
$word = preg_replace('#\*{2,}#', '*', $word);
|
||||
|
||||
$sql_ary = array(
|
||||
'word' => $word,
|
||||
'replacement' => $replacement
|
||||
|
|
|
@ -82,26 +82,9 @@ class cache extends acm
|
|||
$result = $db->sql_query($sql);
|
||||
|
||||
$censors = array();
|
||||
$unicode = ((version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='))) && @preg_match('/\p{L}/u', 'a') !== false) ? true : false;
|
||||
|
||||
while ($row = $db->sql_fetchrow($result))
|
||||
{
|
||||
if ($unicode)
|
||||
{
|
||||
// Unescape the asterisk to simplify further conversions
|
||||
$row['word'] = str_replace('\*', '*', preg_quote($row['word'], '#'));
|
||||
|
||||
// Replace the asterisk inside the pattern, at the start and at the end of it with regexes
|
||||
$row['word'] = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*(?=[\p{Nd}\p{L}_])#iu', '#^\*#', '#\*$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $row['word']);
|
||||
|
||||
// Generate the final substitution
|
||||
$censors['match'][] = '#(?<![\p{Nd}\p{L}_-])(' . $row['word'] . ')(?![\p{Nd}\p{L}_-])#iu';
|
||||
}
|
||||
else
|
||||
{
|
||||
$censors['match'][] = '#(?<!\S)(' . str_replace('\*', '\S*?', preg_quote($row['word'], '#')) . ')(?!\S)#iu';
|
||||
}
|
||||
|
||||
$censors['match'][] = get_censor_preg_expression($row['word']);
|
||||
$censors['replace'][] = $row['replacement'];
|
||||
}
|
||||
$db->sql_freeresult($result);
|
||||
|
|
|
@ -3428,6 +3428,48 @@ function get_preg_expression($mode)
|
|||
return '';
|
||||
}
|
||||
|
||||
/**
* Generate regexp for naughty words censoring
* Depends on whether installed PHP version supports unicode properties
*
* Asterisks in the word template act as wildcards; several consecutive
* asterisks are handled like a single one.
*
* With unicode property support:
* - an asterisk between two word characters (digit, letter or underscore)
*   matches either a run of spaces or a run of word characters/hyphens
* - an asterisk at the start or at the end of the template matches a run of
*   word characters/hyphens
* - the whole expression must not be directly preceded or followed by a
*   word character or a hyphen
*
* Without unicode property support every asterisk matches a run of
* non-whitespace characters and the expression has to be delimited by
* whitespace or by the start/end of the text.
*
* @param string	$word	word template to be replaced
*
* @return string	regex to use with word censor, or an empty string if no
*					(or an empty) word template has been passed
*/
function get_censor_preg_expression($word)
{
	static $unicode = null;

	// Do not use empty() here: it also treats the string '0' as empty, which
	// would turn a censor for "0" into an invalid (empty) regular expression.
	if (!is_scalar($word) || (string) $word === '')
	{
		return '';
	}

	$word = (string) $word;

	// Check whether PHP version supports unicode properties.
	// The result is cached in a static variable, so the probe runs only once per request.
	if ($unicode === null)
	{
		$version_ok = version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='));

		// preg_match() returns false (and raises a warning, hence the @) if
		// the pattern can not be compiled by the installed PCRE library
		$unicode = $version_ok && @preg_match('/\p{L}/u', 'a') !== false;
	}

	if ($unicode)
	{
		// Unescape the asterisk to simplify further conversions
		$word = str_replace('\*', '*', preg_quote($word, '#'));

		// Replace asterisk(s) inside the pattern, at the start and at the end of it with regexes
		$word = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*+(?=[\p{Nd}\p{L}_])#iu', '#^\*+#', '#\*+$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $word);

		// Generate the final substitution
		$preg_expr = '#(?<![\p{Nd}\p{L}_-])(' . $word . ')(?![\p{Nd}\p{L}_-])#iu';
	}
	else
	{
		// Fallback: the asterisk stays escaped by preg_quote() and is replaced as a whole
		$preg_expr = '#(?<!\S)(' . str_replace('\*', '\S*?', preg_quote($word, '#')) . ')(?!\S)#iu';
	}

	return $preg_expr;
}
|
||||
|
||||
/**
|
||||
* Returns the first block of the specified IPv6 address and as many additional
|
||||
* ones as specified in the length parameter.
|
||||
|
|
40
tests/regex/censor.php
Normal file
40
tests/regex/censor.php
Normal file
|
@ -0,0 +1,40 @@
|
|||
<?php
|
||||
/**
|
||||
*
|
||||
* @package testing
|
||||
* @copyright (c) 2010 phpBB Group
|
||||
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
|
||||
*
|
||||
*/
|
||||
|
||||
require_once __DIR__ . '/../../phpBB/includes/functions.php';
|
||||
|
||||
/**
* Verifies that the regular expressions generated by
* get_censor_preg_expression() match the texts they are meant to censor.
*/
class phpbb_regex_censor_test extends phpbb_test_case
{
	/**
	* Data provider for test_censor()
	*
	* @return array	list of data sets, each one being
	*				array(word template, text the generated regex has to match)
	*/
	public function censor_test_data()
	{
		// Word template => texts which have to be matched by it
		$subjects_by_pattern = array(
			'bad*word'			=> array('bad word'),
			'bad***word'		=> array('bad word'),
			'bad**word'			=> array('bad word'),
			'*bad*word*'		=> array('bad word'),
			'b*d'				=> array('bad'),
			'*bad*'				=> array('bad'),
			'*b*d*'				=> array('bad', 'b d'),
			'b*d*word'			=> array('bad word'),
			'**b**d**word**'	=> array('bad word', 'the bad word catched'),
		);

		$data_sets = array();

		foreach ($subjects_by_pattern as $word_template => $texts)
		{
			foreach ($texts as $text)
			{
				$data_sets[] = array($word_template, $text);
			}
		}

		return $data_sets;
	}

	/**
	* The regex built from the word template has to match the given text
	*
	* @dataProvider censor_test_data
	*/
	public function test_censor($pattern, $subject)
	{
		$this->assertRegExp(get_censor_preg_expression($pattern), $subject);
	}
}
|
Loading…
Add table
Reference in a new issue