- allow searches for multibyte characters in fulltext_native

- use preg_quote safely


git-svn-id: file:///svn/phpbb/trunk@5853 89ea8834-ac86-4346-8a33-228a782c2dd0
This commit is contained in:
Nils Adermann 2006-04-27 14:20:43 +00:00
parent 5f503ddabf
commit 5994371c0a
5 changed files with 43 additions and 29 deletions

View file

@ -23,7 +23,7 @@ class compress
$skip_files = explode(',', $skip_files); $skip_files = explode(',', $skip_files);
// Remove rm prefix from src path // Remove rm prefix from src path
$src_path = ($src_rm_prefix) ? preg_replace('#^(' . preg_quote($src_rm_prefix) . ')#', '', $src) : $src; $src_path = ($src_rm_prefix) ? preg_replace('#^(' . preg_quote($src_rm_prefix, '#') . ')#', '', $src) : $src;
// Add src prefix // Add src prefix
$src_path = ($src_add_prefix) ? ($src_add_prefix . ((substr($src_add_prefix, -1) != '/') ? '/' : '') . $src_path) : $src_path; $src_path = ($src_add_prefix) ? ($src_add_prefix . ((substr($src_add_prefix, -1) != '/') ? '/' : '') . $src_path) : $src_path;
// Remove initial "/" if present // Remove initial "/" if present

View file

@ -1227,7 +1227,7 @@ function mail_encode($str, $encoding)
$str = chunk_split(base64_encode($str), $length, $spacer); $str = chunk_split(base64_encode($str), $length, $spacer);
// remove trailing spacer and add start and end delimiters // remove trailing spacer and add start and end delimiters
$str = preg_replace('#' . preg_quote($spacer) . '$#', '', $str); $str = preg_replace('#' . preg_quote($spacer, '#') . '$#', '', $str);
return $start . $str . $end; return $start . $str . $end;
} }

View file

@ -53,28 +53,28 @@ define('CHECK_TO', 5);
$global_privmsgs_rules = array( $global_privmsgs_rules = array(
CHECK_SUBJECT => array( CHECK_SUBJECT => array(
RULE_IS_LIKE => array('check0' => 'message_subject', 'function' => 'preg_match("/" . preg_quote({STRING}) . "/i", {CHECK0})'), RULE_IS_LIKE => array('check0' => 'message_subject', 'function' => 'preg_match("/" . preg_quote({STRING}, "/") . "/i", {CHECK0})'),
RULE_IS_NOT_LIKE => array('check0' => 'message_subject', 'function' => '!(preg_match("/" . preg_quote({STRING}) . "/i", {CHECK0}))'), RULE_IS_NOT_LIKE => array('check0' => 'message_subject', 'function' => '!(preg_match("/" . preg_quote({STRING}, "/") . "/i", {CHECK0}))'),
RULE_IS => array('check0' => 'message_subject', 'function' => '{CHECK0} == {STRING}'), RULE_IS => array('check0' => 'message_subject', 'function' => '{CHECK0} == {STRING}'),
RULE_IS_NOT => array('check0' => 'message_subject', 'function' => '{CHECK0} != {STRING}'), RULE_IS_NOT => array('check0' => 'message_subject', 'function' => '{CHECK0} != {STRING}'),
RULE_BEGINS_WITH => array('check0' => 'message_subject', 'function' => 'preg_match("/^" . preg_quote({STRING}) . "/i", {CHECK0})'), RULE_BEGINS_WITH => array('check0' => 'message_subject', 'function' => 'preg_match("/^" . preg_quote({STRING}, "/") . "/i", {CHECK0})'),
RULE_ENDS_WITH => array('check0' => 'message_subject', 'function' => 'preg_match("/" . preg_quote({STRING}) . "$/i", {CHECK0})')), RULE_ENDS_WITH => array('check0' => 'message_subject', 'function' => 'preg_match("/" . preg_quote({STRING}, "/") . "$/i", {CHECK0})')),
CHECK_SENDER => array( CHECK_SENDER => array(
RULE_IS_LIKE => array('check0' => 'username', 'function' => 'preg_match("/" . preg_quote({STRING}) . "/i", {CHECK0})'), RULE_IS_LIKE => array('check0' => 'username', 'function' => 'preg_match("/" . preg_quote({STRING}, "/") . "/i", {CHECK0})'),
RULE_IS_NOT_LIKE => array('check0' => 'username', 'function' => '!(preg_match("/" . preg_quote({STRING}) . "/i", {CHECK0}))'), RULE_IS_NOT_LIKE => array('check0' => 'username', 'function' => '!(preg_match("/" . preg_quote({STRING}, "/") . "/i", {CHECK0}))'),
RULE_IS => array('check0' => 'username', 'function' => '{CHECK0} == {STRING}'), RULE_IS => array('check0' => 'username', 'function' => '{CHECK0} == {STRING}'),
RULE_IS_NOT => array('check0' => 'username', 'function' => '{CHECK0} != {STRING}'), RULE_IS_NOT => array('check0' => 'username', 'function' => '{CHECK0} != {STRING}'),
RULE_BEGINS_WITH => array('check0' => 'username', 'function' => 'preg_match("/^" . preg_quote({STRING}) . "/i", {CHECK0})'), RULE_BEGINS_WITH => array('check0' => 'username', 'function' => 'preg_match("/^" . preg_quote({STRING}, "/") . "/i", {CHECK0})'),
RULE_ENDS_WITH => array('check0' => 'username', 'function' => 'preg_match("/" . preg_quote({STRING}) . "$/i", {CHECK0})'), RULE_ENDS_WITH => array('check0' => 'username', 'function' => 'preg_match("/" . preg_quote({STRING}, "/") . "$/i", {CHECK0})'),
RULE_IS_FRIEND => array('check0' => 'friend', 'function' => '{CHECK0} == 1'), RULE_IS_FRIEND => array('check0' => 'friend', 'function' => '{CHECK0} == 1'),
RULE_IS_FOE => array('check0' => 'foe', 'function' => '{CHECK0} == 1'), RULE_IS_FOE => array('check0' => 'foe', 'function' => '{CHECK0} == 1'),
RULE_IS_USER => array('check0' => 'author_id', 'function' => '{CHECK0} == {USER_ID}'), RULE_IS_USER => array('check0' => 'author_id', 'function' => '{CHECK0} == {USER_ID}'),
RULE_IS_GROUP => array('check0' => 'author_in_group', 'function' => 'in_array({GROUP_ID}, {CHECK0})')), RULE_IS_GROUP => array('check0' => 'author_in_group', 'function' => 'in_array({GROUP_ID}, {CHECK0})')),
CHECK_MESSAGE => array( CHECK_MESSAGE => array(
RULE_IS_LIKE => array('check0' => 'message_text', 'function' => 'preg_match("/" . preg_quote({STRING}) . "/i", {CHECK0})'), RULE_IS_LIKE => array('check0' => 'message_text', 'function' => 'preg_match("/" . preg_quote({STRING}, "/") . "/i", {CHECK0})'),
RULE_IS_NOT_LIKE => array('check0' => 'message_text', 'function' => '!(preg_match("/" . preg_quote({STRING}) . "/i", {CHECK0}))'), RULE_IS_NOT_LIKE => array('check0' => 'message_text', 'function' => '!(preg_match("/" . preg_quote({STRING}, "/") . "/i", {CHECK0}))'),
RULE_IS => array('check0' => 'message_text', 'function' => '{CHECK0} == {STRING}'), RULE_IS => array('check0' => 'message_text', 'function' => '{CHECK0} == {STRING}'),
RULE_IS_NOT => array('check0' => 'message_text', 'function' => '{CHECK0} != {STRING}')), RULE_IS_NOT => array('check0' => 'message_text', 'function' => '{CHECK0} != {STRING}')),

View file

@ -50,8 +50,8 @@ class fulltext_native extends search_backend
{ {
global $db, $config; global $db, $config;
$drop_char_match = array('^', '$', ';', '#', '&', '(', ')', '<', '>', '`', '\'', '"', ',', '@', '_', '?', '%', '~', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '!'); $drop_char_match = array('^', '$', '(', ')', '<', '>', '`', '\'', '"', ',', '@', '_', '?', '%', '~', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '!', "\n", "\r");
$drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' '); $drop_char_replace = array(' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ', ' ');
$this->get_ignore_words(); $this->get_ignore_words();
$this->get_synonyms(); $this->get_synonyms();
@ -65,15 +65,20 @@ class fulltext_native extends search_backend
} }
$match = array(); $match = array();
// New lines, carriage returns
$match[] = "#[\n\r]+#";
// NCRs like &nbsp; etc. // NCRs like &nbsp; etc.
$match[] = '#(&amp;|&)[\#a-z0-9]+?;#i'; $match[] = '#(&amp;|&)[a-z0-9]+?;#i';
// Filter out as above // Filter out as above
$keywords = preg_replace($match, ' ', strtolower(trim($keywords))); $keywords = preg_replace($match, ' ', strtolower(trim($keywords)));
// Filter out non alphabetical characters
$keywords = str_replace($drop_char_match, $drop_char_replace, $keywords); $keywords = str_replace($drop_char_match, $drop_char_replace, $keywords);
// Filter out ; and # but not &#[0-9]+;
$keywords = preg_replace('#&\#([0-9]+);#', '<$1>', $keywords);
$keywords = str_replace(array(';', '&', '#'), ' ', $keywords);
$keywords = str_replace(array('<', '>'), array('&#', ';'), $keywords);
// Split words // Split words
$this->split_words = explode(' ', preg_replace('#\s+#', ' ', $keywords)); $this->split_words = explode(' ', preg_replace('#\s+#', ' ', $keywords));
@ -100,7 +105,7 @@ class fulltext_native extends search_backend
} }
// check word length // check word length
$clean_len = strlen(str_replace('*', '', $word)); $clean_len = $this->word_length($word);
if (($clean_len < $config['fulltext_native_min_chars']) || ($clean_len > $config['fulltext_native_max_chars'])) if (($clean_len < $config['fulltext_native_min_chars']) || ($clean_len > $config['fulltext_native_max_chars']))
{ {
if ($prefixed) if ($prefixed)
@ -151,6 +156,14 @@ class fulltext_native extends search_backend
return false; return false;
} }
/**
* Measures a search word for the min/max length checks.
*
* Every decimal numeric character reference of the form &#NNN; is collapsed
* to a single placeholder character before measuring, so it contributes
* exactly one to the length, and "*" characters are not counted at all.
* Anything else is counted byte-wise by strlen().
*
* @param string $word Word to measure
* @return int Length of the word after collapsing references and removing "*"
*/
function word_length($word)
{
	// Each &#NNN; reference stands for one character, so swap it for a single 'x'
	$collapsed = preg_replace('#&\#[0-9]+;#', 'x', $word);

	// "*" characters do not count towards the length
	$without_wildcards = str_replace('*', '', $collapsed);

	return strlen($without_wildcards);
}
/** /**
* Turns text into an array of words that can be stored in the word list table * Turns text into an array of words that can be stored in the word list table
*/ */
@ -165,29 +178,30 @@ class fulltext_native extends search_backend
if (!is_array($drop_char_match)) if (!is_array($drop_char_match))
{ {
$drop_char_match = array('-', '^', '$', ';', '#', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '~', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '\'', '!', '*', '+'); $drop_char_match = array('-', '^', '$', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '~', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '\'', '!', '*', '+', "\n", "\r");
$drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ', ' '); $drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ');
} }
$match = array(); $match = array();
// Comments for hardcoded bbcode elements (urls, smilies, html) // Comments for hardcoded bbcode elements (urls, smilies, html)
$match[] = '#<!\-\- .* \-\->(.*?)<!\-\- .* \-\->#is'; $match[] = '#<!\-\- .* \-\->(.*?)<!\-\- .* \-\->#is';
// New lines, carriage returns
$match[] = "#[\n\r]+#";
// NCRs like &nbsp; etc. // NCRs like &nbsp; etc.
$match[] = '#(&amp;|&)[\#a-z0-9]+?;#i'; $match[] = '#(&amp;|&)[a-z0-9]+;#i';
// Do not index code // Do not index code
$match[] = '#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is'; $match[] = '#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is';
// BBcode // BBcode
$match[] = '#\[\/?[a-z\*\+\-]+(?:=.*?)?(\:?[0-9a-z]{5,})\]#'; $match[] = '#\[\/?[a-z\*\+\-]+(?:=.*?)?(\:?[0-9a-z]{5,})\]#';
// Filter out ; and # but not &#[0-9]+;
//$match[] = '#(&\#[0-9]+;)|;|\#|&#';
$text = preg_replace($match, ' ', ' ' . strtolower(trim($text)) . ' '); $text = preg_replace($match, ' ', ' ' . strtolower(trim($text)) . ' ');
// Filter out non-alphabetical chars // Filter out non-alphabetical chars
$text = str_replace($drop_char_match, $drop_char_replace, $text); $text = str_replace($drop_char_match, $drop_char_replace, $text);
// Filter out ; and # but not &#[0-9]+;
$text = preg_replace('#&\#([0-9]+);#', '<$1>', $text);
$text = str_replace(array(';', '&', '#'), ' ', $text);
$text = str_replace(array('<', '>'), array('&#', ';'), $text);
// Split words // Split words
$text = explode(' ', preg_replace('#\s+#', ' ', trim($text))); $text = explode(' ', preg_replace('#\s+#', ' ', trim($text)));
@ -206,7 +220,7 @@ class fulltext_native extends search_backend
for ($i = 0, $n = sizeof($text); $i < $n; $i++) for ($i = 0, $n = sizeof($text); $i < $n; $i++)
{ {
$text[$i] = trim($text[$i]); $text[$i] = trim($text[$i]);
if (strlen($text[$i]) < $config['fulltext_native_min_chars'] || strlen($text[$i]) > $config['fulltext_native_max_chars']) if ($this->word_length($text[$i]) < $config['fulltext_native_min_chars'] || $this->word_length($text[$i]) > $config['fulltext_native_max_chars'])
{ {
unset($text[$i]); unset($text[$i]);
} }

View file

@ -29,8 +29,8 @@ $topic_id = request_var('t', 0);
$view = request_var('view', ''); $view = request_var('view', '');
$submit = request_var('submit', false); $submit = request_var('submit', false);
$keywords = request_var('keywords', ''); $keywords = request_var('keywords', '', true);
$add_keywords = request_var('add_keywords', ''); $add_keywords = request_var('add_keywords', '', true);
$author = request_var('author', ''); $author = request_var('author', '');
$show_results = ($topic_id) ? 'posts' : request_var('sr', 'posts'); $show_results = ($topic_id) ? 'posts' : request_var('sr', 'posts');
$show_results = ($show_results == 'posts') ? 'posts' : 'topics'; $show_results = ($show_results == 'posts') ? 'posts' : 'topics';
@ -657,7 +657,7 @@ if ($keywords || $author || $search_id || $submit)
if ($hilit) if ($hilit)
{ {
$row['post_text'] = preg_replace('#(?!<.*)(?<!\w)(' . preg_quote($hilit) . ')(?!\w|[^<>]*>)#i', '<span class="posthilit">$1</span>', $row['post_text']); $row['post_text'] = preg_replace('#(?!<.*)(?<!\w)(' . preg_quote($hilit, '#') . ')(?!\w|[^<>]*>)#i', '<span class="posthilit">$1</span>', $row['post_text']);
} }
$row['post_text'] = smiley_text($row['post_text']); $row['post_text'] = smiley_text($row['post_text']);