mirror of
https://github.com/phpbb/phpbb.git
synced 2025-06-28 06:08:52 +00:00
The MySQL search backend now tries to use mbstring's regex engine if PCRE does not have Unicode character property support
git-svn-id: file:///svn/phpbb/trunk@7247 89ea8834-ac86-4346-8a33-228a782c2dd0
This commit is contained in:
parent
3abc3dd331
commit
9b50de6b89
2 changed files with 86 additions and 12 deletions
|
@ -33,6 +33,7 @@ class fulltext_mysql extends search_backend
|
||||||
var $search_query;
|
var $search_query;
|
||||||
var $common_words = array();
|
var $common_words = array();
|
||||||
var $pcre_properties = false;
|
var $pcre_properties = false;
|
||||||
|
var $mbstring_regex = false;
|
||||||
|
|
||||||
function fulltext_mysql(&$error)
|
function fulltext_mysql(&$error)
|
||||||
{
|
{
|
||||||
|
@ -45,6 +46,11 @@ class fulltext_mysql extends search_backend
|
||||||
$this->pcre_properties = true;
|
$this->pcre_properties = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (function_exists('mb_ereg'))
|
||||||
|
{
|
||||||
|
$this->mbstring_regex = true;
|
||||||
|
}
|
||||||
|
$this->pcre_properties = false;
|
||||||
$error = false;
|
$error = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -129,10 +135,42 @@ class fulltext_mysql extends search_backend
|
||||||
$keywords = preg_replace($match, ' ', trim($keywords));
|
$keywords = preg_replace($match, ' ', trim($keywords));
|
||||||
|
|
||||||
// Split words
|
// Split words
|
||||||
$split_keywords = preg_replace(($this->pcre_properties) ? '#([^\p{L}\p{N}\'*])#u' : '#([^\w\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($keywords)));
|
if ($this->pcre_properties)
|
||||||
$matches = array();
|
{
|
||||||
preg_match_all(($this->pcre_properties) ? '#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u' : '#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#u', $split_keywords, $matches);
|
$split_keywords = preg_replace('#([^\p{L}\p{N}\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($keywords)));
|
||||||
$this->split_words = $matches[1];
|
}
|
||||||
|
else if ($this->mbstring_regex)
|
||||||
|
{
|
||||||
|
$split_keywords = mb_ereg_replace('([^\w\'*])', '\\1\\1', str_replace('\'\'', '\' \'', trim($keywords)));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$split_keywords = preg_replace('#([^\w\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($keywords)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($this->pcre_properties)
|
||||||
|
{
|
||||||
|
$matches = array();
|
||||||
|
preg_match_all('#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u', $split_keywords, $matches);
|
||||||
|
$this->split_words = $matches[1];
|
||||||
|
}
|
||||||
|
else if ($this->mbstring_regex)
|
||||||
|
{
|
||||||
|
mb_regex_encoding('UTF-8');
|
||||||
|
mb_ereg_search_init($split_keywords, '(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)');
|
||||||
|
|
||||||
|
while (($word = mb_ereg_search_regs()))
|
||||||
|
{
|
||||||
|
$this->split_words[] = $word[1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$matches = array();
|
||||||
|
preg_match_all('#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#u', $split_keywords, $matches);
|
||||||
|
$this->split_words = $matches[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (sizeof($this->ignore_words))
|
if (sizeof($this->ignore_words))
|
||||||
{
|
{
|
||||||
|
@ -180,10 +218,41 @@ class fulltext_mysql extends search_backend
|
||||||
$this->get_synonyms();
|
$this->get_synonyms();
|
||||||
|
|
||||||
// Split words
|
// Split words
|
||||||
$text = preg_replace(($this->pcre_properties) ? '#([^\p{L}\p{N}\'*])#u' : '#([^\w\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($text)));
|
if ($this->pcre_properties)
|
||||||
$matches = array();
|
{
|
||||||
preg_match_all(($this->pcre_properties) ? '#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u' : '#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#u', $text, $matches);
|
$text = preg_replace('#([^\p{L}\p{N}\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($text)));
|
||||||
$text = $matches[1];
|
}
|
||||||
|
else if ($this->mbstring_regex)
|
||||||
|
{
|
||||||
|
$text = mb_ereg_replace('([^\w\'*])', '\\1\\1', str_replace('\'\'', '\' \'', trim($text)));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$text = preg_replace('#([^\w\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($text)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($this->pcre_properties)
|
||||||
|
{
|
||||||
|
$matches = array();
|
||||||
|
preg_match_all('#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u', $text, $matches);
|
||||||
|
$text = $matches[1];
|
||||||
|
}
|
||||||
|
else if ($this->mbstring_regex)
|
||||||
|
{
|
||||||
|
mb_regex_encoding('UTF-8');
|
||||||
|
mb_ereg_search_init($text, '(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)');
|
||||||
|
|
||||||
|
while (($word = mb_ereg_search_regs()))
|
||||||
|
{
|
||||||
|
$text[] = $word[1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$matches = array();
|
||||||
|
preg_match_all('#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#u', $text, $matches);
|
||||||
|
$text = $matches[1];
|
||||||
|
}
|
||||||
|
|
||||||
if (sizeof($this->ignore_words))
|
if (sizeof($this->ignore_words))
|
||||||
{
|
{
|
||||||
|
@ -794,12 +863,15 @@ class fulltext_mysql extends search_backend
|
||||||
{
|
{
|
||||||
global $user, $config;
|
global $user, $config;
|
||||||
|
|
||||||
|
|
||||||
$tpl = '
|
$tpl = '
|
||||||
<dl>
|
<dl>
|
||||||
<dt><label>' . $user->lang['FULLTEXT_MYSQL_UNICODE'] . '</label><br /><span>' . $user->lang['FULLTEXT_MYSQL_UNICODE_EXPLAIN'] . '</span></dt>
|
<dt><label>' . $user->lang['FULLTEXT_MYSQL_PCRE'] . '</label><br /><span>' . $user->lang['FULLTEXT_MYSQL_PCRE_EXPLAIN'] . '</span></dt>
|
||||||
<dd>' . (($this->pcre_properties) ? $user->lang['YES'] : $user->lang['NO']) . ' (PHP ' . PHP_VERSION . ')</dd>
|
<dd>' . (($this->pcre_properties) ? $user->lang['YES'] : $user->lang['NO']) . ' (PHP ' . PHP_VERSION . ')</dd>
|
||||||
</dl>
|
</dl>
|
||||||
|
<dl>
|
||||||
|
<dt><label>' . $user->lang['FULLTEXT_MYSQL_MBSTRING'] . '</label><br /><span>' . $user->lang['FULLTEXT_MYSQL_MBSTRING_EXPLAIN'] . '</span></dt>
|
||||||
|
<dd>' . (($this->mbstring_regex) ? $user->lang['YES'] : $user->lang['NO']). '</dd>
|
||||||
|
</dl>
|
||||||
';
|
';
|
||||||
|
|
||||||
// These are fields required in the config table
|
// These are fields required in the config table
|
||||||
|
|
|
@ -52,8 +52,10 @@ $lang = array_merge($lang, array(
|
||||||
'FULLTEXT_MYSQL_SUBJECT_CARDINALITY' => 'Cardinality of the post_subject fulltext index (estimate of unique values)',
|
'FULLTEXT_MYSQL_SUBJECT_CARDINALITY' => 'Cardinality of the post_subject fulltext index (estimate of unique values)',
|
||||||
'FULLTEXT_MYSQL_TEXT_CARDINALITY' => 'Cardinality of the post_text fulltext index (estimate of unique values)',
|
'FULLTEXT_MYSQL_TEXT_CARDINALITY' => 'Cardinality of the post_text fulltext index (estimate of unique values)',
|
||||||
'FULLTEXT_MYSQL_TOTAL_POSTS' => 'Total number of indexed posts',
|
'FULLTEXT_MYSQL_TOTAL_POSTS' => 'Total number of indexed posts',
|
||||||
'FULLTEXT_MYSQL_UNICODE' => 'Support for non-latin UTF-8 characters:',
|
'FULLTEXT_MYSQL_MBSTRING' => 'Support for non-latin UTF-8 characters using mbstring:',
|
||||||
'FULLTEXT_MYSQL_UNICODE_EXPLAIN' => 'This search backend requires PCRE unicode character properties, only available in PHP 4.4, 5.1 and above, if you want to search for non-latin characters.',
|
'FULLTEXT_MYSQL_PCRE' => 'Support for non-latin UTF-8 characters using PCRE:',
|
||||||
|
'FULLTEXT_MYSQL_MBSTRING_EXPLAIN' => 'If PCRE does not have unicode character properties, the search backend will try to use mbstring\'s regular expression engine.',
|
||||||
|
'FULLTEXT_MYSQL_PCRE_EXPLAIN' => 'This search backend requires PCRE unicode character properties, only available in PHP 4.4, 5.1 and above, if you want to search for non-latin characters.',
|
||||||
|
|
||||||
'GENERAL_SEARCH_SETTINGS' => 'General search settings',
|
'GENERAL_SEARCH_SETTINGS' => 'General search settings',
|
||||||
'GO_TO_SEARCH_INDEX' => 'Go to search index page',
|
'GO_TO_SEARCH_INDEX' => 'Go to search index page',
|
||||||
|
|
Loading…
Add table
Reference in a new issue