diff --git a/phpBB/includes/search/fulltext_mysql.php b/phpBB/includes/search/fulltext_mysql.php index 7c67b755e1..52779ca835 100644 --- a/phpBB/includes/search/fulltext_mysql.php +++ b/phpBB/includes/search/fulltext_mysql.php @@ -33,6 +33,7 @@ class fulltext_mysql extends search_backend var $search_query; var $common_words = array(); var $pcre_properties = false; + var $mbstring_regex = false; function fulltext_mysql(&$error) { @@ -45,6 +46,11 @@ class fulltext_mysql extends search_backend $this->pcre_properties = true; } + if (function_exists('mb_ereg')) + { + $this->mbstring_regex = true; + } +$this->pcre_properties = false; $error = false; } @@ -129,10 +135,42 @@ class fulltext_mysql extends search_backend $keywords = preg_replace($match, ' ', trim($keywords)); // Split words - $split_keywords = preg_replace(($this->pcre_properties) ? '#([^\p{L}\p{N}\'*])#u' : '#([^\w\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($keywords))); - $matches = array(); - preg_match_all(($this->pcre_properties) ? '#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u' : '#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#u', $split_keywords, $matches); - $this->split_words = $matches[1]; + if ($this->pcre_properties) + { + $split_keywords = preg_replace('#([^\p{L}\p{N}\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($keywords))); + } + else if ($this->mbstring_regex) + { + $split_keywords = mb_ereg_replace('([^\w\'*])', '\\1\\1', str_replace('\'\'', '\' \'', trim($keywords))); + } + else + { + $split_keywords = preg_replace('#([^\w\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($keywords))); + } + + if ($this->pcre_properties) + { + $matches = array(); + preg_match_all('#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u', $split_keywords, $matches); + $this->split_words = $matches[1]; + } + else if ($this->mbstring_regex) + { + mb_regex_encoding('UTF-8'); + mb_ereg_search_init($split_keywords, '(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)'); + + while (($word = mb_ereg_search_regs())) + { + $this->split_words[] = $word[1]; + } + } + else + { + $matches = array(); + preg_match_all('#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#u', $split_keywords, $matches); + $this->split_words = $matches[1]; + } + if (sizeof($this->ignore_words)) { @@ -180,10 +218,41 @@ class fulltext_mysql extends search_backend $this->get_synonyms(); // Split words - $text = preg_replace(($this->pcre_properties) ? '#([^\p{L}\p{N}\'*])#u' : '#([^\w\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($text))); - $matches = array(); - preg_match_all(($this->pcre_properties) ? '#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u' : '#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#u', $text, $matches); - $text = $matches[1]; + if ($this->pcre_properties) + { + $text = preg_replace('#([^\p{L}\p{N}\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($text))); + } + else if ($this->mbstring_regex) + { + $text = mb_ereg_replace('([^\w\'*])', '\\1\\1', str_replace('\'\'', '\' \'', trim($text))); + } + else + { + $text = preg_replace('#([^\w\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($text))); + } + + if ($this->pcre_properties) + { + $matches = array(); + preg_match_all('#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u', $text, $matches); + $text = $matches[1]; + } + else if ($this->mbstring_regex) + { + mb_regex_encoding('UTF-8'); + mb_ereg_search_init($text, '(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)'); + + while (($word = mb_ereg_search_regs())) + { + $text[] = $word[1]; + } + } + else + { + $matches = array(); + preg_match_all('#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#u', $text, $matches); + $text = $matches[1]; + } if (sizeof($this->ignore_words)) { @@ -794,12 +863,15 @@ class fulltext_mysql extends search_backend { global $user, $config; - $tpl = '
-

' . $user->lang['FULLTEXT_MYSQL_UNICODE_EXPLAIN'] . '
+

' . $user->lang['FULLTEXT_MYSQL_PCRE_EXPLAIN'] . '
' . (($this->pcre_properties) ? $user->lang['YES'] : $user->lang['NO']) . ' (PHP ' . PHP_VERSION . ')
+
+

' . $user->lang['FULLTEXT_MYSQL_MBSTRING_EXPLAIN'] . '
+
' . (($this->mbstring_regex) ? $user->lang['YES'] : $user->lang['NO']). '
+
'; // These are fields required in the config table diff --git a/phpBB/language/en/acp/search.php b/phpBB/language/en/acp/search.php index 8224a6e1bc..5982eec31a 100644 --- a/phpBB/language/en/acp/search.php +++ b/phpBB/language/en/acp/search.php @@ -52,8 +52,10 @@ $lang = array_merge($lang, array( 'FULLTEXT_MYSQL_SUBJECT_CARDINALITY' => 'Cardinality of the post_subject fulltext index (estimate of unique values)', 'FULLTEXT_MYSQL_TEXT_CARDINALITY' => 'Cardinality of the post_text fulltext index (estimate of unique values)', 'FULLTEXT_MYSQL_TOTAL_POSTS' => 'Total number of indexed posts', - 'FULLTEXT_MYSQL_UNICODE' => 'Support for non-latin UTF-8 characters:', - 'FULLTEXT_MYSQL_UNICODE_EXPLAIN' => 'This search backend requires PCRE unicode character properties, only available in PHP 4.4, 5.1 and above, if you want to search for non-latin characters.', + 'FULLTEXT_MYSQL_MBSTRING' => 'Support for non-latin UTF-8 characters using mbstring:', + 'FULLTEXT_MYSQL_PCRE' => 'Support for non-latin UTF-8 characters using PCRE:', + 'FULLTEXT_MYSQL_MBSTRING_EXPLAIN' => 'If PCRE does not have unicode character properties, the search backend will try to use mbstring\'s regular expression engine.', + 'FULLTEXT_MYSQL_PCRE_EXPLAIN' => 'This search backend requires PCRE unicode character properties, only available in PHP 4.4, 5.1 and above, if you want to search for non-latin characters.', 'GENERAL_SEARCH_SETTINGS' => 'General search settings', 'GO_TO_SEARCH_INDEX' => 'Go to search index page',