mirror of
https://github.com/phpbb/phpbb.git
synced 2025-06-28 14:18:52 +00:00
[ticket/16234] Fix & enhance Sphinx search backend functionality
PHPBB3-16234 PHPBB3-16233 PHPBB3-15367 PHPBB3-13958
This commit is contained in:
parent
f0f1cd37dd
commit
4fb9c1cd98
1 changed file with 42 additions and 29 deletions
|
@ -436,21 +436,21 @@ class fulltext_sphinx
|
||||||
public function split_keywords(&$keywords, $terms)
|
public function split_keywords(&$keywords, $terms)
|
||||||
{
|
{
|
||||||
// Keep quotes and new lines
|
// Keep quotes and new lines
|
||||||
$keywords = str_replace(array('"', "\n"), array('"', ' '), trim($keywords));
|
$keywords = str_replace(array['"', "\n"], array['"', ' '], trim($keywords));
|
||||||
|
|
||||||
if ($terms == 'all')
|
if ($terms == 'all')
|
||||||
{
|
{
|
||||||
// Replaces verbal operators OR and NOT with special characters | and -, unless appearing within quotation marks
|
// Replaces verbal operators OR and NOT with special characters | and -, unless appearing within quotation marks
|
||||||
$match = array('#\sor\s(?=([^"]*"[^"]*")*[^"]*$)#i', '#\snot\s(?=([^"]*"[^"]*")*[^"]*$)#i');
|
$match = array['#\sor\s(?=([^"]*"[^"]*")*[^"]*$)#i', '#\snot\s(?=([^"]*"[^"]*")*[^"]*$)#i'];
|
||||||
$replace = array(' | ', ' -');
|
$replace = array[' | ', ' -'];
|
||||||
|
|
||||||
$keywords = preg_replace($match, $replace, $keywords);
|
$keywords = preg_replace($match, $replace, $keywords);
|
||||||
$this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED);
|
$this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
$match = array ( '\\', '(',')','|','!','@','~', '/', '^', '$', '=','&', '<', '>');
|
$match = array['\\', '(',')','|','!','@','~', '/', '^', '$', '=','&', '<', '>'];
|
||||||
$replace = array ( ' ', ' ', ' ', ' ',' ',' ',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ');
|
$replace = array[' ', ' ', ' ', ' ',' ',' ',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '];
|
||||||
|
|
||||||
$keywords = str_replace($match, $replace, $keywords);
|
$keywords = str_replace($match, $replace, $keywords);
|
||||||
$this->sphinx->SetMatchMode(SPH_MATCH_ANY);
|
$this->sphinx->SetMatchMode(SPH_MATCH_ANY);
|
||||||
|
@ -467,34 +467,47 @@ class fulltext_sphinx
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cleans search query passed into Sphinx search engine, as follows:
|
* Cleans search query passed into Sphinx search engine, as follows:
|
||||||
* 1. Hyphenated words are replaced with keyword search for either the exact phrase with spaces or as a single word without spaces eg search for "know-it-all" becomes ("know it all"|"knowitall*")
|
* 1. Hyphenated words are replaced with keyword search for either the exact phrase with spaces
|
||||||
|
* or as a single word without spaces, e.g. a search for "know-it-all" becomes ("know it all"|knowitall*)
|
||||||
* 2. Words with apostrophes are contracted eg "it's" becomes "its"
|
* 2. Words with apostrophes are contracted eg "it's" becomes "its"
|
||||||
* 3. <, >, " and & are decoded from HTML entities.
|
* 3. <, >, " and & are decoded from HTML entities.
|
||||||
* 4. Following special characters used as search operators in Sphinx are preserved when used with correct syntax:
|
* 4. Following special characters used as search operators in Sphinx are preserved when used with correct syntax:
|
||||||
* (a) quorum matching: "the world is a wonderful place"/3 -- finds 3 of the words within the phrase. Number must be between 1 and 9.
|
* (a) quorum matching: "the world is a wonderful place"/3
|
||||||
* (b) proximity search: "hello world"~10 -- finds hello and world within 10 words of each other. Number can be between 1 and 99.
|
* Finds 3 of the words within the phrase. Number must be between 1 and 9.
|
||||||
* (c) strict word order: aaa << bbb << ccc -- finds "aaa" only where it appears before "bbb" and only where "bbb" appears before "ccc".
|
* (b) proximity search: "hello world"~10
|
||||||
* (d) exact match operator: if lemmatizer or stemming enabled, search will find exact match only and ignore other grammatical forms of the same word stem..
|
* Finds hello and world within 10 words of each other. Number can be between 1 and 99.
|
||||||
* eg raining =cats and =dogs -- will not return "raining cat and dog"
|
* (c) strict word order: aaa << bbb << ccc
|
||||||
* eg ="search this exact phrase" -- will not return "searched this exact phrase", "searching these exact phrases".
|
* Finds "aaa" only where it appears before "bbb" and only where "bbb" appears before "ccc".
|
||||||
* 5. Special characters /, ~, << and = not complying with the correct syntax and other reserved operators are escaped and searched literally.
|
* (d) exact match operator: if lemmatizer or stemming enabled,
|
||||||
* Special characters not explicitly listed in charset_table or blend_chars in sphinx.conf will not be indexed and keywords containing them will be ignored by Sphinx.
|
* search will find exact match only and ignore other grammatical forms of the same word stem.
|
||||||
|
* eg. raining =cats and =dogs
|
||||||
|
* will not return "raining cat and dog"
|
||||||
|
* eg. ="search this exact phrase"
|
||||||
|
* will not return "searched this exact phrase", "searching these exact phrases".
|
||||||
|
* 5. Special characters /, ~, << and = not complying with the correct syntax
|
||||||
|
* and other reserved operators are escaped and searched literally.
|
||||||
|
* Special characters not explicitly listed in charset_table or blend_chars in sphinx.conf
|
||||||
|
* will not be indexed and keywords containing them will be ignored by Sphinx.
|
||||||
* By default, only $, %, & and @ characters are indexed and searchable.
|
* By default, only $, %, & and @ characters are indexed and searchable.
|
||||||
* String transformation is in backend only and not visible to the end user nor reflected in the results page URL or keyword highlighting.
|
* String transformation is in backend only and not visible to the end user
|
||||||
|
* nor reflected in the results page URL or keyword highlighting.
|
||||||
|
*
|
||||||
|
* @param string $search_string
|
||||||
|
* @return string
|
||||||
*/
|
*/
|
||||||
public function sphinx_clean_search_string($search_string)
|
public function sphinx_clean_search_string($search_string)
|
||||||
{
|
{
|
||||||
$from = array('@', '^', '$', '!', '<', '>', '"', '&', '\'');
|
$from = array['@', '^', '$', '!', '<', '>', '"', '&', '\''];
|
||||||
$to = array('\@', '\^', '\$', '\!', '<', '>', '"', '&', '');
|
$to = array['\@', '\^', '\$', '\!', '<', '>', '"', '&', ''];
|
||||||
|
|
||||||
$search_string = str_replace($from, $to, $search_string);
|
$search_string = str_replace($from, $to, $search_string);
|
||||||
|
|
||||||
$search_string = strrev($search_string);
|
$search_string = strrev($search_string);
|
||||||
$search_string = preg_replace(array('#\/(?!"[^"]+")#', '#~(?!"[^"]+")#'), array('/\\', '~\\'), $search_string);
|
$search_string = preg_replace(array['#\/(?!"[^"]+")#', '#~(?!"[^"]+")#'], array['/\\', '~\\'], $search_string);
|
||||||
$search_string = strrev($search_string);
|
$search_string = strrev($search_string);
|
||||||
|
|
||||||
$match = array('#(/|\\\\/)(?)#', '#(~|\\\\~)(?!\d{1,2}(\s|$))#', '#((?:\p{L}|\p{N})+)-((?:\p{L}|\p{N})+)(?:-((?:\p{L}|\p{N})+))?(?:-((?:\p{L}|\p{N})+))?#i', '#<<\s*$#', '#(\S\K=|=(?=\s)|=$)#');
|
$match = array['#(/|\\\\/)(?)#', '#(~|\\\\~)(?!\d{1,2}(\s|$))#', '#((?:\p{L}|\p{N})+)-((?:\p{L}|\p{N})+)(?:-((?:\p{L}|\p{N})+))?(?:-((?:\p{L}|\p{N})+))?#i', '#<<\s*$#', '#(\S\K=|=(?=\s)|=$)#'];
|
||||||
$replace = array('\/', '\~', '("$1 $2 $3 $4"|$1$2$3$4*)', '\<\<', '\=');
|
$replace = array['\/', '\~', '("$1 $2 $3 $4"|$1$2$3$4*)', '\<\<', '\='];
|
||||||
|
|
||||||
$search_string = preg_replace($match, $replace, $search_string);
|
$search_string = preg_replace($match, $replace, $search_string);
|
||||||
$search_string = preg_replace('#\s+"\|#', '"|', $search_string);
|
$search_string = preg_replace('#\s+"\|#', '"|', $search_string);
|
||||||
|
|
Loading…
Add table
Reference in a new issue