diff --git a/phpBB/includes/functions_sphinx.php b/phpBB/includes/functions_sphinx.php new file mode 100644 index 0000000000..976f93f77c --- /dev/null +++ b/phpBB/includes/functions_sphinx.php @@ -0,0 +1,507 @@ +read($filename); + } + } + + /** + * Get a section object by its name + * + * @param string $name The name of the section that shall be returned + * @return sphinx_config_section The section object or null if none was found + */ + function &get_section_by_name($name) + { + for ($i = 0, $n = sizeof($this->sections); $i < $n; $i++) + { + // make sure this is really a section object and not a comment + if (is_a($this->sections[$i], 'sphinx_config_section') && $this->sections[$i]->get_name() == $name) + { + return $this->sections[$i]; + } + } + $null = null; + return $null; + } + + /** + * Appends a new empty section to the end of the config + * + * @param string $name The name for the new section + * @return sphinx_config_section The newly created section object + */ + function &add_section($name) + { + $this->sections[] = new sphinx_config_section($name, ''); + return $this->sections[sizeof($this->sections) - 1]; + } + + /** + * Parses the config file at the given path, which is stored in $this->loaded for later use + * + * @param string $filename The path to the config file + */ + function read($filename) + { + // split the file into lines, we'll process it line by line + $config_file = file($filename); + + $this->sections = array(); + + $section = null; + $found_opening_bracket = false; + $in_value = false; + + foreach ($config_file as $i => $line) + { + // if the value of a variable continues to the next line because the line break was escaped + // then we don't trim leading space but treat it as a part of the value + if ($in_value) + { + $line = rtrim($line); + } + else + { + $line = trim($line); + } + + // if we're not inside a section look for one + if (!$section) + { + // add empty lines and comments as comment objects to the section list + // that way they're not deleted when reassembling the file from the sections + if (!$line || $line[0] == '#') + { + $this->sections[] = new sphinx_config_comment($config_file[$i]); + continue; + } + else + { + // otherwise we scan the line reading the section name until we find + // an opening curly bracket or a comment + $section_name = ''; + $section_name_comment = ''; + $found_opening_bracket = false; + for ($j = 0, $n = strlen($line); $j < $n; $j++) + { + if ($line[$j] == '#') + { + $section_name_comment = substr($line, $j); + break; + } + + if ($found_opening_bracket) + { + continue; + } + + if ($line[$j] == '{') + { + $found_opening_bracket = true; + continue; + } + + $section_name .= $line[$j]; + } + + // and then we create the new section object + $section_name = trim($section_name); + $section = new sphinx_config_section($section_name, $section_name_comment); + } + } + else // if we're looking for variables inside a section + { + $skip_first = false; + + // if we're not in a value continuing over the line feed + if (!$in_value) + { + // then add empty lines and comments as comment objects to the variable list + // of this section so they're not deleted on reassembly + if (!$line || $line[0] == '#') + { + $section->add_variable(new sphinx_config_comment($config_file[$i])); + continue; + } + + // as long as we haven't yet actually found an opening bracket for this section + // we treat everything as comments so it's not deleted either + if (!$found_opening_bracket) + { + if ($line[0] == '{') + { + $skip_first = true; + $line = substr($line, 1); + $found_opening_bracket = true; + } + else + { + $section->add_variable(new sphinx_config_comment($config_file[$i])); + continue; + } + } + } + + // if we did not find a comment in this line or still add to the previous line's value ... + if ($line || $in_value) + { + if (!$in_value) + { + $name = ''; + $value = ''; + $comment = ''; + $found_assignment = false; + } + $in_value = false; + $end_section = false; + + // ... then we should prase this line char by char: + // - first there's the variable name + // - then an equal sign + // - the variable value + // - possibly a backslash before the linefeed in this case we need to continue + // parsing the value in the next line + // - a # indicating that the rest of the line is a comment + // - a closing curly bracket indicating the end of this section + for ($j = 0, $n = strlen($line); $j < $n; $j++) + { + if ($line[$j] == '#') + { + $comment = substr($line, $j); + break; + } + else if ($line[$j] == '}') + { + $comment = substr($line, $j + 1); + $end_section = true; + break; + } + else if (!$found_assignment) + { + if ($line[$j] == '=') + { + $found_assignment = true; + } + else + { + $name .= $line[$j]; + } + } + else + { + if ($line[$j] == '\\' && $j == $n - 1) + { + $value .= "\n"; + $in_value = true; + continue 2; // go to the next line and keep processing the value in there + } + $value .= $line[$j]; + } + } + + // if a name and an equal sign were found then we have append a new variable object to the section + if ($name && $found_assignment) + { + $section->add_variable(new sphinx_config_variable(trim($name), trim($value), ($end_section) ? '' : $comment)); + continue; + } + + // if we found a closing curly bracket this section has been completed and we can append it to the section list + // and continue with looking for the next section + if ($end_section) + { + $section->set_end_comment($comment); + $this->sections[] = $section; + $section = null; + continue; + } + } + + // if we did not find anything meaningful up to here, then just treat it as a comment + $comment = ($skip_first) ? "\t" . substr(ltrim($config_file[$i]), 1) : $config_file[$i]; + $section->add_variable(new sphinx_config_comment($comment)); + } + } + + // keep the filename for later use + $this->loaded = $filename; + } + + /** + * Writes the config data into a file + * + * @param string $filename The optional filename into which the config data shall be written. + * If it's not specified it will be written into the file that the config + * was originally read from. + */ + function write($filename = false) + { + if ($filename === false && $this->loaded) + { + $filename = $this->loaded; + } + + $data = ""; + foreach ($this->sections as $section) + { + $data .= $section->to_string(); + } + + $fp = fopen($filename, 'wb'); + fwrite($fp, $data); + fclose($fp); + } +} + +/** +* sphinx_config_section +* Represents a single section inside the sphinx configuration +*/ +class sphinx_config_section +{ + var $name; + var $comment; + var $end_comment; + var $variables = array(); + + /** + * Construct a new section + * + * @param string $name Name of the section + * @param string $comment Comment that should be appended after the name in the + * textual format. + */ + function sphinx_config_section($name, $comment) + { + $this->name = $name; + $this->comment = $comment; + $this->end_comment = ''; + } + + /** + * Add a variable object to the list of variables in this section + * + * @param sphinx_config_variable $variable The variable object + */ + function add_variable($variable) + { + $this->variables[] = $variable; + } + + /** + * Adds a comment after the closing bracket in the textual representation + */ + function set_end_comment($end_comment) + { + $this->end_comment = $end_comment; + } + + /** + * Getter for the name of this section + * + * @return string Section's name + */ + function get_name() + { + return $this->name; + } + + /** + * Get a variable object by its name + * + * @param string $name The name of the variable that shall be returned + * @return sphinx_config_section The first variable object from this section with the + * given name or null if none was found + */ + function &get_variable_by_name($name) + { + for ($i = 0, $n = sizeof($this->variables); $i < $n; $i++) + { + // make sure this is a variable object and not a comment + if (is_a($this->variables[$i], 'sphinx_config_variable') && $this->variables[$i]->get_name() == $name) + { + return $this->variables[$i]; + } + } + $null = null; + return $null; + } + + /** + * Deletes all variables with the given name + * + * @param string $name The name of the variable objects that are supposed to be removed + */ + function delete_variables_by_name($name) + { + for ($i = 0; $i < sizeof($this->variables); $i++) + { + // make sure this is a variable object and not a comment + if (is_a($this->variables[$i], 'sphinx_config_variable') && $this->variables[$i]->get_name() == $name) + { + array_splice($this->variables, $i, 1); + $i--; + } + } + } + + /** + * Create a new variable object and append it to the variable list of this section + * + * @param string $name The name for the new variable + * @param string $value The value for the new variable + * @return sphinx_config_variable Variable object that was created + */ + function &create_variable($name, $value) + { + $this->variables[] = new sphinx_config_variable($name, $value, ''); + return $this->variables[sizeof($this->variables) - 1]; + } + + /** + * Turns this object into a string which can be written to a config file + * + * @return string Config data in textual form, parsable for sphinx + */ + function to_string() + { + $content = $this->name . " " . $this->comment . "\n{\n"; + + // make sure we don't get too many newlines after the opening bracket + while (trim($this->variables[0]->to_string()) == "") + { + array_shift($this->variables); + } + + foreach ($this->variables as $variable) + { + $content .= $variable->to_string(); + } + $content .= '}' . $this->end_comment . "\n"; + + return $content; + } +} + +/** +* sphinx_config_variable +* Represents a single variable inside the sphinx configuration +*/ +class sphinx_config_variable +{ + var $name; + var $value; + var $comment; + + /** + * Constructs a new variable object + * + * @param string $name Name of the variable + * @param string $value Value of the variable + * @param string $comment Optional comment after the variable in the + * config file + */ + function sphinx_config_variable($name, $value, $comment) + { + $this->name = $name; + $this->value = $value; + $this->comment = $comment; + } + + /** + * Getter for the variable's name + * + * @return string The variable object's name + */ + function get_name() + { + return $this->name; + } + + /** + * Allows changing the variable's value + * + * @param string $value New value for this variable + */ + function set_value($value) + { + $this->value = $value; + } + + /** + * Turns this object into a string readable by sphinx + * + * @return string Config data in textual form + */ + function to_string() + { + return "\t" . $this->name . ' = ' . str_replace("\n", "\\\n", $this->value) . ' ' . $this->comment . "\n"; + } +} + + +/** +* sphinx_config_comment +* Represents a comment inside the sphinx configuration +*/ +class sphinx_config_comment +{ + var $exact_string; + + /** + * Create a new comment + * + * @param string $exact_string The content of the comment including newlines, leading whitespace, etc. + */ + function sphinx_config_comment($exact_string) + { + $this->exact_string = $exact_string; + } + + /** + * Simply returns the comment as it was created + * + * @return string The exact string that was specified in the constructor + */ + function to_string() + { + return $this->exact_string; + } +} + +?> \ No newline at end of file diff --git a/phpBB/includes/search/fulltext_sphinx.php b/phpBB/includes/search/fulltext_sphinx.php new file mode 100644 index 0000000000..e0c467df93 --- /dev/null +++ b/phpBB/includes/search/fulltext_sphinx.php @@ -0,0 +1,1166 @@ +id = $config['avatar_salt']; + $this->indexes = 'index_phpbb_' . $this->id . '_delta;index_phpbb_' . $this->id . '_main'; + + $this->sphinx = new SphinxClient (); + + if (!empty($config['fulltext_sphinx_configured'])) + { + if ($config['fulltext_sphinx_autorun'] && !file_exists($config['fulltext_sphinx_data_path'] . 'searchd.pid') && $this->index_created(true)) + { + $this->shutdown_searchd(); +// $cwd = getcwd(); +// chdir($config['fulltext_sphinx_bin_path']); + exec($config['fulltext_sphinx_bin_path'] . SEARCHD_NAME . ' --config ' . $config['fulltext_sphinx_config_path'] . 'sphinx.conf >> ' . $config['fulltext_sphinx_data_path'] . 'log/searchd-startup.log 2>&1 &'); +// chdir($cwd); + } + + // we only support localhost for now + $this->sphinx->SetServer('localhost', (isset($config['fulltext_sphinx_port']) && $config['fulltext_sphinx_port']) ? (int) $config['fulltext_sphinx_port'] : 3312); + } + + $config['fulltext_sphinx_min_word_len'] = 2; + $config['fulltext_sphinx_max_word_len'] = 400; + + $error = false; + } + + /** + * Checks permissions and paths, if everything is correct it generates the config file + */ + function init() + { + global $db, $user, $config; + + if ($db->sql_layer != 'mysql' && $db->sql_layer != 'mysql4' && $db->sql_layer != 'mysqli') + { + return $user->lang['FULLTEXT_SPHINX_WRONG_DATABASE']; + } + + if ($error = $this->config_updated()) + { + return $error; + } + + // move delta to main index each hour + set_config('search_gc', 3600); + + return false; + } + + function config_updated() + { + global $db, $user, $config, $phpbb_root_path, $phpEx; + + if ($config['fulltext_sphinx_autoconf']) + { + $paths = array('fulltext_sphinx_bin_path', 'fulltext_sphinx_config_path', 'fulltext_sphinx_data_path'); + + // check for completeness and add trailing slash if it's not present + foreach ($paths as $path) + { + if (empty($config[$path])) + { + return $user->lang['FULLTEXT_SPHINX_UNCONFIGURED']; + } + if ($config[$path] && substr($config[$path], -1) != '/') + { + set_config($path, $config[$path] . '/'); + } + } + } + + $executables = array( + $config['fulltext_sphinx_bin_path'] . INDEXER_NAME, + $config['fulltext_sphinx_bin_path'] . SEARCHD_NAME, + ); + + if ($config['fulltext_sphinx_autorun']) + { + foreach ($executables as $executable) + { + if (!file_exists($executable)) + { + return sprintf($user->lang['FULLTEXT_SPHINX_FILE_NOT_FOUND'], $executable); + } + + if (!function_exists('exec')) + { + return $user->lang['FULLTEXT_SPHINX_REQUIRES_EXEC']; + } + + $output = array(); + @exec($executable, $output); + + $output = implode("\n", $output); + if (strpos($output, 'Sphinx ') === false) + { + return sprintf($user->lang['FULLTEXT_SPHINX_FILE_NOT_EXECUTABLE'], $executable); + } + } + } + + $writable_paths = array( + $config['fulltext_sphinx_config_path'] => array('config' => 'fulltext_sphinx_autoconf', 'subdir' => false), + $config['fulltext_sphinx_data_path'] => array('config' => 'fulltext_sphinx_autorun', 'subdir' => 'log'), + $config['fulltext_sphinx_data_path'] . 'log/' => array('config' => 'fulltext_sphinx_autorun', 'subdir' => false), + ); + + foreach ($writable_paths as $path => $info) + { + if ($config[$info['config']]) + { + // make sure directory exists + // if we could drop the @ here and figure out whether the file really + // doesn't exist or whether open_basedir is in effect, would be nice + if (!@file_exists($path)) + { + return sprintf($user->lang['FULLTEXT_SPHINX_DIRECTORY_NOT_FOUND'], $path); + } + + // now check if it is writable by storing a simple file + $filename = $path . 'write_test'; + $fp = @fopen($filename, 'wb'); + if ($fp === false) + { + return sprintf($user->lang['FULLTEXT_SPHINX_FILE_NOT_WRITABLE'], $filename); + } + @fclose($fp); + + @unlink($filename); + + if ($info['subdir'] !== false) + { + if (!is_dir($path . $info['subdir'])) + { + mkdir($path . $info['subdir']); + } + } + } + } + + if ($config['fulltext_sphinx_autoconf']) + { + include ($phpbb_root_path . 'config.' . $phpEx); + + // now that we're sure everything was entered correctly, generate a config for the index + // we misuse the avatar_salt for this, as it should be unique ;-) + + if (!class_exists('sphinx_config')) + { + include($phpbb_root_path . 'includes/functions_sphinx.php'); + } + + if (!file_exists($config['fulltext_sphinx_config_path'] . 'sphinx.conf')) + { + $filename = $config['fulltext_sphinx_config_path'] . 'sphinx.conf'; + $fp = @fopen($filename, 'wb'); + if ($fp === false) + { + return sprintf($user->lang['FULLTEXT_SPHINX_FILE_NOT_WRITABLE'], $filename); + } + @fclose($fp); + } + + $config_object = new sphinx_config($config['fulltext_sphinx_config_path'] . 'sphinx.conf'); + + $config_data = array( + "source source_phpbb_{$this->id}_main" => array( + array('type', 'mysql'), + array('sql_host', $dbhost), + array('sql_user', $dbuser), + array('sql_pass', $dbpasswd), + array('sql_db', $dbname), + array('sql_port', $dbport), + array('sql_query_pre', 'SET NAMES utf8'), + array('sql_query_pre', 'REPLACE INTO ' . SPHINX_TABLE . ' SELECT 1, MAX(post_id) FROM ' . POSTS_TABLE . ''), + array('sql_query_range', 'SELECT MIN(post_id), MAX(post_id) FROM ' . POSTS_TABLE . ''), + array('sql_range_step', '5000'), + array('sql_query', 'SELECT + p.post_id AS id, + p.forum_id, + p.topic_id, + p.poster_id, + IF(p.post_id = t.topic_first_post_id, 1, 0) as topic_first_post, + p.post_time, + p.post_subject, + p.post_subject as title, + p.post_text as data, + t.topic_last_post_time, + 0 as deleted + FROM ' . POSTS_TABLE . ' p, ' . TOPICS_TABLE . ' t + WHERE + p.topic_id = t.topic_id + AND p.post_id >= $start AND p.post_id <= $end'), + array('sql_query_post', ''), + array('sql_query_post_index', 'REPLACE INTO ' . SPHINX_TABLE . ' ( counter_id, max_doc_id ) VALUES ( 1, $maxid )'), + array('sql_query_info', 'SELECT * FROM ' . POSTS_TABLE . ' WHERE post_id = $id'), + array('sql_attr_uint', 'forum_id'), + array('sql_attr_uint', 'topic_id'), + array('sql_attr_uint', 'poster_id'), + array('sql_attr_bool', 'topic_first_post'), + array('sql_attr_bool', 'deleted'), + array('sql_attr_timestamp' , 'post_time'), + array('sql_attr_timestamp' , 'topic_last_post_time'), + array('sql_attr_str2ordinal', 'post_subject'), + ), + "source source_phpbb_{$this->id}_delta : source_phpbb_{$this->id}_main" => array( + array('sql_query_pre', ''), + array('sql_query_range', ''), + array('sql_range_step', ''), + array('sql_query', 'SELECT + p.post_id AS id, + p.forum_id, + p.topic_id, + p.poster_id, + IF(p.post_id = t.topic_first_post_id, 1, 0) as topic_first_post, + p.post_time, + p.post_subject, + p.post_subject as title, + p.post_text as data, + t.topic_last_post_time, + 0 as deleted + FROM ' . POSTS_TABLE . ' p, ' . TOPICS_TABLE . ' t + WHERE + p.topic_id = t.topic_id + AND p.post_id >= ( SELECT max_doc_id FROM ' . SPHINX_TABLE . ' WHERE counter_id=1 )'), + ), + "index index_phpbb_{$this->id}_main" => array( + array('path', $config['fulltext_sphinx_data_path'] . "index_phpbb_{$this->id}_main"), + array('source', "source_phpbb_{$this->id}_main"), + array('docinfo', 'extern'), + array('morphology', 'none'), + array('stopwords', (file_exists($config['fulltext_sphinx_config_path'] . 'sphinx_stopwords.txt') && $config['fulltext_sphinx_stopwords']) ? $config['fulltext_sphinx_config_path'] . 'sphinx_stopwords.txt' : ''), + array('min_word_len', '2'), + array('charset_type', 'utf-8'), + array('charset_table', 'U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z, A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6, U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101, U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109, U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F, U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, U+0116->U+0117, U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D, U+011D, U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, U+0134->U+0135, U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C, U+013C, U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142, U+0143->U+0144, U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B, U+014B, U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, U+0152->U+0153, U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159, U+0159, U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, U+0160->U+0161, U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, U+0167, U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, U+016E->U+016F, U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175, U+0175, U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, U+017B->U+017C, U+017C, U+017D->U+017E, U+017E, U+ß410..U+042F->U+0430..U+044F, U+0430..U+044F, U+4E00..U+9FFF'), + array('min_prefix_len', '0'), + array('min_infix_len', '0'), + ), + "index index_phpbb_{$this->id}_delta : index_phpbb_{$this->id}_main" => array( + array('path', $config['fulltext_sphinx_data_path'] . "index_phpbb_{$this->id}_delta"), + array('source', "source_phpbb_{$this->id}_delta"), + ), + 'indexer' => array( + array('mem_limit', $config['fulltext_sphinx_indexer_mem_limit'] . 'M'), + ), + 'searchd' => array( + array('address' , '127.0.0.1'), + array('port', ($config['fulltext_sphinx_port']) ? $config['fulltext_sphinx_port'] : '3312'), + array('log', $config['fulltext_sphinx_data_path'] . "log/searchd.log"), + array('query_log', $config['fulltext_sphinx_data_path'] . "log/sphinx-query.log"), + array('read_timeout', '5'), + array('max_children', '30'), + array('pid_file', $config['fulltext_sphinx_data_path'] . "searchd.pid"), + array('max_matches', (string) MAX_MATCHES), + ), + ); + + $non_unique = array('sql_query_pre' => true, 'sql_attr_uint' => true, 'sql_attr_timestamp' => true, 'sql_attr_str2ordinal' => true, 'sql_attr_bool' => true); + $delete = array('sql_group_column' => true, 'sql_date_column' => true, 'sql_str2ordinal_column' => true); + + foreach ($config_data as $section_name => $section_data) + { + $section = &$config_object->get_section_by_name($section_name); + if (!$section) + { + $section = &$config_object->add_section($section_name); + } + + foreach ($delete as $key => $void) + { + $section->delete_variables_by_name($key); + } + + foreach ($non_unique as $key => $void) + { + $section->delete_variables_by_name($key); + } + + foreach ($section_data as $entry) + { + $key = $entry[0]; + $value = $entry[1]; + + if (!isset($non_unique[$key])) + { + $variable = &$section->get_variable_by_name($key); + if (!$variable) + { + $variable = &$section->create_variable($key, $value); + } + else + { + $variable->set_value($value); + } + } + else + { + $variable = &$section->create_variable($key, $value); + } + } + } + + $config_object->write($config['fulltext_sphinx_config_path'] . 'sphinx.conf'); + } + + set_config('fulltext_sphinx_configured', '1'); + + $this->shutdown_searchd(); + $this->tidy(); + + return false; + } + + /** + * Splits keywords entered by a user into an array of words stored in $this->split_words + * Stores the tidied search query in $this->search_query + * + * @param string $keywords Contains the keyword as entered by the user + * @param string $terms is either 'all' or 'any' + * @return false if no valid keywords were found and otherwise true + */ + function split_keywords(&$keywords, $terms) + { + global $config; + + if ($terms == 'all') + { + $match = array('#\sand\s#i', '#\sor\s#i', '#\snot\s#i', '#\+#', '#-#', '#\|#', '#@#'); + $replace = array(' & ', ' | ', ' - ', ' +', ' -', ' |', ''); + + $replacements = 0; + $keywords = preg_replace($match, $replace, $keywords); + $this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED); + } + else + { + $this->sphinx->SetMatchMode(SPH_MATCH_ANY); + } + + $match = array(); + // Keep quotes + $match[] = "#"#"; + // KeepNew lines + $match[] = "#[\n]+#"; + + $replace = array('"', " "); + + $keywords = str_replace(array('"', "\n"), array('"', ' '), trim($keywords)); + + if (strlen($keywords) > 0) + { + $this->search_query = str_replace('"', '"', $keywords); + return true; + } + + return false; + } + + /** + * Performs a search on keywords depending on display specific params. You have to run split_keywords() first. + * + * @param string $type contains either posts or topics depending on what should be searched for + * @param string $fields contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched) + * @param string $terms is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words) + * @param array $sort_by_sql contains SQL code for the ORDER BY part of a query + * @param string $sort_key is the key of $sort_by_sql for the selected sorting + * @param string $sort_dir is either a or d representing ASC and DESC + * @param string $sort_days specifies the maximum amount of days a post may be old + * @param array $ex_fid_ary specifies an array of forum ids which should not be searched + * @param array $m_approve_fid_ary specifies an array of forum ids in which the searcher is allowed to view unapproved posts + * @param int $topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched + * @param array $author_ary an array of author ids if the author should be ignored during the search the array is empty + * @param string $author_name specifies the author match, when ANONYMOUS is also a search-match + * @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered + * @param int $start indicates the first index of the page + * @param int $per_page number of ids each page is supposed to contain + * @return boolean|int total number of results + * + * @access public + */ + function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page) + { + global $config, $db, $auth; + + // No keywords? No posts. + if (!strlen($this->search_query) && !sizeof($author_ary)) + { + return false; + } + + $id_ary = array(); + + $join_topic = ($type == 'posts') ? false : true; + + // sorting + + if ($type == 'topics') + { + switch ($sort_key) + { + case 'a': + $this->sphinx->SetGroupBy('topic_id', SPH_GROUPBY_ATTR, 'poster_id ' . (($sort_dir == 'a') ? 'ASC' : 'DESC')); + break; + case 'f': + $this->sphinx->SetGroupBy('topic_id', SPH_GROUPBY_ATTR, 'forum_id ' . (($sort_dir == 'a') ? 'ASC' : 'DESC')); + break; + case 'i': + case 's': + $this->sphinx->SetGroupBy('topic_id', SPH_GROUPBY_ATTR, 'post_subject ' . (($sort_dir == 'a') ? 'ASC' : 'DESC')); + break; + case 't': + default: + $this->sphinx->SetGroupBy('topic_id', SPH_GROUPBY_ATTR, 'topic_last_post_time ' . (($sort_dir == 'a') ? 'ASC' : 'DESC')); + break; + } + } + else + { + switch ($sort_key) + { + case 'a': + $this->sphinx->SetSortMode(($sort_dir == 'a') ? SPH_SORT_ATTR_ASC : SPH_SORT_ATTR_DESC, 'poster_id'); + break; + case 'f': + $this->sphinx->SetSortMode(($sort_dir == 'a') ? SPH_SORT_ATTR_ASC : SPH_SORT_ATTR_DESC, 'forum_id'); + break; + case 'i': + case 's': + $this->sphinx->SetSortMode(($sort_dir == 'a') ? SPH_SORT_ATTR_ASC : SPH_SORT_ATTR_DESC, 'post_subject'); + break; + case 't': + default: + $this->sphinx->SetSortMode(($sort_dir == 'a') ? SPH_SORT_ATTR_ASC : SPH_SORT_ATTR_DESC, 'post_time'); + break; + } + } + + // most narrow filters first + if ($topic_id) + { + $this->sphinx->SetFilter('topic_id', array($topic_id)); + } + + $search_query_prefix = ''; + + switch($fields) + { + case 'titleonly': + // only search the title + if ($terms == 'all') + { + $search_query_prefix = '@title '; + } + $this->sphinx->SetFieldWeights(array("title" => 5, "data" => 1)); // weight for the title + $this->sphinx->SetFilter('topic_first_post', array(1)); // 1 is first_post, 0 is not first post + break; + + case 'msgonly': + // only search the body + if ($terms == 'all') + { + $search_query_prefix = '@data '; + } + $this->sphinx->SetFieldWeights(array("title" => 1, "data" => 5)); // weight for the body + break; + + case 'firstpost': + $this->sphinx->SetFieldWeights(array("title" => 5, "data" => 1)); // more relative weight for the title, also search the body + $this->sphinx->SetFilter('topic_first_post', array(1)); // 1 is first_post, 0 is not first post + break; + + default: + $this->sphinx->SetFieldWeights(array("title" => 5, "data" => 1)); // more relative weight for the title, also search the body + break; + } + + if (sizeof($author_ary)) + { + $this->sphinx->SetFilter('poster_id', $author_ary); + } + + if (sizeof($ex_fid_ary)) + { + // All forums that a user is allowed to access + $fid_ary = array_unique(array_intersect(array_keys($auth->acl_getf('f_read', true)), array_keys($auth->acl_getf('f_search', true)))); + // All forums that the user wants to and can search in + $search_forums = array_diff($fid_ary, $ex_fid_ary); + + if (sizeof($search_forums)) + { + $this->sphinx->SetFilter('forum_id', $search_forums); + } + } + + $this->sphinx->SetFilter('deleted', array(0)); + + $this->sphinx->SetLimits($start, (int) $per_page, MAX_MATCHES); + $result = $this->sphinx->Query($search_query_prefix . str_replace('"', '"', $this->search_query), $this->indexes); + + // could be connection to localhost:3312 failed (errno=111, msg=Connection refused) during rotate, retry if so + $retries = CONNECT_RETRIES; + while (!$result && (strpos($this->sphinx->_error, "errno=111,") !== false) && $retries--) + { + usleep(CONNECT_WAIT_TIME); + $result = $this->sphinx->Query($search_query_prefix . str_replace('"', '"', $this->search_query), $this->indexes); + } + + $id_ary = array(); + if (isset($result['matches'])) + { + if ($type == 'posts') + { + $id_ary = array_keys($result['matches']); + } + else + { + foreach($result['matches'] as $key => $value) + { + $id_ary[] = $value['attrs']['topic_id']; + } + } + } + else + { + return false; + } + + $result_count = $result['total_found']; + + $id_ary = array_slice($id_ary, 0, (int) $per_page); + + return $result_count; + } + + /** + * Performs a search on an author's posts without caring about message contents. Depends on display specific params + * + * @param string $type contains either posts or topics depending on what should be searched for + * @param boolean $firstpost_only if true, only topic starting posts will be considered + * @param array $sort_by_sql contains SQL code for the ORDER BY part of a query + * @param string $sort_key is the key of $sort_by_sql for the selected sorting + * @param string $sort_dir is either a or d representing ASC and DESC + * @param string $sort_days specifies the maximum amount of days a post may be old + * @param array $ex_fid_ary specifies an array of forum ids which should not be searched + * @param array $m_approve_fid_ary specifies an array of forum ids in which the searcher is allowed to view unapproved posts + * @param int $topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched + * @param array $author_ary an array of author ids + * @param string $author_name specifies the author match, when ANONYMOUS is also a search-match + * @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered + * @param int $start indicates the first index of the page + * @param int $per_page number of ids each page is supposed to contain + * @return boolean|int total number of results + * + * @access public + */ + function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page) + { + $this->search_query = ''; + + $this->sphinx->SetMatchMode(SPH_MATCH_FULLSCAN); + $fields = ($firstpost_only) ? 'firstpost' : 'all'; + $terms = 'all'; + return $this->keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, $id_ary, $start, $per_page); + } + + /** + * Updates wordlist and wordmatch tables when a message is posted or changed + * + * @param string $mode Contains the post mode: edit, post, reply, quote + * @param int $post_id The id of the post which is modified/created + * @param string &$message New or updated post content + * @param string &$subject New or updated post subject + * @param int $poster_id Post author's user id + * @param int $forum_id The id of the forum in which the post is located + * + * @access public + */ + function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id) + { + global $config, $db; + + if ($mode == 'edit') + { + $this->sphinx->UpdateAttributes($this->indexes, array('forum_id', 'poster_id'), array((int)$post_id => array((int)$forum_id, (int)$poster_id))); + } + else if ($mode != 'post' && $post_id) + { + // update topic_last_post_time for full topic + $sql = 'SELECT p1.post_id + FROM ' . POSTS_TABLE . ' p1 + LEFT JOIN ' . POSTS_TABLE . ' p2 ON (p1.topic_id = p2.topic_id) + WHERE p2.post_id = ' . $post_id; + $result = $db->sql_query($sql); + + $post_updates = array(); + $post_time = time(); + while ($row = $db->sql_fetchrow($result)) + { + $post_updates[(int)$row['post_id']] = array((int) $post_time); + } + $db->sql_freeresult($result); + + if (sizeof($post_updates)) + { + $this->sphinx->UpdateAttributes($this->indexes, array('topic_last_post_time'), $post_updates); + } + } + + if ($config['fulltext_sphinx_autorun']) + { + if ($this->index_created()) + { + $rotate = ($this->searchd_running()) ? ' --rotate' : ''; + + $cwd = getcwd(); + chdir($config['fulltext_sphinx_bin_path']); + exec('./' . INDEXER_NAME . $rotate . ' --config ' . $config['fulltext_sphinx_config_path'] . 'sphinx.conf index_phpbb_' . $this->id . '_delta >> ' . $config['fulltext_sphinx_data_path'] . 'log/indexer.log 2>&1 &'); + var_dump('./' . INDEXER_NAME . $rotate . ' --config ' . $config['fulltext_sphinx_config_path'] . 'sphinx.conf index_phpbb_' . $this->id . '_delta >> ' . $config['fulltext_sphinx_data_path'] . 'log/indexer.log 2>&1 &'); + chdir($cwd); + } + } + } + + /** + * Delete a post from the index after it was deleted + */ + function index_remove($post_ids, $author_ids, $forum_ids) + { + $values = array(); + foreach ($post_ids as $post_id) + { + $values[$post_id] = array(1); + } + + $this->sphinx->UpdateAttributes($this->indexes, array('deleted'), $values); + } + + /** + * Destroy old cache entries + */ + function tidy($create = false) + { + global $config; + + if ($config['fulltext_sphinx_autorun']) + { + if ($this->index_created() || $create) + { + $rotate = ($this->searchd_running()) ? ' --rotate' : ''; + + $cwd = getcwd(); + chdir($config['fulltext_sphinx_bin_path']); + exec('./' . INDEXER_NAME . $rotate . ' --config ' . $config['fulltext_sphinx_config_path'] . 'sphinx.conf index_phpbb_' . $this->id . '_main >> ' . $config['fulltext_sphinx_data_path'] . 'log/indexer.log 2>&1 &'); + exec('./' . INDEXER_NAME . $rotate . ' --config ' . $config['fulltext_sphinx_config_path'] . 'sphinx.conf index_phpbb_' . $this->id . '_delta >> ' . $config['fulltext_sphinx_data_path'] . 'log/indexer.log 2>&1 &'); + chdir($cwd); + } + } + + set_config('search_last_gc', time(), true); + } + + /** + * Create sphinx table + */ + function create_index($acp_module, $u_action) + { + global $db, $user, $config; + + $this->shutdown_searchd(); + + if (!isset($config['fulltext_sphinx_configured']) || !$config['fulltext_sphinx_configured']) + { + $user->add_lang('mods/fulltext_sphinx'); + + return $user->lang['FULLTEXT_SPHINX_CONFIGURE_FIRST']; + } + + if (!$this->index_created()) + { + $sql = 'CREATE TABLE IF NOT EXISTS ' . SPHINX_TABLE . ' ( + counter_id INT NOT NULL PRIMARY KEY, + max_doc_id INT NOT NULL + )'; + $db->sql_query($sql); + + $sql = 'TRUNCATE TABLE ' . SPHINX_TABLE; + $db->sql_query($sql); + } + + // start indexing process + $this->tidy(true); + + $this->shutdown_searchd(); + + return false; + } + + /** + * Drop sphinx table + */ + function delete_index($acp_module, $u_action) + { + global $db, $config; + + $this->shutdown_searchd(); + + if ($config['fulltext_sphinx_autorun']) + { + sphinx_unlink_by_pattern($config['fulltext_sphinx_data_path'], '#^index_phpbb_' . $this->id . '.*$#'); + } + + if (!$this->index_created()) + { + return false; + } + + $sql = 'DROP TABLE ' . SPHINX_TABLE; + $db->sql_query($sql); + + $this->shutdown_searchd(); + + return false; + } + + /** + * Returns true if the sphinx table was created + */ + function index_created($allow_new_files = true) + { + global $db, $config; + + $sql = 'SHOW TABLES LIKE \'' . SPHINX_TABLE . '\''; + $result = $db->sql_query($sql); + $row = $db->sql_fetchrow($result); + $db->sql_freeresult($result); + + $created = false; + + if ($row) + { + if ($config['fulltext_sphinx_autorun']) + { + if ((file_exists($config['fulltext_sphinx_data_path'] . 'index_phpbb_' . $this->id . '_main.spd') && file_exists($config['fulltext_sphinx_data_path'] . 'index_phpbb_' . $this->id . '_delta.spd')) || ($allow_new_files && file_exists($config['fulltext_sphinx_data_path'] . 'index_phpbb_' . $this->id . '_main.new.spd') && file_exists($config['fulltext_sphinx_data_path'] . 'index_phpbb_' . $this->id . '_delta.new.spd'))) + { + $created = true; + } + } + else + { + $created = true; + } + } + + return $created; + } + + /** + * Kills the searchd process and makes sure there's no locks left over + */ + function shutdown_searchd() + { + global $config; + + if ($config['fulltext_sphinx_autorun']) + { + if (!function_exists('exec')) + { + set_config('fulltext_sphinx_autorun', '0'); + return; + } + + exec('killall -9 ' . SEARCHD_NAME . ' >> /dev/null 2>&1 &'); + + if (file_exists($config['fulltext_sphinx_data_path'] . 'searchd.pid')) + { + unlink($config['fulltext_sphinx_data_path'] . 'searchd.pid'); + } + + sphinx_unlink_by_pattern($config['fulltext_sphinx_data_path'], '#^.*\.spl$#'); + } + } + + /** + * Checks whether searchd is running, if it's not running it makes sure there's no left over + * files by calling shutdown_searchd. + * + * @return boolean Whether searchd is running or not + */ + function searchd_running() + { + global $config; + + // if we cannot manipulate the service assume it is running + if (!$config['fulltext_sphinx_autorun']) + { + return true; + } + + if (file_exists($config['fulltext_sphinx_data_path'] . 'searchd.pid')) + { + $pid = trim(file_get_contents($config['fulltext_sphinx_data_path'] . 'searchd.pid')); + + if ($pid) + { + $output = array(); + $pidof_command = 'pidof'; + + exec('whereis -b pidof', $output); + if (sizeof($output) > 1) + { + $output = explode(' ', trim($output[0])); + $pidof_command = $output[1]; // 0 is pidof: + } + + $output = array(); + exec($pidof_command . ' ' . SEARCHD_NAME, $output); + if (sizeof($output) && (trim($output[0]) == $pid || trim($output[1]) == $pid)) + { + return true; + } + } + } + + // make sure it's really not running + $this->shutdown_searchd(); + + return false; + } + + /** + * Returns an associative array containing information about the indexes + */ + function index_stats() + { + global $user; + + if (empty($this->stats)) + { + $this->get_stats(); + } + + $user->add_lang('mods/fulltext_sphinx'); + + return array( + $user->lang['FULLTEXT_SPHINX_MAIN_POSTS'] => ($this->index_created()) ? $this->stats['main_posts'] : 0, + $user->lang['FULLTEXT_SPHINX_DELTA_POSTS'] => ($this->index_created()) ? $this->stats['total_posts'] - $this->stats['main_posts'] : 0, + $user->lang['FULLTEXT_MYSQL_TOTAL_POSTS'] => ($this->index_created()) ? $this->stats['total_posts'] : 0, + $user->lang['FULLTEXT_SPHINX_LAST_SEARCHES'] => nl2br($this->stats['last_searches']), + ); + } + + /** + * Collects stats that can be displayed on the index maintenance page + */ + function get_stats() + { + global $db, $config; + + if ($this->index_created()) + { + $sql = 'SELECT COUNT(post_id) as total_posts + FROM ' . POSTS_TABLE; + $result = $db->sql_query($sql); + $this->stats['total_posts'] = (int) $db->sql_fetchfield('total_posts'); + $db->sql_freeresult($result); + + $sql = 'SELECT COUNT(p.post_id) as main_posts + FROM ' . POSTS_TABLE . ' p, ' . SPHINX_TABLE . ' m + WHERE p.post_id <= m.max_doc_id + AND m.counter_id = 1'; + $result = $db->sql_query($sql); + $this->stats['main_posts'] = (int) $db->sql_fetchfield('main_posts'); + $db->sql_freeresult($result); + } + + $this->stats['last_searches'] = ''; + if ($config['fulltext_sphinx_autorun']) + { + if (file_exists($config['fulltext_sphinx_data_path'] . 'log/sphinx-query.log')) + { + $last_searches = explode("\n", utf8_htmlspecialchars(sphinx_read_last_lines($config['fulltext_sphinx_data_path'] . 'log/sphinx-query.log', 3))); + + foreach($last_searches as $i => $search) + { + if (strpos($search, '[' . $this->indexes . ']') !== false) + { + $last_searches[$i] = str_replace('[' . $this->indexes . ']', '', $search); + } + else + { + $last_searches[$i] = ''; + } + } + $this->stats['last_searches'] = implode("\n", $last_searches); + } + } + } + + /** + * Returns a list of options for the ACP to display + */ + function acp() + { + global $user, $config; + + $user->add_lang('mods/fulltext_sphinx'); + + $config_vars = array( + 'fulltext_sphinx_autoconf' => 'bool', + 'fulltext_sphinx_autorun' => 'bool', + 'fulltext_sphinx_config_path' => 'string', + 'fulltext_sphinx_data_path' => 'string', + 'fulltext_sphinx_bin_path' => 'string', + 'fulltext_sphinx_port' => 'int', + 'fulltext_sphinx_stopwords' => 'bool', + 'fulltext_sphinx_indexer_mem_limit' => 'int', + ); + + $defaults = array( + 'fulltext_sphinx_autoconf' => '1', + 'fulltext_sphinx_autorun' => '1', + 'fulltext_sphinx_indexer_mem_limit' => '512', + ); + + foreach ($config_vars as $config_var => $type) + { + if (!isset($config[$config_var])) + { + $default = ''; + if (isset($defaults[$config_var])) + { + $default = $defaults[$config_var]; + } + set_config($config_var, $default); + } + } + + $no_autoconf = false; + $no_autorun = false; + $bin_path = $config['fulltext_sphinx_bin_path']; + + // try to guess the path if it is empty + if (empty($bin_path)) + { + if (@file_exists('/usr/local/bin/' . INDEXER_NAME) && @file_exists('/usr/local/bin/' . SEARCHD_NAME)) + { + $bin_path = '/usr/local/bin/'; + } + else if (@file_exists('/usr/bin/' . INDEXER_NAME) && @file_exists('/usr/bin/' . SEARCHD_NAME)) + { + $bin_path = '/usr/bin/'; + } + else + { + $output = array(); + if (!function_exists('exec') || null === @exec('whereis -b ' . INDEXER_NAME, $output)) + { + $no_autorun = true; + } + else if (sizeof($output)) + { + $output = explode(' ', $output[0]); + array_shift($output); // remove indexer: + + foreach ($output as $path) + { + $path = dirname($path) . '/'; + + if (file_exists($path . INDEXER_NAME) && file_exists($path . SEARCHD_NAME)) + { + $bin_path = $path; + break; + } + } + } + } + } + + if ($no_autorun) + { + set_config('fulltext_sphinx_autorun', '0'); + } + + if ($no_autoconf) + { + set_config('fulltext_sphinx_autoconf', '0'); + } + + // rewrite config if fulltext sphinx is enabled + if ($config['fulltext_sphinx_autoconf'] && isset($config['fulltext_sphinx_configured']) && $config['fulltext_sphinx_configured']) + { + $this->config_updated(); + } + + // check whether stopwords file is available and enabled + if (@file_exists($config['fulltext_sphinx_config_path'] . 'sphinx_stopwords.txt')) + { + $stopwords_available = true; + $stopwords_active = $config['fulltext_sphinx_stopwords']; + } + else + { + $stopwords_available = false; + $stopwords_active = false; + set_config('fulltext_sphinx_stopwords', '0'); + } + + $tpl = ' + ' . $user->lang['FULLTEXT_SPHINX_CONFIGURE_BEFORE']. ' +
exec
function which is disabled on your system.',
+ 'FULLTEXT_SPHINX_UNCONFIGURED' => 'Please set all necessary options in the "Fulltext Sphinx" section of the previous page before you try to activate the sphinx plugin.',
+ 'FULLTEXT_SPHINX_WRONG_DATABASE' => 'The sphinx plugin for phpBB currently only supports MySQL',
+ 'FULLTEXT_SPHINX_STOPWORDS_FILE' => 'Stopwords activated',
+ 'FULLTEXT_SPHINX_STOPWORDS_FILE_EXPLAIN' => 'This setting only works with autoconf enabled. You can place a file called sphinx_stopwords.txt containing one word in each line in your config directory. If this file is present these words will be excluded from the indexing process.',
+));
+
+?>
\ No newline at end of file