Merge pull request #6345 from marc1706/ticket/16935

[ticket/16935] Refactor sphinx to use new classes and remove unused parts
This commit is contained in:
Marc Alexander 2022-01-22 09:04:43 +01:00 committed by GitHub
commit 869867fdbe
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 268 additions and 362 deletions

144
.github/setup-sphinx.sh vendored Executable file
View file

@ -0,0 +1,144 @@
#!/bin/bash
#
# This file is part of the phpBB Forum Software package.
#
# @copyright (c) phpBB Limited <https://www.phpbb.com>
# @license GNU General Public License, version 2 (GPL-2.0)
#
# For full copyright and license information, please see
# the docs/CREDITS.txt file.
#
# Abort on the first failing command and trace every command that is run.
set -ex

# Install the Sphinx search package.
sudo apt-get update
sudo apt-get install -qy sphinxsearch

# Paths and connection settings that are interpolated into the generated
# Sphinx configuration further down in this script.
DIR="$(dirname "$0")"
SPHINX_DAEMON_HOST='localhost'
SPHINX_DAEMON_PORT='9312'
SPHINX_CONF="${DIR}/sphinx.conf"
SPHINX_DATA_DIR='/var/run/sphinxsearch'
SPHINX_LOG="${SPHINX_DATA_DIR}/log/searchd.log"
SPHINX_QUERY_LOG="${SPHINX_DATA_DIR}/log/sphinx-query.log"
ID='saw9zf2fdhp1goue' # Randomly generated via phpBB unique_id()

# Database the Sphinx sources read from.
PHPBB_TEST_DBHOST='0.0.0.0'
PHPBB_TEST_DBNAME='phpbb_tests'
PHPBB_TEST_DBUSER='root'
PHPBB_TEST_DBPASSWD=''

# Stop the service, then create the log directory and hand it over to the
# sphinxsearch user.
sudo service sphinxsearch stop
sudo mkdir -p "${SPHINX_DATA_DIR}/log"
sudo chown sphinxsearch "${SPHINX_DATA_DIR}/log"
# Generate configuration file for Sphinx.
#
# The configuration is written as one double-quoted string, so the shell
# expands the ${ID}, $PHPBB_TEST_* and $SPHINX_* variables before writing.
# Inside the string, \$ produces a literal dollar sign (Sphinx's own $start,
# $end and $maxid macros) and a backslash at the end of a line joins it with
# the next line, so each multi-line SQL statement ends up on a single line.
# The redirect target is quoted because it is derived from dirname "$0" and
# could contain whitespace or glob characters.
echo "
source source_phpbb_${ID}_main
{
type = mysql # mysql or pgsql
sql_host = $PHPBB_TEST_DBHOST
sql_user = $PHPBB_TEST_DBUSER
sql_pass = $PHPBB_TEST_DBPASSWD
sql_db = $PHPBB_TEST_DBNAME
sql_port =
sql_query_pre = SET NAMES 'utf8'
sql_query_pre = UPDATE phpbb_sphinx SET max_doc_id = (SELECT MAX(post_id) FROM phpbb_posts) WHERE counter_id = 1
sql_query_range = SELECT MIN(post_id), MAX(post_id) FROM phpbb_posts
sql_range_step = 5000
sql_query = SELECT \
p.post_id AS id, \
p.forum_id, \
p.topic_id, \
p.poster_id, \
p.post_visibility, \
CASE WHEN p.post_id = t.topic_first_post_id THEN 1 ELSE 0 END as topic_first_post, \
p.post_time, \
p.post_subject, \
p.post_subject as title, \
p.post_text as data, \
t.topic_last_post_time, \
0 as deleted \
FROM phpbb_posts p, phpbb_topics t \
WHERE \
p.topic_id = t.topic_id \
AND p.post_id >= \$start AND p.post_id <= \$end
sql_query_post =
sql_query_post_index = UPDATE phpbb_sphinx SET max_doc_id = \$maxid WHERE counter_id = 1
sql_attr_uint = forum_id
sql_attr_uint = topic_id
sql_attr_uint = poster_id
sql_attr_uint = post_visibility
sql_attr_bool = topic_first_post
sql_attr_bool = deleted
sql_attr_timestamp = post_time
sql_attr_timestamp = topic_last_post_time
sql_attr_string = post_subject
}
source source_phpbb_${ID}_delta : source_phpbb_${ID}_main
{
sql_query_pre = SET NAMES 'utf8'
sql_query_range =
sql_range_step =
sql_query = SELECT \
p.post_id AS id, \
p.forum_id, \
p.topic_id, \
p.poster_id, \
p.post_visibility, \
CASE WHEN p.post_id = t.topic_first_post_id THEN 1 ELSE 0 END as topic_first_post, \
p.post_time, \
p.post_subject, \
p.post_subject as title, \
p.post_text as data, \
t.topic_last_post_time, \
0 as deleted \
FROM phpbb_posts p, phpbb_topics t \
WHERE \
p.topic_id = t.topic_id \
AND p.post_id >= ( SELECT max_doc_id FROM phpbb_sphinx WHERE counter_id=1 )
sql_query_post_index =
}
index index_phpbb_${ID}_main
{
path = $SPHINX_DATA_DIR/index_phpbb_${ID}_main
source = source_phpbb_${ID}_main
docinfo = extern
morphology = none
stopwords =
wordforms =
exceptions =
min_word_len = 2
charset_table = U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z, A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6, U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101, U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109, U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F, U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, U+0116->U+0117, U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D, U+011D, U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, U+0134->U+0135, U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C, U+013C, U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142, U+0143->U+0144, U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B, U+014B, U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, U+0152->U+0153, U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159, U+0159, U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, U+0160->U+0161, U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, U+0167, U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, U+016E->U+016F, U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175, U+0175, U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, U+017B->U+017C, U+017C, U+017D->U+017E, U+017E, U+0410..U+042F->U+0430..U+044F, U+0430..U+044F, U+4E00..U+9FFF
ignore_chars = U+0027, U+002C
min_prefix_len = 3
min_infix_len = 0
html_strip = 1
index_exact_words = 0
blend_chars = U+23, U+24, U+25, U+26, U+40
}
index index_phpbb_${ID}_delta : index_phpbb_${ID}_main
{
path = $SPHINX_DATA_DIR/index_phpbb_${ID}_delta
source = source_phpbb_${ID}_delta
}
indexer
{
mem_limit = 512M
}
searchd
{
listen = $SPHINX_DAEMON_PORT
log = $SPHINX_LOG
query_log = $SPHINX_QUERY_LOG
read_timeout = 5
max_children = 30
pid_file = $SPHINX_DATA_DIR/searchd.pid
binlog_path = $SPHINX_DATA_DIR/
}
" > "$SPHINX_CONF"
# Move the generated configuration to the system-wide sphinxsearch location.
sudo mv "$SPHINX_CONF" "/etc/sphinxsearch/sphinx.conf"
# Flip START=no to START=yes in the service defaults file
# (presumably required before the packaged service will start - confirm).
sudo sed -i "s/START=no/START=yes/g" "/etc/default/sphinxsearch"
# Open up the data directory. Use the same variable the generated
# configuration uses so the two locations cannot drift apart.
sudo chmod 777 "$SPHINX_DATA_DIR"

View file

@ -213,6 +213,10 @@ jobs:
run: |
.github/setup-ldap.sh
- name: Setup SPHINX
run: |
.github/setup-sphinx.sh
- name: Lint tests
if: ${{ matrix.SLOWTESTS != 1 && steps.database-type.outputs.db == 'mysql' }}
run: phpBB/vendor/bin/phpunit tests/lint_test.php

View file

@ -57,8 +57,8 @@ class fulltext_sphinx implements search_backend_interface
protected $indexes;
/**
* Sphinx searchd client object
* @var SphinxClient
* Sphinx search client object
* @var \SphinxClient
*/
protected $sphinx;
@ -631,7 +631,7 @@ class fulltext_sphinx implements search_backend_interface
*/
public function create_index(int &$post_counter = 0): ?array
{
if ($this->index_created())
if (!$this->index_created())
{
$table_data = array(
'COLUMNS' => array(
@ -642,9 +642,6 @@ class fulltext_sphinx implements search_backend_interface
);
$this->db_tools->sql_create_table(SPHINX_TABLE, $table_data);
$sql = 'TRUNCATE TABLE ' . SPHINX_TABLE;
$this->db->sql_query($sql);
$data = array(
'counter_id' => '1',
'max_doc_id' => '0',
@ -857,7 +854,7 @@ class fulltext_sphinx implements search_backend_interface
/* Now that we're sure everything was entered correctly,
generate a config for the index. We use a config value
fulltext_sphinx_id for this, as it should be unique. */
$config_object = new \phpbb\search\sphinx\config($this->config_file_data);
$config_object = new \phpbb\search\backend\sphinx\config();
$config_data = array(
'source source_phpbb_' . $this->id . '_main' => array(
array('type', $this->dbtype . ' # mysql or pgsql'),

View file

@ -19,259 +19,47 @@ namespace phpbb\search\backend\sphinx;
*/
class config
{
private $sections = array();
/**
* Constructor which optionally loads data from a variable
*
* @param string $config_data Variable containing the sphinx configuration data
*
* @access public
*/
function __construct($config_data)
{
if ($config_data != '')
{
$this->read($config_data);
}
}
/** @var array Sections array */
private $sections = [];
/**
* Get a section object by its name
*
* @param string $name The name of the section that shall be returned
* @return \phpbb\search\sphinx\config_section The section object or null if none was found
*
* @access public
* @param string $name The name of the section that shall be returned
* @return config_section|null The section object or null if none was found
*/
function get_section_by_name($name)
public function get_section_by_name(string $name): ?config_section
{
for ($i = 0, $size = count($this->sections); $i < $size; $i++)
{
// Make sure this is really a section object and not a comment
if (($this->sections[$i] instanceof \phpbb\search\sphinx\config_section) && $this->sections[$i]->get_name() == $name)
if (($this->sections[$i] instanceof config_section) && $this->sections[$i]->get_name() == $name)
{
return $this->sections[$i];
}
}
return null;
}
/**
* Appends a new empty section to the end of the config
*
* @param string $name The name for the new section
* @return \phpbb\search\sphinx\config_section The newly created section object
*
* @access public
* @param string $name The name for the new section
* @return config_section The newly created section object
*/
function add_section($name)
public function add_section(string $name): config_section
{
$this->sections[] = new \phpbb\search\sphinx\config_section($name, '');
$this->sections[] = new config_section($name, '');
return $this->sections[count($this->sections) - 1];
}
/**
* Reads the config file data
*
* @param string $config_data The config file data
*
* @access private
*/
function read($config_data)
{
$this->sections = array();
$section = null;
$found_opening_bracket = false;
$in_value = false;
foreach ($config_data as $i => $line)
{
// If the value of a variable continues to the next line because the line
// break was escaped then we don't trim leading space but treat it as a part of the value
if ($in_value)
{
$line = rtrim($line);
}
else
{
$line = trim($line);
}
// If we're not inside a section look for one
if (!$section)
{
// Add empty lines and comments as comment objects to the section list
// that way they're not deleted when reassembling the file from the sections
if (!$line || $line[0] == '#')
{
$this->sections[] = new \phpbb\search\sphinx\config_comment($config_file[$i]);
continue;
}
else
{
// Otherwise we scan the line reading the section name until we find
// an opening curly bracket or a comment
$section_name = '';
$section_name_comment = '';
$found_opening_bracket = false;
for ($j = 0, $length = strlen($line); $j < $length; $j++)
{
if ($line[$j] == '#')
{
$section_name_comment = substr($line, $j);
break;
}
if ($found_opening_bracket)
{
continue;
}
if ($line[$j] == '{')
{
$found_opening_bracket = true;
continue;
}
$section_name .= $line[$j];
}
// And then we create the new section object
$section_name = trim($section_name);
$section = new \phpbb\search\sphinx\config_section($section_name, $section_name_comment);
}
}
else
{
// If we're looking for variables inside a section
$skip_first = false;
// If we're not in a value continuing over the line feed
if (!$in_value)
{
// Then add empty lines and comments as comment objects to the variable list
// of this section so they're not deleted on reassembly
if (!$line || $line[0] == '#')
{
$section->add_variable(new \phpbb\search\sphinx\config_comment($config_file[$i]));
continue;
}
// As long as we haven't yet actually found an opening bracket for this section
// we treat everything as comments so it's not deleted either
if (!$found_opening_bracket)
{
if ($line[0] == '{')
{
$skip_first = true;
$line = substr($line, 1);
$found_opening_bracket = true;
}
else
{
$section->add_variable(new \phpbb\search\sphinx\config_comment($config_file[$i]));
continue;
}
}
}
// If we did not find a comment in this line or still add to the previous
// line's value ...
if ($line || $in_value)
{
if (!$in_value)
{
$name = '';
$value = '';
$comment = '';
$found_assignment = false;
}
$in_value = false;
$end_section = false;
/* ... then we should parse this line char by char:
- first there's the variable name
- then an equal sign
- the variable value
- possibly a backslash before the linefeed in this case we need to continue
parsing the value in the next line
- a # indicating that the rest of the line is a comment
- a closing curly bracket indicating the end of this section*/
for ($j = 0, $length = strlen($line); $j < $length; $j++)
{
if ($line[$j] == '#')
{
$comment = substr($line, $j);
break;
}
else if ($line[$j] == '}')
{
$comment = substr($line, $j + 1);
$end_section = true;
break;
}
else if (!$found_assignment)
{
if ($line[$j] == '=')
{
$found_assignment = true;
}
else
{
$name .= $line[$j];
}
}
else
{
if ($line[$j] == '\\' && $j == $length - 1)
{
$value .= "\n";
$in_value = true;
// Go to the next line and keep processing the value in there
continue 2;
}
$value .= $line[$j];
}
}
// If a name and an equal sign were found then we append a
// new variable object to the section
if ($name && $found_assignment)
{
$section->add_variable(new \phpbb\search\sphinx\config_variable(trim($name), trim($value), ($end_section) ? '' : $comment));
continue;
}
/* If we found a closing curly bracket this section has been completed
and we can append it to the section list and continue with looking for
the next section */
if ($end_section)
{
$section->set_end_comment($comment);
$this->sections[] = $section;
$section = null;
continue;
}
}
// If we did not find anything meaningful up to here, then just treat it
// as a comment
$comment = ($skip_first) ? "\t" . substr(ltrim($config_file[$i]), 1) : $config_file[$i];
$section->add_variable(new \phpbb\search\sphinx\config_comment($comment));
}
}
}
/**
* Returns the config data
*
* @return string $data The config data that is generated
*
* @access public
*/
function get_data()
public function get_data(): string
{
$data = "";
foreach ($this->sections as $section)

View file

@ -1,47 +0,0 @@
<?php
/**
*
* This file is part of the phpBB Forum Software package.
*
* @copyright (c) phpBB Limited <https://www.phpbb.com>
* @license GNU General Public License, version 2 (GPL-2.0)
*
* For full copyright and license information, please see
* the docs/CREDITS.txt file.
*
*/
namespace phpbb\search\backend\sphinx;
/**
* \phpbb\search\sphinx\config_comment
* Represents a comment inside the sphinx configuration
*/
class config_comment
{
	/** @var string Comment text, stored exactly as it was passed in */
	private $exact_string;

	/**
	* Store the text of a comment
	*
	* @param string $exact_string The content of the comment including newlines, leading whitespace, etc.
	*/
	public function __construct($exact_string)
	{
		$this->exact_string = $exact_string;
	}

	/**
	* Return the comment text without any modification
	*
	* @return string The exact string that was given to the constructor
	*/
	public function to_string()
	{
		return $this->exact_string;
	}
}

View file

@ -0,0 +1,41 @@
<?php
/**
*
* This file is part of the phpBB Forum Software package.
*
* @copyright (c) phpBB Limited <https://www.phpbb.com>
* @license GNU General Public License, version 2 (GPL-2.0)
*
* For full copyright and license information, please see
* the docs/CREDITS.txt file.
*
*/
namespace phpbb\search\backend\sphinx;
/**
* \phpbb\search\backend\sphinx\config_item
* Represents a single config item inside the sphinx configuration
*/
abstract class config_item
{
	/** @var string Name of this item; empty unless a subclass sets it */
	protected $name = '';

	/**
	* Render this item as text
	*
	* Every concrete item has to provide its own implementation.
	*
	* @return string String representation of config item
	*/
	abstract public function to_string(): string;

	/**
	* Read the name stored on this item
	*
	* @return string The item object's name
	*/
	public function get_name(): string
	{
		return $this->name;
	}
}

View file

@ -14,15 +14,19 @@
namespace phpbb\search\backend\sphinx;
/**
* \phpbb\search\sphinx\config_section
* \phpbb\search\backend\sphinx\config_section
* Represents a single section inside the sphinx configuration
*/
class config_section
class config_section extends config_item
{
private $name;
/** @var string Section comment */
private $comment;
/** @var string Section end comment */
private $end_comment;
private $variables = array();
/** @var array Section variables array */
private $variables = [];
/**
* Construct a new section
@ -30,86 +34,57 @@ class config_section
* @param string $name Name of the section
* @param string $comment Comment that should be appended after the name in the
* textual format.
*
* @access public
*/
function __construct($name, $comment)
public function __construct(string $name, string $comment)
{
$this->name = $name;
$this->comment = $comment;
$this->end_comment = '';
}
/**
* Add a variable object to the list of variables in this section
*
* @param \phpbb\search\sphinx\config_variable $variable The variable object
*
* @access public
*/
function add_variable($variable)
{
$this->variables[] = $variable;
}
/**
* Adds a comment after the closing bracket in the textual representation
*
* @param string $end_comment
*
* @access public
*/
function set_end_comment($end_comment)
public function set_end_comment(string $end_comment): void
{
$this->end_comment = $end_comment;
}
/**
* Getter for the name of this section
*
* @return string Section's name
*
* @access public
*/
function get_name()
{
return $this->name;
}
/**
* Get a variable object by its name
*
* @param string $name The name of the variable that shall be returned
* @return \phpbb\search\sphinx\config_section The first variable object from this section with the
* given name or null if none was found
* @param string $name The name of the variable that shall be returned
*
* @access public
* @return config_variable|null The first variable object from this section with the
* given name or null if none was found
*/
function get_variable_by_name($name)
public function get_variable_by_name(string $name): ?config_variable
{
for ($i = 0, $size = count($this->variables); $i < $size; $i++)
{
// Make sure this is a variable object and not a comment
if (($this->variables[$i] instanceof \phpbb\search\sphinx\config_variable) && $this->variables[$i]->get_name() == $name)
if ($this->variables[$i]->get_name() == $name)
{
return $this->variables[$i];
}
}
return null;
}
/**
* Deletes all variables with the given name
*
* @param string $name The name of the variable objects that are supposed to be removed
*
* @access public
*/
function delete_variables_by_name($name)
public function delete_variables_by_name(string $name)
{
for ($i = 0, $size = count($this->variables); $i < $size; $i++)
{
// Make sure this is a variable object and not a comment
if (($this->variables[$i] instanceof \phpbb\search\sphinx\config_variable) && $this->variables[$i]->get_name() == $name)
if ($this->variables[$i]->get_name() == $name)
{
array_splice($this->variables, $i, 1);
$i--;
@ -118,17 +93,16 @@ class config_section
}
/**
* Create a new variable object and append it to the variable list of this section
* Create a new variable object and append it to the variables list of this section
*
* @param string $name The name for the new variable
* @param string $value The value for the new variable
* @return \phpbb\search\sphinx\config_variable Variable object that was created
* @param string $name The name for the new variable
* @param string $value The value for the new variable
*
* @access public
* @return config_variable Variable object that was created
*/
function create_variable($name, $value)
public function create_variable(string $name, string $value): config_variable
{
$this->variables[] = new \phpbb\search\sphinx\config_variable($name, $value, '');
$this->variables[] = new config_variable($name, $value);
return $this->variables[count($this->variables) - 1];
}
@ -136,10 +110,8 @@ class config_section
* Turns this object into a string which can be written to a config file
*
* @return string Config data in textual form, parsable for sphinx
*
* @access public
*/
function to_string()
public function to_string(): string
{
$content = $this->name . ' ' . $this->comment . "\n{\n";

View file

@ -14,12 +14,11 @@
namespace phpbb\search\backend\sphinx;
/**
* \phpbb\search\sphinx\config_variable
* \phpbb\search\backend\sphinx\config_variable
* Represents a single variable inside the sphinx configuration
*/
class config_variable
class config_variable extends config_item
{
private $name;
private $value;
private $comment;
@ -30,49 +29,29 @@ class config_variable
* @param string $value Value of the variable
* @param string $comment Optional comment after the variable in the
* config file
*
* @access public
*/
function __construct($name, $value, $comment)
public function __construct(string $name, string $value, string $comment = '')
{
$this->name = $name;
$this->value = $value;
$this->comment = $comment;
}
/**
* Getter for the variable's name
*
* @return string The variable object's name
*
* @access public
*/
function get_name()
{
return $this->name;
}
/**
* Allows changing the variable's value
*
* @param string $value New value for this variable
*
* @access public
*/
function set_value($value)
public function set_value(string $value): void
{
$this->value = $value;
}
/**
* Turns this object into a string readable by sphinx
*
* @return string Config data in textual form
*
* @access public
* {@inheritDoc}
*/
function to_string()
public function to_string(): string
{
return "\t" . $this->name . ' = ' . str_replace("\n", " \\\n", $this->value) . ' ' . $this->comment . "\n";
return "\t" . $this->name . ' = ' . str_replace("\n", " \\\n", $this->value) . ($this->comment ? ' ' . $this->comment : '') . "\n";
}
}

View file

@ -98,6 +98,12 @@ abstract class phpbb_functional_search_base extends phpbb_functional_test_case
{
$values["config[search_type]"] = $this->search_backend;
if (strpos($this->search_backend, 'fulltext_sphinx'))
{
// Set the board's Sphinx id according to the $ID value in setup-sphinx.sh
$values["config[fulltext_sphinx_id]"] = 'saw9zf2fdhp1goue';
}
try
{
$form->setValues($values);
@ -111,6 +117,7 @@ abstract class phpbb_functional_search_base extends phpbb_functional_test_case
}
$crawler = self::submit($form);
$this->purge_cache();
$form = $crawler->selectButton($this->lang('YES'))->form();
$values = $form->getValues();

View file

@ -20,8 +20,29 @@ class phpbb_functional_search_sphinx_test extends phpbb_functional_search_base
{
protected $search_backend = 'phpbb\search\backend\fulltext_sphinx';
/**
* Create the phpBB search index, then rebuild the Sphinx index on disk
*
* The parent implementation is always called and the cache is purged
* afterwards. The Sphinx commands only run when no backend was given or
* when the given backend equals this test's search backend.
*
* @param string|null $backend Search backend to create the index for
*/
protected function create_search_index($backend = null)
{
	parent::create_search_index($backend);
	$this->purge_cache();

	// A different backend was requested, so there is no Sphinx index to build
	if ($backend && $this->search_backend != $backend)
	{
		return;
	}

	// After creating the phpBB search index, build the Sphinx index. The
	// exit status of each command is captured but not evaluated.
	$commands = [
		'sudo -S service sphinxsearch stop',	// Attempt to stop the sphinxsearch service in case it's running
		'sudo -S indexer --all',				// Run the sphinxsearch indexer
		'sudo -S service sphinxsearch start',	// Attempt to start the sphinxsearch service again
	];

	foreach ($commands as $command)
	{
		exec($command, $output, $retval);
	}
}
/**
* Run the shared search backend test against Sphinx
*
* The test is marked incomplete unless the database layer is mysqli.
* There must be no unconditional markTestIncomplete() call ahead of the
* check, otherwise the mysqli branch can never be reached.
*/
public function test_search_backend()
{
	// Sphinx test runs on MySQL/MariaDB only so far
	if ($this->db->sql_layer != 'mysqli')
	{
		$this->markTestIncomplete('Sphinx Tests are not supported');
	}
	else
	{
		parent::test_search_backend();
	}
}
}