phpbb/phpBB/includes/search/fulltext_mysql.php
Nils Adermann 007f4f6987 - fixed the age calculation (note: turn on your brain before commiting something like this the next time) [Bug #3337]
- removed the split_words array, introduced an enforced search_query
- the forum used for global topics in the search is now a forum, and no longer a category [Bug #2561]
- Bug #3404
- allow accessing reports by report_id, in contrast to mcp_queue this cannot just use the post id, since there can be multiple closed reports per post, so closed reports have to be accessed by report id, open reports, can optionally be accessed by report id or post id [Bug #3149]
- only attempt to unflag reported topics on closing a report when there are any without other reported posts [Bug #3057]
- updated fulltext_mysql to use the the search_query string
- overwrote the old fulltext_native with our improved version since it consumes too much time to maintain boths and we would switch to the improved version later anyway
- always show a link to search a user's posts even if the postcount is zero since he  might only have posted in forums which do not count posts [Bug #3267]


git-svn-id: file:///svn/phpbb/trunk@6211 89ea8834-ac86-4346-8a33-228a782c2dd0
2006-07-27 19:02:47 +00:00

771 lines
No EOL
20 KiB
PHP

<?php
/**
*
* @package search
* @version $Id$
* @copyright (c) 2005 phpBB Group
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
*
*/
/**
*/
if (!defined('IN_PHPBB'))
{
exit;
}
/**
* @ignore
*/
include_once($phpbb_root_path . 'includes/search/search.' . $phpEx);
/**
* fulltext_mysql
* Fulltext search for MySQL
* @package search
*/
class fulltext_mysql extends search_backend
{
var $stats = array();
var $word_length = array();
var $split_words = array();
var $common_words = array();
function fulltext_mysql(&$error)
{
global $config;
$this->word_length = array('min' => $config['fulltext_mysql_min_word_len'], 'max' => $config['fulltext_mysql_max_word_len']);
$error = false;
}
/**
* Checks for correct MySQL version and stores min/max word length in the config
*/
function init()
{
global $db, $user;
if (strpos(SQL_LAYER, 'mysql') === false)
{
return $user->lang['FULLTEXT_MYSQL_INCOMPATIBLE_VERSION'];
}
$result = $db->sql_query('SELECT VERSION() AS mysql_version');
$version = $db->sql_fetchfield('mysql_version');
$db->sql_freeresult($result);
if (!preg_match('#^4|5|6#s', $version))
{
return $user->lang['FULLTEXT_MYSQL_INCOMPATIBLE_VERSION'];
}
$result = $db->sql_query('SHOW TABLE STATUS LIKE \'' . POSTS_TABLE . '\'');
$info = $db->sql_fetchrow($result);
$db->sql_freeresult($result);
$engine = '';
if (isset($info['Engine']))
{
$engine = $info['Engine'];
}
else if (isset($info['Type']))
{
$engine = $info['Type'];
}
if ($engine != 'MyISAM')
{
return $user->lang['FULLTEXT_MYSQL_NOT_MYISAM'];
}
$sql = 'SHOW VARIABLES
LIKE \'ft\_%\'';
$result = $db->sql_query($sql);
$mysql_info = array();
while ($row = $db->sql_fetchrow($result))
{
$mysql_info[$row['Variable_name']] = $row['Value'];
}
$db->sql_freeresult($result);
set_config('fulltext_mysql_max_word_len', $mysql_info['ft_max_word_len']);
set_config('fulltext_mysql_min_word_len', $mysql_info['ft_min_word_len']);
return false;
}
/**
* Splits keywords entered by a user into an array of words stored in $this->split_words
* Stores the tidied search query in $this->search_query
*
* @param string $keywords Contains the keyword as entered by the user
* @param string $terms is either 'all' or 'any'
* @return false if no valid keywords were found and otherwise true
*/
function split_keywords(&$keywords, $terms)
{
global $config;
$this->get_ignore_words();
$this->get_synonyms();
if ($terms == 'all')
{
$match = array('#\sand\s#i', '#\sor\s#i', '#\snot\s#i', '#\+#', '#-#', '#\|#');
$replace = array(' +', ' |', ' -', ' +', ' -', ' |');
$keywords = preg_replace($match, $replace, $keywords);
}
$match = array();
// New lines, carriage returns
$match[] = "#[\n\r]+#";
// NCRs like &nbsp; etc.
$match[] = '#(&amp;|&)[\#a-z0-9]+?;#i';
// Filter out as above
$keywords = preg_replace($match, ' ', trim($keywords));
// Split words
$split_keywords = preg_replace('#([^\w\'*])#', '$1$1', str_replace('\'\'', '\' \'', trim($keywords)));
$matches = array();
preg_match_all('#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#', $split_keywords, $matches);
$this->split_words = $matches[1];
if (sizeof($this->ignore_words))
{
$this->common_words = array_intersect($this->split_words, $this->ignore_words);
$this->split_words = array_diff($this->split_words, $this->ignore_words);
}
if (sizeof($this->replace_synonym))
{
$this->split_words = str_replace($this->replace_synonym, $this->match_synonym, $this->split_words);
}
foreach ($this->split_words as $i => $word)
{
$clean_word = preg_replace('#^[+\-|]#', '', $word);
// check word length
$clean_len = strlen(str_replace('*', '', $clean_word));
if (($clean_len < $config['fulltext_mysql_min_word_len']) || ($clean_len > $config['fulltext_mysql_max_word_len']))
{
$this->common_words[] = $word;
unset($this->split_words[$i]);
}
}
$this->search_query = implode(' ', $this->split_words);
if (sizeof($this->split_words))
{
$this->split_words = array_values($this->split_words);
sort($this->split_words);
return true;
}
return false;
}
/**
* Turns text into an array of words
*/
function split_message($text)
{
global $config;
$this->get_ignore_words();
$this->get_synonyms();
// Split words
$text = preg_replace('#([^\w\'*])#', '$1$1', str_replace('\'\'', '\' \'', trim($text)));
$matches = array();
preg_match_all('#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#', $text, $matches);
$text = $matches[1];
if (sizeof($this->ignore_words))
{
$text = array_diff($text, $this->ignore_words);
}
if (sizeof($this->replace_synonym))
{
$text = str_replace($this->replace_synonym, $this->match_synonym, $text);
}
// remove too short or too long words
$text = array_values($text);
for ($i = 0, $n = sizeof($text); $i < $n; $i++)
{
$text[$i] = trim($text[$i]);
if (strlen($text[$i]) < $config['fulltext_mysql_min_word_len'] || strlen($text[$i]) > $config['fulltext_mysql_max_word_len'])
{
unset($text[$i]);
}
}
return array_values($text);
}
/**
* Performs a search on keywords depending on display specific params.
*
* @param array $id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
* @param int $start indicates the first index of the page
* @param int $per_page number of ids each page is supposed to contain
* @return total number of results
*/
function keyword_search($type, &$fields, &$terms, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
{
global $config, $db;
// No keywords? No posts.
if (!sizeof($this->split_words))
{
return false;
}
// generate a search_key from all the options to identify the results
$search_key = md5(implode('#', array(
implode(',', $this->split_words),
$type,
$fields,
$terms,
$sort_days,
$sort_key,
$topic_id,
implode(',', $ex_fid_ary),
implode(',', $m_approve_fid_ary),
implode(',', $author_ary)
)));
// try reading the results from cache
$result_count = 0;
if ($this->obtain_ids($search_key, $result_count, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
{
return $result_count;
}
$id_ary = array();
$join_topic = ($type == 'posts') ? false : true;
// Build sql strings for sorting
$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
$sql_sort_table = $sql_sort_join = '';
switch ($sql_sort[0])
{
case 'u':
$sql_sort_table = USERS_TABLE . ' u, ';
$sql_sort_join = ($type == 'posts') ? ' AND u.user_id = p.poster_id ' : ' AND u.user_id = t.topic_poster ';
break;
case 't':
$join_topic = true;
break;
case 'f':
$sql_sort_table = FORUMS_TABLE . ' f, ';
$sql_sort_join = ' AND f.forum_id = p.forum_id ';
break;
}
// Build some display specific sql strings
switch ($fields)
{
case 'titleonly':
$sql_match = 'p.post_subject';
$sql_match_where = ' AND p.post_id = t.topic_first_post_id';
$join_topic = true;
break;
case 'msgonly':
$sql_match = 'p.post_text';
$sql_match_where = '';
break;
case 'firstpost':
$sql_match = 'p.post_subject, p.post_text';
$sql_match_where = ' AND p.post_id = t.topic_first_post_id';
$join_topic = true;
break;
default:
$sql_match = 'p.post_subject, p.post_text';
$sql_match_where = '';
break;
}
if (!sizeof($m_approve_fid_ary))
{
$m_approve_fid_sql = ' AND p.post_approved = 1';
}
else if ($m_approve_fid_ary === array(-1))
{
$m_approve_fid_sql = '';
}
else
{
$m_approve_fid_sql = ' AND (p.post_approved = 1 OR p.forum_id NOT IN (' . implode(', ', $m_approve_fid_ary) . '))';
}
$sql_select = (!$result_count) ? 'SQL_CALC_FOUND_ROWS ' : '';
$sql_select = ($type == 'posts') ? $sql_select . 'p.post_id' : 'DISTINCT ' . $sql_select . 't.topic_id';
$sql_from = ($join_topic) ? TOPICS_TABLE . ' t, ' : '';
$field = ($type == 'posts') ? 'post_id' : 'topic_id';
$sql_author = (sizeof($author_ary) == 1) ? ' = ' . $author_ary[0] : 'IN (' . implode(',', $author_ary) . ')';
$sql_where_options = $sql_sort_join;
$sql_where_options .= ($topic_id) ? ' AND p.topic_id = ' . $topic_id : '';
$sql_where_options .= ($join_topic) ? ' AND t.topic_id = p.topic_id' : '';
$sql_where_options .= (sizeof($ex_fid_ary)) ? ' AND p.forum_id NOT IN (' . implode(',', $ex_fid_ary) . ')' : '';
$sql_where_options .= $m_approve_fid_sql;
$sql_where_options .= (sizeof($author_ary)) ? ' AND p.poster_id ' . $sql_author : '';
$sql_where_options .= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
$sql_where_options .= $sql_match_where;
// split the words into words that have to be included or must not be included and optional words
$words = $any_words = array();
if ($terms == 'all')
{
foreach ($this->split_words as $id => $word)
{
if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0))
{
$words[] = $word;
}
else if (strpos($word, '|') === 0)
{
$any_words[] = substr($word, 1);
}
else
{
$words[] = '+' . $word;
}
}
}
else
{
foreach ($this->split_words as $id => $word)
{
if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0) || (strpos($word, '|') === 0))
{
$words[] = substr($word, 1);
}
else
{
$words[] = $word;
}
}
}
// Get the ids for the current result block
$any_words = (sizeof($any_words)) ? ' +(' . implode(' ', $any_words) . ')' : '';
$sql = "SELECT $sql_select
FROM $sql_from$sql_sort_table" . POSTS_TABLE . " p
WHERE MATCH ($sql_match) AGAINST ('" . $db->sql_escape(implode(' ', $words)) . $any_words . "' IN BOOLEAN MODE)
$sql_where_options
ORDER BY $sql_sort";
$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);
while ($row = $db->sql_fetchrow($result))
{
$id_ary[] = $row[$field];
}
$db->sql_freeresult($result);
$id_ary = array_unique($id_ary);
if (!sizeof($id_ary))
{
return false;
}
// if the total result count is not cached yet, retrieve it from the db
if (!$result_count)
{
$sql = 'SELECT FOUND_ROWS() as result_count';
$result = $db->sql_query($sql);
$result_count = (int) $db->sql_fetchfield('result_count');
$db->sql_freeresult($result);
if (!$result_count)
{
return false;
}
}
// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
$this->save_ids($search_key, implode(' ', $this->split_words), $author_ary, $result_count, $id_ary, $start, $sort_dir);
$id_ary = array_slice($id_ary, 0, (int) $per_page);
return $result_count;
}
/**
* Performs a search on an author's posts without caring about message contents. Depends on display specific params
*
* @param array $id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
* @param int $start indicates the first index of the page
* @param int $per_page number of ids each page is supposed to contain
* @return total number of results
*/
function author_search($type, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
{
global $config, $db;
// No author? No posts.
if (!sizeof($author_ary))
{
return 0;
}
// generate a search_key from all the options to identify the results
$search_key = md5(implode('#', array(
'',
$type,
'',
'',
$sort_days,
$sort_key,
$topic_id,
implode(',', $ex_fid_ary),
implode(',', $m_approve_fid_ary),
implode(',', $author_ary)
)));
// try reading the results from cache
$result_count = 0;
if ($this->obtain_ids($search_key, $result_count, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
{
return $result_count;
}
$id_ary = array();
// Create some display specific sql strings
$sql_author = 'p.poster_id ' . ((sizeof($author_ary) > 1) ? 'IN (' . implode(',', $author_ary) . ')' : '= ' . $author_ary[0]);
$sql_fora = (sizeof($ex_fid_ary)) ? ' AND p.forum_id NOT IN (' . implode(',', $ex_fid_ary) . ')' : '';
$sql_topic_id = ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
$sql_time = ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
// Build sql strings for sorting
$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
$sql_sort_table = $sql_sort_join = '';
switch ($sql_sort[0])
{
case 'u':
$sql_sort_table = USERS_TABLE . ' u, ';
$sql_sort_join = ($type == 'posts') ? ' AND u.user_id = p.poster_id ' : ' AND u.user_id = t.topic_poster ';
break;
case 't':
$sql_sort_table = ($type == 'posts') ? TOPICS_TABLE . ' t, ' : '';
$sql_sort_join = ($type == 'posts') ? ' AND t.topic_id = p.topic_id ' : '';
break;
case 'f':
$sql_sort_table = FORUMS_TABLE . ' f, ';
$sql_sort_join = ' AND f.forum_id = p.forum_id ';
break;
}
if (!sizeof($m_approve_fid_ary))
{
$m_approve_fid_sql = ' AND p.post_approved = 1';
}
else if ($m_approve_fid_ary == array(-1))
{
$m_approve_fid_sql = '';
}
else
{
$m_approve_fid_sql = ' AND (p.post_approved = 1 OR p.forum_id IN (' . implode($m_approve_fid_ary) . '))';
}
// If the cache was completely empty count the results
$calc_results = ($result_count) ? '' : 'SQL_CALC_FOUND_ROWS ';
// Build the query for really selecting the post_ids
if ($type == 'posts')
{
$sql = "SELECT {$calc_results}p.post_id
FROM " . $sql_sort_table . POSTS_TABLE . " p
WHERE $sql_author
$sql_topic_id
$m_approve_fid_sql
$sql_fora
$sql_sort_join
$sql_time
ORDER BY $sql_sort";
$field = 'post_id';
}
else
{
$sql = "SELECT {$calc_results}t.topic_id
FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
WHERE $sql_author
$sql_topic_id
$m_approve_fid_sql
$sql_fora
AND t.topic_id = p.topic_id
$sql_sort_join
$sql_time
GROUP BY t.topic_id
ORDER BY $sql_sort";
$field = 'topic_id';
}
// Only read one block of posts from the db and then cache it
$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);
while ($row = $db->sql_fetchrow($result))
{
$id_ary[] = $row[$field];
}
$db->sql_freeresult($result);
// retrieve the total result count if needed
if (!$result_count)
{
$sql = 'SELECT FOUND_ROWS() as result_count';
$result = $db->sql_query($sql);
$result_count = (int) $db->sql_fetchfield('result_count');
$db->sql_freeresult($result);
if (!$result_count)
{
return false;
}
}
if (sizeof($id_ary))
{
$this->save_ids($search_key, '', $author_ary, $result_count, $id_ary, $start, $sort_dir);
$id_ary = array_slice($id_ary, 0, $per_page);
return $result_count;
}
return false;
}
/**
* Destroys cached search results, that contained one of the new words in a post so the results won't be outdated.
*
* @param string $mode contains the post mode: edit, post, reply, quote ...
*/
function index($mode, $post_id, &$message, &$subject, $encoding, $poster_id, $forum_id)
{
global $db;
// Split old and new post/subject to obtain array of words
$split_text = $this->split_message($message);
$split_title = ($subject) ? $this->split_message($subject) : array();
$words = array();
if ($mode == 'edit')
{
$old_text = array();
$old_title = array();
$sql = 'SELECT post_text, post_subject
FROM ' . POSTS_TABLE . "
WHERE post_id = $post_id";
$result = $db->sql_query($sql);
if ($row = $db->sql_fetchrow($result))
{
$old_text = $this->split_message($row['post_text']);
$old_title = $this->split_message($row['post_subject']);
}
$db->sql_freeresult($result);
$words = array_unique(array_merge(
array_diff($split_text, $old_text),
array_diff($split_title, $old_title),
array_diff($old_text, $split_text),
array_diff($old_title, $split_title)
));
unset($old_title);
unset($old_text);
}
else
{
$words = array_unique(array_merge($split_text, $split_title));
}
unset($split_text);
unset($split_title);
// destroy cached search results containing any of the words removed or added
$this->destroy_cache($words, array($poster_id));
unset($words);
}
/**
* Destroy cached results, that might be outdated after deleting a post
*/
function index_remove($post_ids, $author_ids, $forum_ids)
{
$this->destroy_cache(array(), $author_ids);
}
/**
* Destroy old cache entries
*/
function tidy()
{
global $db, $config;
// destroy too old cached search results
$this->destroy_cache(array());
set_config('search_last_gc', time(), true);
}
/**
* Create fulltext index
*/
function create_index($acp_module, $u_action)
{
global $db;
// Make sure we can actually use MySQL with fulltext indexes
if ($error = $this->init())
{
return $error;
}
if (empty($this->stats))
{
$this->get_stats();
}
if (!isset($this->stats['post_subject']))
{
$db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' ADD FULLTEXT (post_subject)');
}
if (!isset($this->stats['post_text']))
{
$db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' ADD FULLTEXT (post_text)');
}
$db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);
return false;
}
/**
* Drop fulltext index
*/
function delete_index($acp_module, $u_action)
{
global $db;
// Make sure we can actually use MySQL with fulltext indexes
if ($error = $this->init())
{
return $error;
}
if (empty($this->stats))
{
$this->get_stats();
}
if (isset($this->stats['post_subject']))
{
$db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' DROP INDEX post_subject');
}
if (isset($this->stats['post_text']))
{
$db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' DROP INDEX post_text');
}
$db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);
return false;
}
/**
* Returns true if both FULLTEXT indexes exist
*/
function index_created()
{
if (empty($this->stats))
{
$this->get_stats();
}
return (isset($this->stats['post_text']) && isset($this->stats['post_subject'])) ? true : false;
}
/**
* Returns an associative array containing information about the indexes
*/
function index_stats()
{
global $user;
if (empty($this->stats))
{
$this->get_stats();
}
return array(
$user->lang['FULLTEXT_MYSQL_TOTAL_POSTS'] => ($this->index_created()) ? $this->stats['total_posts'] : 0,
$user->lang['FULLTEXT_MYSQL_TEXT_CARDINALITY'] => isset($this->stats['post_text']['Cardinality']) ? $this->stats['post_text']['Cardinality'] : 0,
$user->lang['FULLTEXT_MYSQL_SUBJECT_CARDINALITY'] => isset($this->stats['post_subject']['Cardinality']) ? $this->stats['post_subject']['Cardinality'] : 0,
);
}
function get_stats()
{
global $db;
if (strpos(SQL_LAYER, 'mysql') === false)
{
$this->stats = array();
return;
}
$sql = 'SHOW INDEX
FROM ' . POSTS_TABLE;
$result = $db->sql_query($sql);
while ($row = $db->sql_fetchrow($result))
{
// deal with older MySQL versions which didn't use Index_type
$index_type = (isset($row['Index_type'])) ? $row['Index_type'] : $row['Comment'];
if ($index_type == 'FULLTEXT')
{
if ($row['Column_name'] == 'post_text')
{
$this->stats['post_text'] = $row;
}
else if ($row['Column_name'] == 'post_subject')
{
$this->stats['post_subject'] = $row;
}
}
}
$db->sql_freeresult($result);
$sql = 'SELECT COUNT(post_id) as total_posts
FROM ' . POSTS_TABLE;
$result = $db->sql_query($sql);
$this->stats['total_posts'] = (int) $db->sql_fetchfield('total_posts');
$db->sql_freeresult($result);
}
}
?>