diff --git a/phpBB/posting.php b/phpBB/posting.php index 309886f096..0ecaaae392 100644 --- a/phpBB/posting.php +++ b/phpBB/posting.php @@ -31,11 +31,11 @@ include($phpbb_root_path . 'includes/bbcode.'.$phpEx); // function clean_words($entry, &$stopword_list, &$synonym_list) { - $init_match = array("^", "$", "&", "(", ")", "<", ">", "`", "'", "|", ",", "@", "_", "?", "%"); - $init_replace = array(" ", " ", " ", " ", " ", " ", " ", " ", "", " ", " ", " ", " ", " ", " "); + static $init_match = array('^', '$', '&', '(', ')', '<', '>', '`', "'", '|', ',', '@', '_', '?', '%'); + static $init_replace = array(" ", " ", " ", " ", " ", " ", " ", " ", "", " ", " ", " ", " ", " ", " "); - $later_match = array("-", "~", "+", ".", "[", "]", "{", "}", ":", "\\", "/", "=", "#", "\"", ";", "*", "!"); - $later_replace = array(" ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " "); + static $later_match = array("-", "~", "+", ".", "[", "]", "{", "}", ":", "\\", "/", "=", "#", "\"", ";", "*", "!"); + static $later_replace = array(" ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " "); $entry = " " . stripslashes(strip_tags(strtolower($entry))) . " "; @@ -84,37 +84,13 @@ function split_words(&$entry) return $split_entries[1]; } -function remove_old( $post_id ) -{ - global $db; - - if( count($word_id_list) ) - { - $word_id_sql = ""; - for($i = 0; $i < count($word_id_list); $i++ ) - { - if( $word_id_sql != "" ) - { - $word_id_sql .= ", "; - } - $word_id_sql .= $word_id_list[$i]['word_id']; - } - $word_id_sql = " AND sl.word_id IN ($word_id_sql)"; - } - else - { - $word_id_sql = ""; - } - -} - function remove_common($percent, $word_id_list = array()) { global $db; + $word_id_sql = ""; if( count($word_id_list) ) { - $word_id_sql = ""; for($i = 0; $i < count($word_id_list); $i++ ) { if( $word_id_sql != "" ) @@ -123,69 +99,67 @@ function remove_common($percent, $word_id_list = array()) } $word_id_sql .= $word_id_list[$i]['word_id']; } - $word_id_sql = " AND w.word_id IN ($word_id_sql)"; + $word_id_sql = "WHERE word_id IN ($word_id_sql) "; + } - $sql = "SELECT w.word_id, SUM(m.word_count) AS post_occur_count - FROM " . SEARCH_WORD_TABLE . " w, " . SEARCH_MATCH_TABLE . " m - WHERE w.word_id = m.word_id - $word_id_sql - GROUP BY w.word_id - ORDER BY post_occur_count DESC"; - if( !$result = $db->sql_query($sql) ) + $sql = "SELECT SUM(forum_posts) AS total_posts + FROM " . FORUMS_TABLE ; + $result = $db->sql_query($sql); + if( !$result ) + { + message_die(GENERAL_ERROR, "Couldn't obtain post count", "", __LINE__, __FILE__, $sql); + } + + $row = $db->sql_fetchrow($result); + + $common_threshold = floor($row['total_posts'] * $percent); + + $sql = "SELECT word_id + FROM " . SEARCH_MATCH_TABLE . " + $word_id_sql + GROUP BY word_id + HAVING COUNT(word_id) > $common_threshold"; + $result = $db->sql_query($sql); + if( !$result ) + { + message_die(GENERAL_ERROR, "Couldn't obtain common word list", "", __LINE__, __FILE__, $sql); + } + + if( $post_count = $db->sql_numrows($result) ) + { + $common_word_id_list = array(); + while( $row = $db->sql_fetchrow($result) ) { - message_die(GENERAL_ERROR, "Couldn't obtain search word sums", "", __LINE__, __FILE__, $sql); + $common_word_id_list[] = $row['word_id']; } - if( $post_count = $db->sql_numrows($result) ) + $db->sql_freeresult($result); + + if(count($common_word_ids) != 0) { - $rowset = $db->sql_fetchrowset($result); + $common_word_id_list = implode(", ", $common_word_id_list); - $sql = "SELECT COUNT(post_id) AS total_posts - FROM " . POSTS_TABLE; - + $sql = "UPDATE " . SEARCH_WORD_TABLE . " + SET word_common = 1 + WHERE word_id IN ($common_word_id_list)"; $result = $db->sql_query($sql); if( !$result ) { - message_die(GENERAL_ERROR, "Couldn't obtain post count", "", __LINE__, __FILE__, $sql); + message_die(GENERAL_ERROR, "Couldn't delete word list entry", "", __LINE__, __FILE__, $sql); } - $row = $db->sql_fetchrow($result); - - $words_removed = 0; - $word_id_sql = ""; - for($i = 0; $i < $post_count; $i++) + $sql = "DELETE FROM " . SEARCH_WORD_MATCH . " + WHERE word_id IN ($common_word_id_list)"; + $result = $db->sql_query($sql); + if( !$result ) { - if( ( $rowset[$i]['post_occur_count'] / $row['total_posts'] ) >= $percent ) - { - if( $word_id_sql != "" ) - { - $word_id_sql .= ", "; - } - $word_id_sql .= $rowset[$i]['word_id']; - - $words_removed++; - } - } - - if( $word_id_sql != "" ) - { - $sql = "DELETE FROM " . SEARCH_WORD_TABLE . " - WHERE word_id IN ($word_id_sql)"; - $result = $db->sql_query($sql); - if( !$result ) - { - message_die(GENERAL_ERROR, "Couldn't delete word list entry", "", __LINE__, __FILE__, $sql); - } - - $sql = "DELETE FROM " . SEARCH_MATCH_TABLE . " - WHERE word_id IN ($word_id_sql)"; - $result = $db->sql_query($sql); - if( !$result ) - { - message_die(GENERAL_ERROR, "Couldn't delete word match entry", "", __LINE__, __FILE__, $sql); - } + message_die(GENERAL_ERROR, "Couldn't delete word match entry", "", __LINE__, __FILE__, $sql); } } + else + { + return 0; + } } return $words_removed; @@ -274,11 +248,10 @@ function remove_old_words($post_id) } $word_id_sql .= $check_words[$i]['word_id']; } - $word_id_sql = "word_id IN ($word_id_sql)"; $sql = "SELECT word_id, COUNT(post_id) AS post_occur_count FROM " . SEARCH_MATCH_TABLE . " - WHERE $word_id_sql + WHERE word_id IN ($word_id_sql) GROUP BY word_id ORDER BY post_occur_count DESC"; if( !$result = $db->sql_query($sql) ) @@ -333,39 +306,32 @@ function remove_old_words($post_id) return; } -function add_search_words($post_id, $text) +function add_search_words($post_id, $post_text, $post_title = "") { global $db, $phpbb_root_path, $board_config, $lang; $stopword_array = @file($phpbb_root_path . "language/lang_" . $board_config['default_lang'] . "/search_stopwords.txt"); $synonym_array = @file($phpbb_root_path . "language/lang_" . $board_config['default_lang'] . "/search_synonyms.txt"); - $search_text = clean_words($text, $stopword_array, $synonym_array); + $search_text = clean_words($post_text, $stopword_array, $synonym_array); +// $search_title = clean_words($post_title, $stopword_array, $synonym_array); + $search_matches = split_words($search_text); if( count($search_matches) ) { $word = array(); - $word_count = array(); - $phrase_string = $text; - $sql_in = ""; for ($j = 0; $j < count($search_matches); $j++) { $this_word = strtolower(trim($search_matches[$j])); - if( empty($word_count[$this_word]) ) - { - $word_count[$this_word] = 1; - } - $new_word = true; for($k = 0; $k < count($word); $k++) { - if( $this_word == $word[$k] ) + if( $this_word == $word[$k] || $this_word == "" ) { $new_word = false; - $word_count[$this_word]++; } } @@ -375,21 +341,19 @@ function add_search_words($post_id, $text) } } - for($j = 0; $j < count($word); $j++) - { - if( $word[$j] ) + $word_sql_in = ""; + for ($j = 0; $j < count($word); $j++) + { + if( $word_sql_in != "" ) { - if( $sql_in != "" ) - { - $sql_in .= ", "; - } - $sql_in .= "'" . $word[$j] . "'"; + $word_sql_in .= ", "; } + $word_sql_in .= "'" . $word[$j] . "'"; } - $sql = "SELECT word_id, word_text + $sql = "SELECT word_id, word_text, word_common FROM " . SEARCH_WORD_TABLE . " - WHERE word_text IN ($sql_in)"; + WHERE word_text IN ($word_sql_in)"; $result = $db->sql_query($sql); if( !$result ) { @@ -401,11 +365,13 @@ function add_search_words($post_id, $text) $check_words = $db->sql_fetchrowset($result); } + $match_word = array(); for ($j = 0; $j < count($word); $j++) { if( $word[$j] ) { $new_match = true; + $word_common = false; if( $word_check_count ) { @@ -413,12 +379,22 @@ function add_search_words($post_id, $text) { if( $word[$j] == $check_words[$k]['word_text'] ) { + if( $check_words[$k]['word_common'] ) + { + $word_common = true; + } + $new_match = false; - $word_id = $check_words[$k]['word_id']; } + } } + if( !$word_common ) + { + $match_word[] = "'" . $word[$j] . "'"; + } + if( $new_match ) { $sql = "INSERT INTO " . SEARCH_WORD_TABLE . " (word_text) @@ -428,22 +404,25 @@ function add_search_words($post_id, $text) { message_die(GENERAL_ERROR, "Couldn't insert new word", "", __LINE__, __FILE__, $sql); } - - $word_id = $db->sql_nextid(); - } - - $sql = "INSERT INTO " . SEARCH_MATCH_TABLE . " (post_id, word_id, word_count, title_match) - VALUES ($post_id, $word_id, " . $word_count[$word[$j]] . ", 0)"; - $result = $db->sql_query($sql); - if( !$result ) - { - message_die(GENERAL_ERROR, "Couldn't insert new word match", "", __LINE__, __FILE__, $sql); } } } + + $word_sql_in = implode(", ", $match_word); + + $sql = "INSERT INTO " . SEARCH_MATCH_TABLE . " (post_id, word_id, title_match) + SELECT $post_id, word_id, 0 + FROM " . SEARCH_WORD_TABLE . " + WHERE word_text IN ($word_sql_in)"; + $result = $db->sql_query($sql); + if( !$result ) + { + message_die(GENERAL_ERROR, "Couldn't insert new word matches", "", __LINE__, __FILE__, $sql); + } + } - remove_common(0.25, $check_words); + remove_common(0.15, $check_words); return; } diff --git a/phpBB/search.php b/phpBB/search.php index 819515de8a..bcf490b696 100644 --- a/phpBB/search.php +++ b/phpBB/search.php @@ -277,7 +277,6 @@ else if( $query_keywords != "" || $query_author != "" || $search_id ) $sortby_dir = "DESC"; } - $cleaned_search = clean_words_search($query_keywords); $cleaned_search = remove_stop_words($cleaned_search, $stopword_array); $cleaned_search = replace_synonyms($cleaned_search, $synonym_array); @@ -285,149 +284,139 @@ else if( $query_keywords != "" || $query_author != "" || $search_id ) $split_search = array(); $split_search = split_words($cleaned_search); - $word_match = array(); - $current_match_type = "and"; + $current_match_type = ""; + $word_count = 0; + $word_store = array(); + $post_id_match_list = array(); - for($i = 0; $i < count($split_search); $i++) + for($i = 0; $i < min(count($split_search), 10); $i++) { - if( $split_search[$i] == "and" ) + $word_match = str_replace("*", "%", trim($split_search[$i])); + + if( $word_match == "and" ) { $current_match_type = "and"; } - else if( $split_search[$i] == "or" ) + else if( $word_match == "or" ) { $current_match_type = "or"; } - else if( $split_search[$i] == "not" ) + else if( $word_match == "not" ) { $current_match_type = "not"; } else { - if( !empty($search_all_terms) ) + if( $current_match_type == "" ) { $current_match_type = "and"; } - $word_match[$current_match_type][] = $split_search[$i]; - } - } - - @reset($word_match); - - $word_count = 0; - $result_list = array(); - - while( list($match_type, $match_word_list) = each($word_match) ) - { - for($i = 0; $i < count($match_word_list); $i++ ) - { - $match_word = str_replace("*", "%", $match_word_list[$i]); - - $sql = "SELECT m.post_id, m.word_count - FROM " . SEARCH_WORD_TABLE . " w, " . SEARCH_MATCH_TABLE . " m - WHERE w.word_text LIKE '$match_word' - AND m.word_id = w.word_id - ORDER BY m.post_id, m.word_count DESC"; - $result = $db->sql_query($sql); - if( !$result ) + if( $word_match != "" ) { - message_die(GENERAL_ERROR, "Couldn't matched posts", "", __LINE__, __FILE__, $sql); - } + $word_store[] = $word_match; - $row = array(); + $sql = "SELECT m.post_id + FROM " . SEARCH_WORD_TABLE . " w, " . SEARCH_MATCH_TABLE . " m + WHERE w.word_text LIKE '$word_match' + AND m.word_id = w.word_id + ORDER BY m.post_id DESC"; + $result = $db->sql_query($sql); + if( !$result ) + { + message_die(GENERAL_ERROR, "Couldn't matched posts", "", __LINE__, __FILE__, $sql); + } - while( $temp_row = $db->sql_fetchrow($result) ) - { - $row['' . $temp_row['post_id'] . ''] = $temp_row['word_count']; - } - - @reset($row); - - while( list($post_id, $match_count) = each($row) ) - { if( !$word_count ) { - $result_list['' . $post_id . ''] = $match_count; - } - else if( $match_type == "and" ) - { - $result_list['' . $post_id . ''] = ( $result_list['' . $post_id . ''] ) ? $result_list['' . $post_id . ''] + intval($match_count) : 0; - } - else if( $match_type == "or" ) - { - if( $result_list['' . $post_id . ''] ) + while( $row = $db->sql_fetchrow($result) ) { - $result_list['' . $post_id . ''] += intval($match_count); - } - else - { - $result_list['' . $post_id . ''] = 0; - $result_list['' . $post_id . ''] += intval($match_count); + $post_id_match_list[] = $row['post_id']; } } - else if( $match_type == "not" ) + else { - $result_list['' . $post_id . ''] = 0; + if( $current_match_type == "or" ) + { + while( $row = $db->sql_fetchrow($result) ) + { + $post_id_match_list[] = $row['post_id']; + } + } + else if( $current_match_type == "and" ) + { + $rowset = $db->sql_fetchrowset($result); + + for($j = 0; $j < count($post_id_match_list); $j++) + { + $and_match = false; + for($k = 0; $k < count($rowset); $k++) + { + if( $post_id_match_list[$j] == $rowset[$k]['post_id'] ) + { + $and_match = true; + } + } + + if( !$and_match ) + { + $post_id_match_list[$j] = 0; + } + } + } + else if( $current_match_type == "not" ) + { + $rowset = $db->sql_fetchrowset($result); + + for($j = 0; $j < count($post_id_match_list); $j++) + { + $not_match = false; + for($k = 0; $k < count($rowset); $k++) + { + if( $post_id_match_list[$j] == $rowset[$k]['post_id'] ) + { + $not_match = true; + } + } + + if( $not_match ) + { + $post_id_match_list[$j] = 0; + } + } + } } + + $word_count++; + } - if( $match_type == "and" && $word_count ) - { - @reset($row); - @reset($result_list); - - while( list($post_id, $match_count) = each($result_list) ) - { - if( !$row['' . $post_id . ''] ) - { - $result_list['' . $post_id . ''] = 0; - } - } - } - $word_count++; + $current_match_type = ""; } } - @reset($result_list); - $total_posts = 0; $sql_post_id_in = ""; - while( list($post_id, $matches) = each($result_list) ) + for($i = 0; $i < count($post_id_match_list); $i++) { - if( $matches ) + if( $post_id_match_list[$i] ) { if( $sql_post_id_in != "" ) { $sql_post_id_in .= ", "; } - $sql_post_id_in .= $post_id; + $sql_post_id_in .= $post_id_match_list[$i]; $total_posts++; } } + - // - // Start building appropriate SQL query - // -/* switch(SQL_LAYER) - { - case 'mysql': - case 'postgresql': - $post_text_substring = "SUBSTRING(pt.post_text, 1, $return_chars) AS post_text"; - break; - - case 'mssql': - case 'odbc': - $post_text_substring = "SUBSTR(pt.post_text, 1, $return_chars) AS post_text"; - break; - } -*/ $sql_fields = ( $show_results == "posts") ? "pt.post_text, pt.post_subject, p.post_id, p.post_time, p.post_username, f.forum_name, t.topic_id, t.topic_title, t.topic_poster, t.topic_time, u.username, u.user_id, u.user_sig, u.user_sig_bbcode_uid" : "f.forum_id, f.forum_name, t.topic_id, t.topic_title, t.topic_poster, t.topic_time, t.topic_views, t.topic_replies, t.topic_last_post_id, u.username, u.user_id, u2.username as user2, u2.user_id as id2, p.post_time, p.post_username" ; - $sql_from = ( $show_results == "posts") ? FORUMS_TABLE . " f, " . TOPICS_TABLE . " t, " . USERS_TABLE . " u, " . POSTS_TABLE . " p, " . POSTS_TEXT_TABLE . " pt" : FORUMS_TABLE . " f, " . TOPICS_TABLE . " t, " . USERS_TABLE . " u, " . POSTS_TABLE . " p, " . POSTS_TABLE . " p2, " . USERS_TABLE . " u2"; + $sql_from = ( $show_results == "posts") ? FORUMS_TABLE . " f, " . TOPICS_TABLE . " t, " . USERS_TABLE . " u, " . POSTS_TABLE . " p, " . POSTS_TEXT_TABLE . " pt" : FORUMS_TABLE . " f, " . TOPICS_TABLE . " t, " . USERS_TABLE . " u, " . POSTS_TABLE . " p, " . USERS_TABLE . " u2"; - $sql_where = ( $show_results == "posts") ? "pt.post_id = p.post_id AND f.forum_id = p.forum_id AND p.topic_id = t.topic_id AND p.poster_id = u.user_id" : "f.forum_id = p.forum_id AND t.topic_id = p.topic_id AND u.user_id = t.topic_poster AND p2.post_id = t.topic_last_post_id AND u2.user_id = p2.poster_id"; + $sql_where = ( $show_results == "posts") ? "pt.post_id = p.post_id AND f.forum_id = p.forum_id AND p.topic_id = t.topic_id AND p.poster_id = u.user_id" : "f.forum_id = t.forum_id AND u.user_id = t.topic_poster AND p.post_id = t.topic_last_post_id AND u2.user_id = p.poster_id"; $search_sql = ""; @@ -436,16 +425,42 @@ else if( $query_keywords != "" || $query_author != "" || $search_id ) // if( $sql_post_id_in != "" ) { - $search_sql .= "p.post_id IN ($sql_post_id_in) "; - } + if( $show_results == "posts" ) + { + $search_sql .= "p.post_id IN ($sql_post_id_in) "; + } + else + { + $sql = "SELECT topic_id + FROM " . POSTS_TABLE . " + WHERE post_id IN ($sql_post_id_in) + GROUP BY topic_id"; + $result = $db->sql_query($sql); + if( !$result ) + { + message_die(GENERAL_ERROR, "Couldn't matched posts", "", __LINE__, __FILE__, $sql); + } + $sql_post_id_in = ""; + while( $row = $db->sql_fetchrow($result) ) + { + if( $sql_post_id_in != "" ) + { + $sql_post_id_in .= ", "; + } + $sql_post_id_in .= $row['topic_id']; + } + + $search_sql .= "t.topic_id IN ($sql_post_id_in) "; + + } + } // // Author name search // if( $query_author != "" ) { - $search_sql = preg_replace("/\(\)/", "", $search_sql); - $query_author = preg_replace("/\*/", "%", $query_author); + $query_author = str_replace("*", "%", trim($query_author)); if( $show_results == "posts" ) { @@ -467,13 +482,10 @@ else if( $query_keywords != "" || $query_author != "" || $search_id ) } // - // If user is logged in then we'll - // check to see which (if any) private - // forums they are allowed to view and - // include them in the search. + // If user is logged in then we'll check to see which (if any) private + // forums they are allowed to view and include them in the search. // - // If not logged in we explicitly prevent - // searching of private forums + // If not logged in we explicitly prevent searching of private forums // if( $search_sql != "" || $search_id == "newposts" || $search_id == "egosearch" || $search_id == "unanswered" ) { @@ -528,11 +540,6 @@ else if( $query_keywords != "" || $query_author != "" || $search_id ) $sql .= " AND p.post_time >= $search_time "; } - if( $show_results != "posts") - { - $sql .= " GROUP BY t.topic_id"; - } - $sql .= " ORDER BY " . $sortby_sql[$sortby] . " $sortby_dir"; if( !$result = $db->sql_query($sql) ) @@ -540,13 +547,16 @@ else if( $query_keywords != "" || $query_author != "" || $search_id ) message_die(GENERAL_ERROR, "Couldn't obtain search results", "", __LINE__, __FILE__, $sql); } - if( ( $total_match_count = $db->sql_numrows($result) ) > 500 ) + $total_match_count = $db->sql_numrows($result); +/* if( $total_match_count > 500 ) { - message_die(GENERAL_MESSAGE, "Your search returned too many matches, refine your search criteria and try again"); + message_die(GENERAL_MESSAGE, $lang['Too_many_results']);//"Your search returned too many matches, refine your search criteria and try again"; } - +*/ $searchset = $db->sql_fetchrowset($result); + $db->sql_freeresult($result); + // // Clean up search results table // @@ -624,6 +634,9 @@ else if( $query_keywords != "" || $query_author != "" || $search_id ) message_die(GENERAL_ERROR, "Couldn't insert search results", "", __LINE__, __FILE__, $sql); } } + + $db->sql_freeresult($result); + } else { @@ -690,6 +703,8 @@ else if( $query_keywords != "" || $query_author != "" || $search_id ) } $searchset = $db->sql_fetchrowset($result); + + $db->sql_freeresult($result); } else { @@ -1206,4 +1221,4 @@ $template->pparse("body"); include($phpbb_root_path . 'includes/page_tail.'.$phpEx); -?> +?> \ No newline at end of file