diff --git a/phpBB/includes/functions_content.php b/phpBB/includes/functions_content.php index cdbc4aee09..9034f785ff 100644 --- a/phpBB/includes/functions_content.php +++ b/phpBB/includes/functions_content.php @@ -336,7 +336,7 @@ function get_context(string $text, array $words, int $length = 400): string $text = htmlspecialchars_decode($text); // Replace all spaces/invisible characters with single spaces - $text = preg_replace("/\s+/u", ' ', $text); + $text = preg_replace("/[\p{Z}\h\v]+/u", ' ', $text); $text_length = utf8_strlen($text); @@ -351,7 +351,6 @@ function get_context(string $text, array $words, int $length = 400): string $word_indexes[$pos] = $word; } } - if (!empty($word_indexes)) { ksort($word_indexes); @@ -400,21 +399,21 @@ function get_context(string $text, array $words, int $length = 400): string $fragment_end = $end - $start + 1; // Find the first valid alphanumeric character in the fragment to don't cut words - if ($start > 0 && preg_match('/[^\p{L}\p{N}][\p{L}\p{N}]/ui', $fragment, $matches, PREG_OFFSET_CAPTURE)) + if ($start > 0 && preg_match('/[^\p{L}\p{N}][\p{L}\p{N}]/u', $fragment, $matches, PREG_OFFSET_CAPTURE)) { - $fragment_start = (int) $matches[0][1] + 1; // first valid alphanumeric character + $fragment_start = mb_strlen(substr($fragment, 0, (int) $matches[0][1])) + 1; } // Find the last valid alphanumeric character in the fragment to don't cut words - if ($end < $text_length - 1 && preg_match_all('/[\p{L}\p{N}][^\p{L}\p{N}]/ui', $fragment, $matches, PREG_OFFSET_CAPTURE)) + if ($end < $text_length - 1 && preg_match_all('/[\p{L}\p{N}][^\p{L}\p{N}]/u', $fragment, $matches, PREG_OFFSET_CAPTURE)) { - $fragment_end = end($matches[0])[1]; // last valid alphanumeric character + $fragment_end = mb_strlen(substr($fragment, 0, end($matches[0])[1])); } $output[] = utf8_substr($fragment, $fragment_start, $fragment_end - $fragment_start + 1); } - return ($fragments[0][0] !== 0 ? '... ' : '') . htmlspecialchars(implode(' ... ', $output)) . ($end < $text_length - 1 ? ' ...' : ''); + return ($fragments[0][0] !== 0 ? '... ' : '') . utf8_htmlspecialchars(implode(' ... ', $output)) . ($end < $text_length - 1 ? ' ...' : ''); } /** diff --git a/tests/functions_content/get_context_test.php b/tests/functions_content/get_context_test.php index d4d087d675..93136d69fb 100644 --- a/tests/functions_content/get_context_test.php +++ b/tests/functions_content/get_context_test.php @@ -117,7 +117,7 @@ class phpbb_functions_content_get_context_test extends TestCase 'text' => 'Manzana,plátano,naranja,fresa,mango,uva,piña,pera,kiwi,cereza,sandía,melón,papaya,arándano,durazno', 'words' => ['piña'], 'length' => 20, - 'expected' => '... uva,piña,pera, ...', + 'expected' => '... uva,piña,pera ...', ] ]; }