mirror of
https://github.com/phpbb/phpbb.git
synced 2025-06-08 04:18:52 +00:00
[ticket/17387] Fix handling unicode strings
PHPBB-17387
This commit is contained in:
parent
472b36877c
commit
c26ded6025
2 changed files with 7 additions and 8 deletions
|
@ -336,7 +336,7 @@ function get_context(string $text, array $words, int $length = 400): string
|
||||||
$text = htmlspecialchars_decode($text);
|
$text = htmlspecialchars_decode($text);
|
||||||
|
|
||||||
// Replace all spaces/invisible characters with single spaces
|
// Replace all spaces/invisible characters with single spaces
|
||||||
$text = preg_replace("/\s+/u", ' ', $text);
|
$text = preg_replace("/[\p{Z}\h\v]+/u", ' ', $text);
|
||||||
|
|
||||||
$text_length = utf8_strlen($text);
|
$text_length = utf8_strlen($text);
|
||||||
|
|
||||||
|
@ -351,7 +351,6 @@ function get_context(string $text, array $words, int $length = 400): string
|
||||||
$word_indexes[$pos] = $word;
|
$word_indexes[$pos] = $word;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!empty($word_indexes))
|
if (!empty($word_indexes))
|
||||||
{
|
{
|
||||||
ksort($word_indexes);
|
ksort($word_indexes);
|
||||||
|
@ -400,21 +399,21 @@ function get_context(string $text, array $words, int $length = 400): string
|
||||||
$fragment_end = $end - $start + 1;
|
$fragment_end = $end - $start + 1;
|
||||||
|
|
||||||
// Find the first valid alphanumeric character in the fragment to avoid cutting words
|
// Find the first valid alphanumeric character in the fragment to avoid cutting words
|
||||||
if ($start > 0 && preg_match('/[^\p{L}\p{N}][\p{L}\p{N}]/ui', $fragment, $matches, PREG_OFFSET_CAPTURE))
|
if ($start > 0 && preg_match('/[^\p{L}\p{N}][\p{L}\p{N}]/u', $fragment, $matches, PREG_OFFSET_CAPTURE))
|
||||||
{
|
{
|
||||||
$fragment_start = (int) $matches[0][1] + 1; // first valid alphanumeric character
|
$fragment_start = mb_strlen(substr($fragment, 0, (int) $matches[0][1])) + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find the last valid alphanumeric character in the fragment to avoid cutting words
|
// Find the last valid alphanumeric character in the fragment to avoid cutting words
|
||||||
if ($end < $text_length - 1 && preg_match_all('/[\p{L}\p{N}][^\p{L}\p{N}]/ui', $fragment, $matches, PREG_OFFSET_CAPTURE))
|
if ($end < $text_length - 1 && preg_match_all('/[\p{L}\p{N}][^\p{L}\p{N}]/u', $fragment, $matches, PREG_OFFSET_CAPTURE))
|
||||||
{
|
{
|
||||||
$fragment_end = end($matches[0])[1]; // last valid alphanumeric character
|
$fragment_end = mb_strlen(substr($fragment, 0, end($matches[0])[1]));
|
||||||
}
|
}
|
||||||
|
|
||||||
$output[] = utf8_substr($fragment, $fragment_start, $fragment_end - $fragment_start + 1);
|
$output[] = utf8_substr($fragment, $fragment_start, $fragment_end - $fragment_start + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ($fragments[0][0] !== 0 ? '... ' : '') . htmlspecialchars(implode(' ... ', $output)) . ($end < $text_length - 1 ? ' ...' : '');
|
return ($fragments[0][0] !== 0 ? '... ' : '') . utf8_htmlspecialchars(implode(' ... ', $output)) . ($end < $text_length - 1 ? ' ...' : '');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -117,7 +117,7 @@ class phpbb_functions_content_get_context_test extends TestCase
|
||||||
'text' => 'Manzana,plátano,naranja,fresa,mango,uva,piña,pera,kiwi,cereza,sandía,melón,papaya,arándano,durazno',
|
'text' => 'Manzana,plátano,naranja,fresa,mango,uva,piña,pera,kiwi,cereza,sandía,melón,papaya,arándano,durazno',
|
||||||
'words' => ['piña'],
|
'words' => ['piña'],
|
||||||
'length' => 20,
|
'length' => 20,
|
||||||
'expected' => '... uva,piña,pera, ...',
|
'expected' => '... uva,piña,pera ...',
|
||||||
]
|
]
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue