diff --git a/phpBB/includes/functions_messenger.php b/phpBB/includes/functions_messenger.php index b7f3eeda36..eafbb01cf9 100644 --- a/phpBB/includes/functions_messenger.php +++ b/phpBB/includes/functions_messenger.php @@ -1840,66 +1840,84 @@ class smtp_class } /** -* Encodes the given string for proper display in UTF-8. -* -* This version is using base64 encoded data. The downside of this -* is if the mail client does not understand this encoding the user -* is basically doomed with an unreadable subject. -* -* Please note that this version fully supports RFC 2045 section 6.8. -* -* @param string $str -* @param string $eol End of line we are using (optional to be backwards compatible) -*/ + * Encodes the given string for proper display in UTF-8 or US-ASCII. + * + * This version is based on iconv_mime_encode() implementation + * from symfomy/polyfill-iconv + * https://github.com/symfony/polyfill-iconv/blob/fd324208ec59a39ebe776e6e9ec5540ad4f40aaa/Iconv.php#L355 + * + * @param string $str + * @param string $eol Lines delimiter (optional to be backwards compatible) + * + * @return string + */ function mail_encode($str, $eol = "\r\n") { // Check if string contains ASCII only characters $is_ascii = strlen($str) === utf8_strlen($str); - // Define start delimimter, end delimiter and spacer + $scheme = $is_ascii ? 'Q' : 'B'; + + // Define start delimiter, end delimiter // Use the Quoted-Printable encoding for ASCII strings to avoid unnecessary encoding in Base64 - $start = $is_ascii ? "=?US-ASCII?Q?" : "=?UTF-8?B?"; - $end = "?="; - $delimiter = "$eol "; + $start = '=?' . ($is_ascii ? 'US-ASCII' : 'UTF-8') . '?' . $scheme . '?'; + $end = '?='; // Maximum encoded-word length is 75 as per RFC 2047 section 2. - // $split_length *must* be a multiple of 4, but <= 75 - strlen($start . $delimiter . $end)!!! - $split_length = 75 - strlen($start . $delimiter . $end); + // $split_length *must* be a multiple of 4, but <= 75 - strlen($start . $eol . $end)!!! + $split_length = 75 - strlen($start . $eol . $end); $split_length = $split_length - $split_length % 4; - // Use the Quoted-Printable encoding for ASCII strings to avoid unnecessary encoding in Base64 - // quoted_printable_encode() splits lines at length of 75 characters with =\r\n delimiter, amend this feature - $encoded_str = $is_ascii ? str_replace("=\r\n", '', quoted_printable_encode($str)) : base64_encode($str); + $line_length = strlen($start) + strlen($end); + $line_offset = strlen($start) + 1; + $line_data = ''; - // If encoded string meets the limits, we just return with the correct data. - if (strlen($encoded_str) <= $split_length) + $is_quoted_printable = 'Q' === $scheme; + + preg_match_all('/./us', $str, $chars); + $chars = $chars[0] ?? []; + + $str = []; + foreach ($chars as $char) { - return $start . $encoded_str . $end; - } + $encoded_char = $is_quoted_printable + ? $char = preg_replace_callback( + '/[=_\?\x20\x00-\x1F\x80-\xFF]/', + function ($matches) + { + $hex = dechex(ord($matches[0])); + $hex = strlen($hex) == 1 ? "0$hex" : $hex; + return '=' . strtoupper($hex); + }, + $char + ) + : base64_encode($line_data . $char); - // If there is only ASCII data, we just return what we want, correctly splitting the lines. - if ($is_ascii) - { - return $start . implode($end . $delimiter . $start, str_split($encoded_str, $split_length)) . $end; - } - - // UTF-8 data, compose encoded lines - $array = utf8_str_split($str); - $str = ''; - - while (count($array)) - { - $text = ''; - - while (count($array) && intval((strlen($text . $array[0]) + 2) / 3) << 2 <= $split_length) + if (isset($encoded_char[$split_length - $line_length])) { - $text .= array_shift($array); + if (!$is_quoted_printable) + { + $line_data = base64_encode($line_data); + } + $str[] = $start . $line_data . $end; + $line_length = $line_offset; + $line_data = ''; } - $str .= $start . base64_encode($text) . $end . $delimiter; + $line_data .= $char; + $is_quoted_printable && $line_length += strlen($char); } - return substr($str, 0, -strlen($delimiter)); + if ($line_data !== '') + { + if (!$is_quoted_printable) + { + $line_data = base64_encode($line_data); + } + $str[] = $start . $line_data . $end; + } + + return implode($eol . ' ', $str); } /** diff --git a/tests/email/headers_encoding_test.php b/tests/email/headers_encoding_test.php new file mode 100644 index 0000000000..a884a4cc21 --- /dev/null +++ b/tests/email/headers_encoding_test.php @@ -0,0 +1,48 @@ + + * @license GNU General Public License, version 2 (GPL-2.0) + * + * For full copyright and license information, please see + * the docs/CREDITS.txt file. + * + */ + +class phpbb_headers_encoding_test extends phpbb_test_case +{ + protected function setUp(): void + { + global $phpbb_root_path, $phpEx; + + if (!function_exists('mail_encode')) + { + include($phpbb_root_path . 'includes/functions_messenger.' . $phpEx); + } + } + + public function headers_encoding_data() + { + return [ + ['test@yourdomain.com ', 'Q', 'US-ASCII'], + ['test@yourdomain.com <Несуществующий почтовый адрес phpBB>', 'B', 'UTF-8'], + ["\xE3\x83\x86\xE3\x82\xB9\xE3\x83\x88\xE3\x83\x86\xE3\x82\xB9\xE3\x83\x88", 'B', 'UTF-8'], + ]; + } + + /** + * @dataProvider headers_encoding_data + */ + public function test_headers_encoding($header, $scheme, $encoding) + { + $encoded_string = mail_encode($header); + $this->assertStringStartsWith("=?$encoding?$scheme?", $encoded_string); + $this->assertStringEndsWith('?=', $encoded_string); + + // Result of iconv_mime_decode() on decoded header should be equal to initial header + $decoded_string = iconv_mime_decode($encoded_string, 0, $encoding); + $this->assertEquals(0, strcmp($header, $decoded_string)); + } +}