From e5f594466d38bfe3e1ae14cb047932bd2d8a1a4a Mon Sep 17 00:00:00 2001 From: Graham Eames Date: Fri, 24 Feb 2006 21:14:54 +0000 Subject: [PATCH] Some alterations to the new HTML code to eliminate the need for us to increase the minimum version requirements. Min. requirement remains at 4.0.3 git-svn-id: file:///svn/phpbb/branches/phpBB-2_0_0@5582 89ea8834-ac86-4346-8a33-228a782c2dd0 --- phpBB/docs/CHANGELOG.html | 10 +++- phpBB/includes/functions_post.php | 87 ++++++++++++++++++++++--------- 2 files changed, 70 insertions(+), 27 deletions(-) diff --git a/phpBB/docs/CHANGELOG.html b/phpBB/docs/CHANGELOG.html index c365b70b9f..d5d8b7cb25 100644 --- a/phpBB/docs/CHANGELOG.html +++ b/phpBB/docs/CHANGELOG.html @@ -74,6 +74,9 @@ p,ul,td {font-size:10pt;}
  • [Fix] retrieving category rows in index.php (Bug #90)
  • [Fix] improved index performance by determining the permissions before iterating through all forums (Bug #91)
  • [Fix] Better handling of short usernames within the search (Bug #105)
  • +
  • [Fix] Send a no-cache header on admin pages as well as normal board pages (Bug #149)
  • +
  • [Fix] Apply word censors to the message when quoting it (Bug #405)
  • +
  • [Fix] Improved performance of query in admin_groups (Bug #753)
  • [Fix] Workaround for an issue in either PHP or MSSQL resulting in a space being returned instead of an empty string (Bug #830)
  • [Fix] Correct use of default_style config value (Bug #861)
  • [Fix] Replace unneeded unset calls in admin_db_utilities.php - vanderaj
  • @@ -83,10 +86,13 @@ p,ul,td {font-size:10pt;}
  • [Fix] Assorted fixes and cleanup of admin_words.php, now requires confirmation of deletions
  • [Fix] Addition and editing of smilies can no longer be performed via GET, now requires confirmation of deletions
  • [Fix] Escape group names in admin_groups.php
  • +
  • [Sec] Replace strip_tags with htmlspecialchars in private message subject
  • [Sec] Some changes to HTML handling if enabled
  • -
  • [Sec] Escape any special characters in reverse dns - Anthrax101 +
  • [Sec] Escape any special characters in reverse dns - Anthrax101
  • +
  • [Sec] Typecast poll id values - Anthrax101
  • +
  • [Sec] Added configurable search flood control to reduce the effect of DoS style attacks
  • [Change] Changed handling of the case where a selected style doesn't exist in the database
  • -
  • [Change] PHP requirements are now 4.0.5 as a result of changes to HTML parsing
  • +
  • [Change] Changed handling of topic pruning to improve performance
  • diff --git a/phpBB/includes/functions_post.php b/phpBB/includes/functions_post.php index ae276901c5..d74e70d8cb 100644 --- a/phpBB/includes/functions_post.php +++ b/phpBB/includes/functions_post.php @@ -46,7 +46,25 @@ function prepare_message($message, $html_on, $bbcode_on, $smile_on, $bbcode_uid if ($html_on) { - $message = addslashes(preg_replace_callback('/<\/?(\w+)((?:[^\w>]+\w+(?:\s*=\s*(?:"[^"]*"|\'[^\']*\'|`[^`]*`|´[^´]*´|[^`´\'">]*))?)*)[\W]*?\/?>/', 'clean_html', stripslashes($message))); + // If HTML is on, we try to make it safe + // This approach is quite aggressive and anything that does not look like a valid tag + // is going to get converted to HTML entities + $message = stripslashes($message); + $html_match = '#<[^\w<]*(\w+)((?:"[^"]*"|\'[^\']*\'|[^<>\'"])+)?>#'; + $matches = array(); + + $message_split = preg_split($html_match, $message); + preg_match_all($html_match, $message, $matches); + + $message = ''; + + foreach ($message_split as $part) + { + $tag = array(array_shift($matches[0]), array_shift($matches[1]), array_shift($matches[2])); + $message .= htmlspecialchars($part) . 
clean_html($tag); + } + + $message = addslashes($message); } else { @@ -809,38 +827,57 @@ function clean_html($tag) { global $board_config; + if (empty($tag[0])) + { + return ''; + } + $allowed_html_tags = preg_split('/, */', strtolower($board_config['allow_html_tags'])); $disallowed_attributes = '/^(?:style|on)/'; - if (in_array(strtolower($tag[1]), $allowed_html_tags)) - { - $attributes = ''; - if (!empty($tag[2])) + // Check if this is an end tag + preg_match('/<[^\w\/]*\/[\W]*(\w+)/', $tag[0], $matches); + if (sizeof($matches)) + { + if (in_array(strtolower($matches[1]), $allowed_html_tags)) { - // Get all the elements of a tag so that they can be checked in turn - $matches = array(); - preg_match_all('/[\W]+(\w+)(?:\s*=\s*("[^"]*"|\'[^\']*\'|`[^`]*`|´[^´]*´|[^\'"`´]*))?/', $tag[2], $matches); - - foreach ($matches[1] as $key => $value) - { - // Remove any attributes which are not allowed - if (preg_match($disallowed_attributes, strtolower($value)) || (!preg_match('/([\'´`"]).*\\1/', $matches[2][$key]) && preg_match('/[^0-9a-zA-Z\\x2D\\x2E\\\x3A\\x5F]+/', $matches[2][$key]))) - { - continue; - } - // Build a string containing the allowed attributes, strip out anything that could harm the parser - $attributes .= ' ' . $value . '="' . htmlentities(preg_replace('/^[´`"\']?(.*?)[´`"\']?$/', '\1', $matches[2][$key])) . '"'; - } + return ''; } else { - // This is a closing tag or one without any attributes, it is safe - return strtolower($tag[0]); + return htmlspecialchars(''); } - // Build the HTML tag from the tag name and the allowed attributes - return '<' . strtolower($tag[1]) . $attributes . 
'>'; } - // This tag is not allowed so escape it - return htmlentities($tag[0]); + + // Check if this is an allowed tag + if (in_array(strtolower($tag[1]), $allowed_html_tags)) + { + $attributes = ''; + if (!empty($tag[2])) + { + preg_match_all('/[\W]*?(\w+)[\W]*?=[\W]*?(["\'])((?:(?!\2).)*)\2/', $tag[2], $test); + for ($i = 0; $i < sizeof($test[0]); $i++) + { + if (preg_match($disallowed_attributes, $test[1][$i])) + { + continue; + } + $attributes .= ' ' . $test[1][$i] . '=' . $test[2][$i] . str_replace(array('[', ']'), array('&#91;', '&#93;'), htmlspecialchars($test[3][$i])) . $test[2][$i]; + } + } + if (in_array(strtolower($tag[1]), $allowed_html_tags)) + { + return '<' . $tag[1] . $attributes . '>'; + } + else + { + return htmlspecialchars('<' . $tag[1] . $attributes . '>'); + } + } + // Finally, this is not an allowed tag so strip all the attributes and escape it + else + { + return htmlspecialchars('<' . $tag[1] . '>'); + } } ?> \ No newline at end of file