From f4891dfeeb4f3c21a09544a82cca40bec354f305 Mon Sep 17 00:00:00 2001 From: Graham Eames Date: Sat, 21 Jan 2006 18:48:44 +0000 Subject: [PATCH] A rewrite of the HTML parsing code NOTE: Minimum requirement for this code is now PHP 4.0.5 git-svn-id: file:///svn/phpbb/branches/phpBB-2_0_0@5481 89ea8834-ac86-4346-8a33-228a782c2dd0 --- phpBB/includes/functions_post.php | 104 +++++++++++++----------------- 1 file changed, 44 insertions(+), 60 deletions(-) diff --git a/phpBB/includes/functions_post.php b/phpBB/includes/functions_post.php index f95b835ab2..25add77cb6 100644 --- a/phpBB/includes/functions_post.php +++ b/phpBB/includes/functions_post.php @@ -46,66 +46,7 @@ function prepare_message($message, $html_on, $bbcode_on, $smile_on, $bbcode_uid if ($html_on) { - $allowed_html_tags = split(',', $board_config['allow_html_tags']); - - $end_html = 0; - $start_html = 1; - $tmp_message = ''; - $message = ' ' . $message . ' '; - - while ($start_html = strpos($message, '<', $start_html)) - { - $tmp_message .= preg_replace($html_entities_match, $html_entities_replace, substr($message, $end_html + 1, ($start_html - $end_html - 1))); - - $matches = array(); - preg_match('#^(/?\w+(?:\s+\w+=(?:\w+|"[^"]*"|\'[^\']*\'|`[^`]*`))*\s*?/?>)#', stripslashes(substr($message, $start_html + 1, strlen($message) - $start_html)), $matches); - $element = addslashes($matches[1]); - $end_html = $start_html + strlen($element); - - if ($end_html != $start_html) - { - $length = $end_html - $start_html + 1; - $hold_string = substr($message, $start_html, $length); - - $matches = array(); - preg_match('#()$#', stripslashes($hold_string), $matches); - $short_hold_string = addslashes($matches[1]); - - if (strlen($short_hold_string) < $length) - { - $tmp_message .= preg_replace($html_entities_match, $html_entities_replace, substr($hold_string, 0, $length - strlen($short_hold_string))); - $hold_string = $short_hold_string; - } - - $tagallowed = false; - for ($i = 0; $i < sizeof($allowed_html_tags); $i++) - { - $match_tag = trim($allowed_html_tags[$i]); - if (preg_match('#^<\/?' . $match_tag . '[> ]#is', $hold_string)) - { - $tagallowed = (preg_match('#^<\/?' . $match_tag . ' .*?(style[\t ]*?=|on[\w]+[\t ]*?=)#is', $hold_string)) ? false : true; - } - } - - $tmp_message .= ($length && !$tagallowed) ? preg_replace($html_entities_match, $html_entities_replace, $hold_string) : $hold_string; - - $start_html += $length; - } - else - { - $tmp_message .= preg_replace($html_entities_match, $html_entities_replace, substr($message, $start_html, strlen($message))); - - $start_html = strlen($message); - $end_html = $start_html; - } - } - - if (!$end_html || ($end_html != strlen($message) && $tmp_message != '')) - { - $tmp_message .= preg_replace($html_entities_match, $html_entities_replace, substr($message, $end_html + 1)); - } - - $message = ($tmp_message != '') ? trim($tmp_message) : trim($message); + $message = addslashes(preg_replace_callback('/<\/?(\w+)((?:\s+\w+=(?:"[^"]*"|\'[^\']*\'|`[^`]*`|[^\'"`]*?))*)\s*?\/?>/', 'clean_html', stripslashes($message))); } else { @@ -859,4 +800,47 @@ function generate_smilies($mode, $page_id) } } +/** +* Called from within prepare_message to clean included HTML tags if HTML is +* turned on for that post +* @param array $tag Matching text from the message to parse +*/ +function clean_html($tag) +{ + global $board_config; + + $allowed_html_tags = preg_split('/, */', strtolower($board_config['allow_html_tags'])); + $disallowed_attributes = '/^(?:style|on)/'; + + if (in_array(strtolower($tag[1]), $allowed_html_tags)) + { + $attributes = ''; + if (!empty($tag[2])) + { + // Get all the elements of a tag so that they can be checked in turn + $matches = array(); + preg_match_all('/\s+(\w+)=("[^"]*"|\'[^\']*\'|`[^`]*`|[^\'"`]*)/', $tag[2], $matches); + + foreach ($matches[1] as $key => $value) + { + // Remove any attributes which are not allowed + if (preg_match($disallowed_attributes, strtolower($value)) || preg_match('/(?!["\'`])[^0-9a-zA-Z\\x2D\\x2E\\x3A\\x5F]+(?!["\'`])/', $matches[2][$key])) + { + continue; + } + // Build a string containing the allowed attributes, strip out anything that could harm the parser + $attributes .= ' ' . $value . '="' . htmlentities(preg_replace('/^[`"\']?(.*?)[`"\']?$/', '\1', $matches[2][$key])) . '"'; + } + } + else + { + // This is a closing tag or one without any attributes, it is safe + return strtolower($tag[0]); + } + // Build the HTML tag from the tag name and the allowed attributes + return '<' . strtolower($tag[1]) . $attributes . '>'; + } + // This tag is not allowed so escape it + return htmlentities($tag[0]); +} ?> \ No newline at end of file