A rewrite of the HTML parsing code

NOTE: Minimum requirement for this code is now PHP 4.0.5


git-svn-id: file:///svn/phpbb/branches/phpBB-2_0_0@5481 89ea8834-ac86-4346-8a33-228a782c2dd0
This commit is contained in:
Graham Eames 2006-01-21 18:48:44 +00:00
parent 64b4b93524
commit f4891dfeeb

View file

@ -46,66 +46,7 @@ function prepare_message($message, $html_on, $bbcode_on, $smile_on, $bbcode_uid
if ($html_on) if ($html_on)
{ {
$allowed_html_tags = split(',', $board_config['allow_html_tags']); $message = addslashes(preg_replace_callback('/<\/?(\w+)((?:\s+\w+=(?:"[^"]*"|\'[^\']*\'|`[^`]*`|[^\'"`]*?))*)\s*?\/?>/', 'clean_html', stripslashes($message)));
$end_html = 0;
$start_html = 1;
$tmp_message = '';
$message = ' ' . $message . ' ';
while ($start_html = strpos($message, '<', $start_html))
{
$tmp_message .= preg_replace($html_entities_match, $html_entities_replace, substr($message, $end_html + 1, ($start_html - $end_html - 1)));
$matches = array();
preg_match('#^(/?\w+(?:\s+\w+=(?:\w+|"[^"]*"|\'[^\']*\'|`[^`]*`))*\s*?/?>)#', stripslashes(substr($message, $start_html + 1, strlen($message) - $start_html)), $matches);
$element = addslashes($matches[1]);
$end_html = $start_html + strlen($element);
if ($end_html != $start_html)
{
$length = $end_html - $start_html + 1;
$hold_string = substr($message, $start_html, $length);
$matches = array();
preg_match('#(</?\w+(?:\s+\w+=(?:\w+|"[^"]*"|\'[^\']*\'|`[^`]*`))*\s*?/?>)$#', stripslashes($hold_string), $matches);
$short_hold_string = addslashes($matches[1]);
if (strlen($short_hold_string) < $length)
{
$tmp_message .= preg_replace($html_entities_match, $html_entities_replace, substr($hold_string, 0, $length - strlen($short_hold_string)));
$hold_string = $short_hold_string;
}
$tagallowed = false;
for ($i = 0; $i < sizeof($allowed_html_tags); $i++)
{
$match_tag = trim($allowed_html_tags[$i]);
if (preg_match('#^<\/?' . $match_tag . '[> ]#is', $hold_string))
{
$tagallowed = (preg_match('#^<\/?' . $match_tag . ' .*?(style[\t ]*?=|on[\w]+[\t ]*?=)#is', $hold_string)) ? false : true;
}
}
$tmp_message .= ($length && !$tagallowed) ? preg_replace($html_entities_match, $html_entities_replace, $hold_string) : $hold_string;
$start_html += $length;
}
else
{
$tmp_message .= preg_replace($html_entities_match, $html_entities_replace, substr($message, $start_html, strlen($message)));
$start_html = strlen($message);
$end_html = $start_html;
}
}
if (!$end_html || ($end_html != strlen($message) && $tmp_message != ''))
{
$tmp_message .= preg_replace($html_entities_match, $html_entities_replace, substr($message, $end_html + 1));
}
$message = ($tmp_message != '') ? trim($tmp_message) : trim($message);
} }
else else
{ {
@ -859,4 +800,47 @@ function generate_smilies($mode, $page_id)
} }
} }
/**
* Called from within prepare_message to clean included HTML tags if HTML is
* turned on for that post
* @param array $tag Matching text from the message to parse
*/
function clean_html($tag)
{
global $board_config;
$allowed_html_tags = preg_split('/, */', strtolower($board_config['allow_html_tags']));
$disallowed_attributes = '/^(?:style|on)/';
if (in_array(strtolower($tag[1]), $allowed_html_tags))
{
$attributes = '';
if (!empty($tag[2]))
{
// Get all the elements of a tag so that they can be checked in turn
$matches = array();
preg_match_all('/\s+(\w+)=("[^"]*"|\'[^\']*\'|`[^`]*`|[^\'"`]*)/', $tag[2], $matches);
foreach ($matches[1] as $key => $value)
{
// Remove any attributes which are not allowed
if (preg_match($disallowed_attributes, strtolower($value)) || preg_match('/(?!["\'`])[^0-9a-zA-Z\\x2D\\x2E\\x3A\\x5F]+(?!["\'`])/', $matches[2][$key]))
{
continue;
}
// Build a string containing the allowed attributes, strip out anything that could harm the parser
$attributes .= ' ' . $value . '="' . htmlentities(preg_replace('/^[`"\']?(.*?)[`"\']?$/', '\1', $matches[2][$key])) . '"';
}
}
else
{
// This is a closing tag or one without any attributes, it is safe
return strtolower($tag[0]);
}
// Build the HTML tag from the tag name and the allowed attributes
return '<' . strtolower($tag[1]) . $attributes . '>';
}
// This tag is not allowed so escape it
return htmlentities($tag[0]);
}
?> ?>