HTML API: Fixes for issues discovered while fuzzing.

Fuzz-testing was performed against the HTML API to find edge cases
that the existing parsing code might handle incorrectly. A few issues
were discovered with HTML normalization and with warnings from
out-of-bounds string reads.

This patch contains new tests catching regressions on these behaviors
and adds fixes for the discovered issues.

Patch proposed by Codex and revised by dmsnell.

Developed in: https://github.com/WordPress/wordpress-develop/pull/11982
Discussed in: https://core.trac.wordpress.org/ticket/65372

Fixes #65372.

Built from https://develop.svn.wordpress.org/trunk@62439


git-svn-id: http://core.svn.wordpress.org/trunk@61720 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
dmsnell
2026-06-01 10:40:30 +00:00
parent dff8c68dda
commit d0062e4945
5 changed files with 71 additions and 12 deletions
+14 -2
View File
@@ -440,6 +440,10 @@ class WP_Token_Map {
* @return bool Whether there's an entry for the given word in the map.
*/
public function contains( string $word, string $case_sensitivity = 'case-sensitive' ): bool {
if ( str_contains( $word, "\x00" ) ) {
return false;
}
$ignore_case = 'ascii-case-insensitive' === $case_sensitivity;
if ( $this->key_length >= strlen( $word ) ) {
@@ -533,9 +537,17 @@ class WP_Token_Map {
// Search for a long word first, if the text is long enough, and if that fails, a short one.
if ( $text_length > $this->key_length ) {
$group_key = substr( $text, $offset, $this->key_length );
/*
* Keys cannot contain null bytes, which is taken care of for the full words,
* but here it's required to reject group keys with null bytes so that the
* lookup doesn't get off track when scanning the group string.
*/
if ( strcspn( $text, "\x00", $offset, $this->key_length ) < $this->key_length ) {
return null;
}
$group_at = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key );
$group_key = substr( $text, $offset, $this->key_length );
$group_at = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key );
if ( false === $group_at ) {
// Perhaps a short word then.
return strlen( $this->small_words ) > 0
@@ -738,7 +738,11 @@ class WP_HTML_Open_Elements {
* When adding support for new elements, expand this switch to trap
* cases where the precalculated value needs to change.
*/
switch ( $item->node_name ) {
$namespaced_name = 'html' === $item->namespace
? $item->node_name
: "{$item->namespace} {$item->node_name}";
switch ( $namespaced_name ) {
case 'APPLET':
case 'BUTTON':
case 'CAPTION':
@@ -813,8 +813,14 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
* until there are events or until there are no more
* tokens works in the meantime and isn't obviously wrong.
*/
if ( empty( $this->element_queue ) && $this->step() ) {
return $this->next_visitable_token();
if ( empty( $this->element_queue ) ) {
if ( $this->step() ) {
return $this->next_visitable_token();
}
if ( isset( $this->last_error ) ) {
return false;
}
}
// Process the next event on the queue.
@@ -1401,6 +1407,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
$tag_name = str_replace( "\x00", "\u{FFFD}", $this->get_tag() );
$in_html = 'html' === $this->get_namespace();
$qualified_name = $in_html ? strtolower( $tag_name ) : $this->get_qualified_tag_name();
$qualified_name = str_replace( "\x00", "\u{FFFD}", $qualified_name );
if ( $this->is_tag_closer() ) {
$html .= "</{$qualified_name}>";
@@ -1414,15 +1421,36 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
}
$html .= "<{$qualified_name}";
$previous_attribute_was_true = false;
$seen_attribute_names = array();
foreach ( $attribute_names as $attribute_name ) {
$html .= " {$this->get_qualified_attribute_name( $attribute_name )}";
$qualified_attribute_name = $this->get_qualified_attribute_name( $attribute_name );
$qualified_attribute_name = str_replace( "\x00", "\u{FFFD}", $qualified_attribute_name );
$qualified_attribute_name = wp_scrub_utf8( $qualified_attribute_name );
if ( isset( $seen_attribute_names[ $qualified_attribute_name ] ) ) {
continue;
} else {
$seen_attribute_names[ $qualified_attribute_name ] = true;
}
if (
$previous_attribute_was_true &&
isset( $qualified_attribute_name[0] ) &&
'=' === $qualified_attribute_name[0]
) {
$html .= '=""';
}
$html .= " {$qualified_attribute_name}";
$value = $this->get_attribute( $attribute_name );
if ( is_string( $value ) ) {
$html .= '="' . htmlspecialchars( $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5 ) . '"';
}
$html = str_replace( "\x00", "\u{FFFD}", $html );
$previous_attribute_was_true = true === $value;
$html = str_replace( "\x00", "\u{FFFD}", $html );
}
if ( ! $in_html && $this->has_self_closing_flag() ) {
@@ -2667,8 +2695,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
*/
case '-FORM':
if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
$node = $this->state->form_element;
$this->state->form_element = null;
$node = $this->state->form_element;
/*
* > If node is null or if the stack of open elements does not have node
@@ -2681,10 +2708,20 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
null === $node ||
! $this->state->stack_of_open_elements->has_element_in_scope( 'FORM' )
) {
// Parse error: ignore the token.
/*
* Parse error: ignore the token.
*
* Keep the form pointer intact when the end tag is ignored, such as
* when a FORM closing tag appears inside an SVG TITLE integration
* point. Otherwise the ignored token changes parser state in a way
* that serialization cannot represent, allowing a later FORM opener
* to appear in the first normalization pass and disappear on the second.
*/
return $this->step();
}
$this->state->form_element = null;
$this->generate_implied_end_tags();
if ( $node !== $this->state->stack_of_open_elements->current_node() ) {
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
@@ -1424,7 +1424,7 @@ class WP_HTML_Tag_Processor {
$this->tag_name_starts_at = $at;
// Fail if there is no possible tag closer.
if ( false === $at || ( $at + $tag_length ) >= $doc_length ) {
if ( false === $at || ( $at + 2 + $tag_length ) >= $doc_length ) {
return false;
}
@@ -1815,6 +1815,12 @@ class WP_HTML_Tag_Processor {
// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
$span_of_dashes = strspn( $html, '-', $closer_at );
if ( $doc_length <= $span_of_dashes + $closer_at ) {
$this->parser_state = self::STATE_INCOMPLETE_INPUT;
return false;
}
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
/*
* @todo When implementing `set_modifiable_text()` ensure that updates to this token
+1 -1
View File
@@ -16,7 +16,7 @@
*
* @global string $wp_version
*/
$wp_version = '7.1-alpha-62438';
$wp_version = '7.1-alpha-62439';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.