mirror of
https://github.com/WordPress/WordPress.git
synced 2026-06-19 07:37:07 +00:00
HTML API: Fixes for issues discovered while fuzzing.
Fuzz-testing was performed against the HTML API for finding edge cases that might be broken in the existing parsing code. A few issues were discovered with HTML normalization and warnings from out-of-bounds string reads. This patch contains new tests catching regressions on these behaviors and adds fixes for the discovered issues. Patch proposed by Codex and revised by dmsnell. Developed in: https://github.com/WordPress/wordpress-develop/pull/11982 Discussed in: https://core.trac.wordpress.org/ticket/65372 Fixes #65372. Built from https://develop.svn.wordpress.org/trunk@62439 git-svn-id: http://core.svn.wordpress.org/trunk@61720 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
@@ -440,6 +440,10 @@ class WP_Token_Map {
|
||||
* @return bool Whether there's an entry for the given word in the map.
|
||||
*/
|
||||
public function contains( string $word, string $case_sensitivity = 'case-sensitive' ): bool {
|
||||
if ( str_contains( $word, "\x00" ) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$ignore_case = 'ascii-case-insensitive' === $case_sensitivity;
|
||||
|
||||
if ( $this->key_length >= strlen( $word ) ) {
|
||||
@@ -533,9 +537,17 @@ class WP_Token_Map {
|
||||
|
||||
// Search for a long word first, if the text is long enough, and if that fails, a short one.
|
||||
if ( $text_length > $this->key_length ) {
|
||||
$group_key = substr( $text, $offset, $this->key_length );
|
||||
/*
|
||||
* Keys cannot contain null bytes, which is taken care of for the full words,
|
||||
* but here it’s required to reject group keys with null bytes so that the
|
||||
* lookup doesn’t get off track when scanning the group string.
|
||||
*/
|
||||
if ( strcspn( $text, "\x00", $offset, $this->key_length ) < $this->key_length ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$group_at = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key );
|
||||
$group_key = substr( $text, $offset, $this->key_length );
|
||||
$group_at = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key );
|
||||
if ( false === $group_at ) {
|
||||
// Perhaps a short word then.
|
||||
return strlen( $this->small_words ) > 0
|
||||
|
||||
@@ -738,7 +738,11 @@ class WP_HTML_Open_Elements {
|
||||
* When adding support for new elements, expand this switch to trap
|
||||
* cases where the precalculated value needs to change.
|
||||
*/
|
||||
switch ( $item->node_name ) {
|
||||
$namespaced_name = 'html' === $item->namespace
|
||||
? $item->node_name
|
||||
: "{$item->namespace} {$item->node_name}";
|
||||
|
||||
switch ( $namespaced_name ) {
|
||||
case 'APPLET':
|
||||
case 'BUTTON':
|
||||
case 'CAPTION':
|
||||
|
||||
@@ -813,8 +813,14 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
||||
* until there are events or until there are no more
|
||||
* tokens works in the meantime and isn't obviously wrong.
|
||||
*/
|
||||
if ( empty( $this->element_queue ) && $this->step() ) {
|
||||
return $this->next_visitable_token();
|
||||
if ( empty( $this->element_queue ) ) {
|
||||
if ( $this->step() ) {
|
||||
return $this->next_visitable_token();
|
||||
}
|
||||
|
||||
if ( isset( $this->last_error ) ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Process the next event on the queue.
|
||||
@@ -1401,6 +1407,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
||||
$tag_name = str_replace( "\x00", "\u{FFFD}", $this->get_tag() );
|
||||
$in_html = 'html' === $this->get_namespace();
|
||||
$qualified_name = $in_html ? strtolower( $tag_name ) : $this->get_qualified_tag_name();
|
||||
$qualified_name = str_replace( "\x00", "\u{FFFD}", $qualified_name );
|
||||
|
||||
if ( $this->is_tag_closer() ) {
|
||||
$html .= "</{$qualified_name}>";
|
||||
@@ -1414,15 +1421,36 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
||||
}
|
||||
|
||||
$html .= "<{$qualified_name}";
|
||||
|
||||
$previous_attribute_was_true = false;
|
||||
$seen_attribute_names = array();
|
||||
foreach ( $attribute_names as $attribute_name ) {
|
||||
$html .= " {$this->get_qualified_attribute_name( $attribute_name )}";
|
||||
$qualified_attribute_name = $this->get_qualified_attribute_name( $attribute_name );
|
||||
$qualified_attribute_name = str_replace( "\x00", "\u{FFFD}", $qualified_attribute_name );
|
||||
$qualified_attribute_name = wp_scrub_utf8( $qualified_attribute_name );
|
||||
if ( isset( $seen_attribute_names[ $qualified_attribute_name ] ) ) {
|
||||
continue;
|
||||
} else {
|
||||
$seen_attribute_names[ $qualified_attribute_name ] = true;
|
||||
}
|
||||
|
||||
if (
|
||||
$previous_attribute_was_true &&
|
||||
isset( $qualified_attribute_name[0] ) &&
|
||||
'=' === $qualified_attribute_name[0]
|
||||
) {
|
||||
$html .= '=""';
|
||||
}
|
||||
|
||||
$html .= " {$qualified_attribute_name}";
|
||||
$value = $this->get_attribute( $attribute_name );
|
||||
|
||||
if ( is_string( $value ) ) {
|
||||
$html .= '="' . htmlspecialchars( $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5 ) . '"';
|
||||
}
|
||||
|
||||
$html = str_replace( "\x00", "\u{FFFD}", $html );
|
||||
$previous_attribute_was_true = true === $value;
|
||||
$html = str_replace( "\x00", "\u{FFFD}", $html );
|
||||
}
|
||||
|
||||
if ( ! $in_html && $this->has_self_closing_flag() ) {
|
||||
@@ -2667,8 +2695,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
||||
*/
|
||||
case '-FORM':
|
||||
if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
|
||||
$node = $this->state->form_element;
|
||||
$this->state->form_element = null;
|
||||
$node = $this->state->form_element;
|
||||
|
||||
/*
|
||||
* > If node is null or if the stack of open elements does not have node
|
||||
@@ -2681,10 +2708,20 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
||||
null === $node ||
|
||||
! $this->state->stack_of_open_elements->has_element_in_scope( 'FORM' )
|
||||
) {
|
||||
// Parse error: ignore the token.
|
||||
/*
|
||||
* Parse error: ignore the token.
|
||||
*
|
||||
* Keep the form pointer intact when the end tag is ignored, such as
|
||||
* when a FORM closing tag appears inside an SVG TITLE integration
|
||||
* point. Otherwise the ignored token changes parser state in a way
|
||||
* that serialization cannot represent, allowing a later FORM opener
|
||||
* to appear in the first normalization pass and disappear on the second.
|
||||
*/
|
||||
return $this->step();
|
||||
}
|
||||
|
||||
$this->state->form_element = null;
|
||||
|
||||
$this->generate_implied_end_tags();
|
||||
if ( $node !== $this->state->stack_of_open_elements->current_node() ) {
|
||||
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
|
||||
|
||||
@@ -1424,7 +1424,7 @@ class WP_HTML_Tag_Processor {
|
||||
$this->tag_name_starts_at = $at;
|
||||
|
||||
// Fail if there is no possible tag closer.
|
||||
if ( false === $at || ( $at + $tag_length ) >= $doc_length ) {
|
||||
if ( false === $at || ( $at + 2 + $tag_length ) >= $doc_length ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1815,6 +1815,12 @@ class WP_HTML_Tag_Processor {
|
||||
|
||||
// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
|
||||
$span_of_dashes = strspn( $html, '-', $closer_at );
|
||||
if ( $doc_length <= $span_of_dashes + $closer_at ) {
|
||||
$this->parser_state = self::STATE_INCOMPLETE_INPUT;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
|
||||
/*
|
||||
* @todo When implementing `set_modifiable_text()` ensure that updates to this token
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
*
|
||||
* @global string $wp_version
|
||||
*/
|
||||
$wp_version = '7.1-alpha-62438';
|
||||
$wp_version = '7.1-alpha-62439';
|
||||
|
||||
/**
|
||||
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
|
||||
|
||||
Reference in New Issue
Block a user