diff --git a/wp-includes/class-wp-token-map.php b/wp-includes/class-wp-token-map.php index 09a0b9303b..fc223b187f 100644 --- a/wp-includes/class-wp-token-map.php +++ b/wp-includes/class-wp-token-map.php @@ -440,6 +440,10 @@ class WP_Token_Map { * @return bool Whether there's an entry for the given word in the map. */ public function contains( string $word, string $case_sensitivity = 'case-sensitive' ): bool { + if ( str_contains( $word, "\x00" ) ) { + return false; + } + $ignore_case = 'ascii-case-insensitive' === $case_sensitivity; if ( $this->key_length >= strlen( $word ) ) { @@ -533,9 +537,17 @@ class WP_Token_Map { // Search for a long word first, if the text is long enough, and if that fails, a short one. if ( $text_length > $this->key_length ) { - $group_key = substr( $text, $offset, $this->key_length ); + /* + * Keys cannot contain null bytes, which is taken care of for the full words, + * but here it’s required to reject group keys with null bytes so that the + * lookup doesn’t get off track when scanning the group string. + */ + if ( strcspn( $text, "\x00", $offset, $this->key_length ) < $this->key_length ) { + return null; + } - $group_at = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key ); + $group_key = substr( $text, $offset, $this->key_length ); + $group_at = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key ); if ( false === $group_at ) { // Perhaps a short word then. return strlen( $this->small_words ) > 0 diff --git a/wp-includes/html-api/class-wp-html-open-elements.php b/wp-includes/html-api/class-wp-html-open-elements.php index e17f901c4d..0cd1f0fc45 100644 --- a/wp-includes/html-api/class-wp-html-open-elements.php +++ b/wp-includes/html-api/class-wp-html-open-elements.php @@ -738,7 +738,11 @@ class WP_HTML_Open_Elements { * When adding support for new elements, expand this switch to trap * cases where the precalculated value needs to change. */ - switch ( $item->node_name ) { + $namespaced_name = 'html' === $item->namespace + ? $item->node_name + : "{$item->namespace} {$item->node_name}"; + + switch ( $namespaced_name ) { case 'APPLET': case 'BUTTON': case 'CAPTION': diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php index d9d0d365c6..35d91fad31 100644 --- a/wp-includes/html-api/class-wp-html-processor.php +++ b/wp-includes/html-api/class-wp-html-processor.php @@ -813,8 +813,14 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * until there are events or until there are no more * tokens works in the meantime and isn't obviously wrong. */ - if ( empty( $this->element_queue ) && $this->step() ) { - return $this->next_visitable_token(); + if ( empty( $this->element_queue ) ) { + if ( $this->step() ) { + return $this->next_visitable_token(); + } + + if ( isset( $this->last_error ) ) { + return false; + } } // Process the next event on the queue. @@ -1401,6 +1407,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $tag_name = str_replace( "\x00", "\u{FFFD}", $this->get_tag() ); $in_html = 'html' === $this->get_namespace(); $qualified_name = $in_html ? strtolower( $tag_name ) : $this->get_qualified_tag_name(); + $qualified_name = str_replace( "\x00", "\u{FFFD}", $qualified_name ); if ( $this->is_tag_closer() ) { $html .= ""; @@ -1414,15 +1421,36 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { } $html .= "<{$qualified_name}"; + + $previous_attribute_was_true = false; + $seen_attribute_names = array(); foreach ( $attribute_names as $attribute_name ) { - $html .= " {$this->get_qualified_attribute_name( $attribute_name )}"; + $qualified_attribute_name = $this->get_qualified_attribute_name( $attribute_name ); + $qualified_attribute_name = str_replace( "\x00", "\u{FFFD}", $qualified_attribute_name ); + $qualified_attribute_name = wp_scrub_utf8( $qualified_attribute_name ); + if ( isset( $seen_attribute_names[ $qualified_attribute_name ] ) ) { + continue; + } else { + $seen_attribute_names[ $qualified_attribute_name ] = true; + } + + if ( + $previous_attribute_was_true && + isset( $qualified_attribute_name[0] ) && + '=' === $qualified_attribute_name[0] + ) { + $html .= '=""'; + } + + $html .= " {$qualified_attribute_name}"; $value = $this->get_attribute( $attribute_name ); if ( is_string( $value ) ) { $html .= '="' . htmlspecialchars( $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5 ) . '"'; } - $html = str_replace( "\x00", "\u{FFFD}", $html ); + $previous_attribute_was_true = true === $value; + $html = str_replace( "\x00", "\u{FFFD}", $html ); } if ( ! $in_html && $this->has_self_closing_flag() ) { @@ -2667,8 +2695,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { */ case '-FORM': if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { - $node = $this->state->form_element; - $this->state->form_element = null; + $node = $this->state->form_element; /* * > If node is null or if the stack of open elements does not have node @@ -2681,10 +2708,20 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { null === $node || ! $this->state->stack_of_open_elements->has_element_in_scope( 'FORM' ) ) { - // Parse error: ignore the token. + /* + * Parse error: ignore the token. + * + * Keep the form pointer intact when the end tag is ignored, such as + * when a FORM closing tag appears inside an SVG TITLE integration + * point. Otherwise the ignored token changes parser state in a way + * that serialization cannot represent, allowing a later FORM opener + * to appear in the first normalization pass and disappear on the second. + */ return $this->step(); } + $this->state->form_element = null; + $this->generate_implied_end_tags(); if ( $node !== $this->state->stack_of_open_elements->current_node() ) { // @todo Indicate a parse error once it's possible. This error does not impact the logic here. diff --git a/wp-includes/html-api/class-wp-html-tag-processor.php b/wp-includes/html-api/class-wp-html-tag-processor.php index 4015b352c1..77c1a471db 100644 --- a/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1424,7 +1424,7 @@ class WP_HTML_Tag_Processor { $this->tag_name_starts_at = $at; // Fail if there is no possible tag closer. - if ( false === $at || ( $at + $tag_length ) >= $doc_length ) { + if ( false === $at || ( $at + 2 + $tag_length ) >= $doc_length ) { return false; } @@ -1815,6 +1815,12 @@ class WP_HTML_Tag_Processor { // Abruptly-closed empty comments are a sequence of dashes followed by `>`. $span_of_dashes = strspn( $html, '-', $closer_at ); + if ( $doc_length <= $span_of_dashes + $closer_at ) { + $this->parser_state = self::STATE_INCOMPLETE_INPUT; + + return false; + } + if ( '>' === $html[ $closer_at + $span_of_dashes ] ) { /* * @todo When implementing `set_modifiable_text()` ensure that updates to this token diff --git a/wp-includes/version.php b/wp-includes/version.php index de84974c14..1261e7cc66 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -16,7 +16,7 @@ * * @global string $wp_version */ -$wp_version = '7.1-alpha-62438'; +$wp_version = '7.1-alpha-62439'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.