General: Add support for unicode email addresses in is_email and sanitize_email

This adds support for the Unicode address extensions in RFCs 6530–6533 and refactors the code so there are fewer long regexes and less duplication between sanitize_email and is_email. A new class, WP_Email_Address, provides the shared parts.

Opting out of Unicode support is easy: default-filters.php enables Unicode support by adding filters, which can be removed.

`sanitize_email` no longer makes major changes like removing an entire subdomain from someone's address; it only cleans up things like soft hyphens and whitespace — changes that happen when copying an email address from text.

Developed in: https://github.com/WordPress/wordpress-develop/pull/5237
Discussed in: https://core.trac.wordpress.org/ticket/31992

Props agulbra, akirk, benniledl, dmsnell, ironprogrammer, justlevine, mdawaffe, mukeshpanchal27, SirLouen, tusharbharti.
Fixes #31992.

Built from https://develop.svn.wordpress.org/trunk@62482


git-svn-id: http://core.svn.wordpress.org/trunk@61763 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
dmsnell
2026-06-10 15:05:45 +00:00
parent 9ffa8f1026
commit 6909e6384a
5 changed files with 557 additions and 164 deletions
+405
View File
@@ -0,0 +1,405 @@
<?php
/**
* Class 'WP_Email_Address'.
*
* @package WordPress
* @since 7.1.0
*/
/**
 * WP_Email_Address Class.
 *
 * Represents a validated email address. The address may or may not be deliverable.
 *
 * Use the static factory method {@see WP_Email_Address::from_string()} to create instances
 * of this class rather than the constructor. This method only returns an instance for
 * validated email addresses, and `null` if the provided email address fails to validate.
 *
 * Example:
 *
 *     $email = WP_Email_Address::from_string( 'wordpress@wordpress.org' );
 *     'wordpress'     === $email->get_localpart();
 *     'wordpress.org' === $email->get_unicode_domain();
 *
 * @see self::from_string()        to parse and validate a provided email address.
 * @see self::get_localpart()      for the local part or mailbox of the address.
 * @see self::get_ascii_domain()   for an encoded version of the domain best suited for
 *                                 printing in contexts where other software reads it and
 *                                 decodes it, such as in an `<a href>` attribute.
 * @see self::get_unicode_domain() for a decoded version of the domain best suited for
 *                                 printing in contexts where humans read it, where any
 *                                 Unicode characters print as they are, not as punycode.
 *
 * @since 7.1.0
 */
final class WP_Email_Address {
	/**
	 * Regex for the local part when Unicode is not enabled.
	 *
	 * Matches the character set from the WHATWG email specification:
	 * https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email)
	 *
	 * @since 7.1.0
	 * @var string
	 */
	const LOCAL_PART_ASCII_REGEX = '/^[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+$/';

	/**
	 * Regex for the local part when Unicode is enabled.
	 *
	 * Extends the WHATWG character set to allow Unicode letters and numbers,
	 * and applies the same grapheme-cluster structure used for domain labels:
	 * each cluster must open with a non-combining character.
	 *
	 * @since 7.1.0
	 * @var string
	 */
	const LOCAL_PART_UNICODE_REGEX = '/^([\p{L}\p{N}.!#$%&\'*+\/=?^_`{|}~-]\p{M}*)+$/u';

	/**
	 * Pattern for a single ASCII domain label (no dot).
	 *
	 * Matches a label from the WHATWG email specification: starts and ends with
	 * a letter or digit; internal characters may include hyphens.
	 *
	 * @since 7.1.0
	 * @var string
	 */
	const DOMAIN_LABEL_ASCII = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?';

	/**
	 * Pattern for a single Unicode domain label (no dot).
	 *
	 * Extends the ASCII label pattern to allow Unicode letters and numbers,
	 * with grapheme-cluster structure: each cluster must open with a letter or
	 * digit (not a combining mark), followed by zero or more combining marks.
	 *
	 * @since 7.1.0
	 * @var string
	 */
	const DOMAIN_LABEL_UNICODE = '[\p{L}\p{N}]\p{M}*(?:(?:[\p{L}\p{N}-]\p{M}*)*[\p{L}\p{N}]\p{M}*)?';

	/**
	 * Regex for the domain when Unicode is not enabled.
	 *
	 * Assembled from {@see self::DOMAIN_LABEL_ASCII}: one label, then zero or
	 * more dot-prefixed labels.
	 *
	 * @since 7.1.0
	 * @var string
	 */
	const DOMAIN_ASCII_REGEX = '/^' . self::DOMAIN_LABEL_ASCII . '(?:\.' . self::DOMAIN_LABEL_ASCII . ')*$/';

	/**
	 * Regex for the domain when Unicode is enabled.
	 *
	 * Assembled from {@see self::DOMAIN_LABEL_UNICODE}: one label, then zero or
	 * more dot-prefixed labels.
	 *
	 * @since 7.1.0
	 * @var string
	 */
	const DOMAIN_UNICODE_REGEX = '/^' . self::DOMAIN_LABEL_UNICODE . '(?:\.' . self::DOMAIN_LABEL_UNICODE . ')*$/u';

	/**
	 * The local part of the email address (the portion before the '@').
	 *
	 * @since 7.1.0
	 * @var string
	 */
	private $localpart;

	/**
	 * The email domain using punycode transcription instead of Unicode characters.
	 *
	 * Example:
	 *
	 *     $email = WP_Email_Address::from_string( 'checkout@bücher.tld' );
	 *     'xn--bcher-kva.tld' === $email->get_ascii_domain();
	 *
	 * When the domain was supplied with Unicode characters and `idn_to_ascii()` is
	 * unavailable (or fails), this holds the domain exactly as it was supplied.
	 *
	 * @see self::$decoded_domain
	 *
	 * @since 7.1.0
	 * @var string
	 */
	private $encoded_domain;

	/**
	 * The email domain, which may contain Unicode characters.
	 *
	 * Example:
	 *
	 *     $email = WP_Email_Address::from_string( 'checkout@bücher.tld' );
	 *     'bücher.tld' === $email->get_unicode_domain();
	 *
	 * @see self::$encoded_domain
	 *
	 * @since 7.1.0
	 * @var string
	 */
	private $decoded_domain;

	/**
	 * Private constructor. Use {@see WP_Email_Address::from_string()} to create instances.
	 *
	 * @since 7.1.0
	 * @private
	 *
	 * @param string $localpart      The local part of the email address.
	 * @param string $ascii_domain   The domain part of the email address, which may include punycode transcription.
	 * @param string $unicode_domain The domain part of the email address, which may contain Unicode characters.
	 */
	private function __construct( string $localpart, string $ascii_domain, string $unicode_domain ) {
		$this->localpart      = $localpart;
		$this->encoded_domain = $ascii_domain;
		$this->decoded_domain = $unicode_domain;
	}

	/**
	 * Creates a WP_Email_Address from a string.
	 *
	 * This method is intended to accept all strings that are considered valid email
	 * addresses by the WHATWG HTML specification for the `email` input type
	 * {@link https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email)}
	 * and some additional addresses, while rejecting strings that are more likely to
	 * be typos, mispastes, or attacks. This class may reject a few addresses that are
	 * valid according to RFC 5322, and it rejects domain labels carrying a reserved
	 * ACE-like prefix (two characters followed by `--`) other than `xn--`.
	 *
	 * Example:
	 *
	 *     // Typical all-US-ASCII email address.
	 *     $email = WP_Email_Address::from_string( 'webmaster@example.com' );
	 *     'webmaster'   === $email->get_localpart();
	 *     'example.com' === $email->get_ascii_domain();
	 *     'example.com' === $email->get_unicode_domain();
	 *
	 *     // Punycode domains are always decoded.
	 *     $email = WP_Email_Address::from_string( 'books@xn--bcher-kva.de' );
	 *     'books'            === $email->get_localpart();
	 *     'xn--bcher-kva.de' === $email->get_ascii_domain();
	 *     'bücher.de'        === $email->get_unicode_domain();
	 *
	 *     // Unicode localparts are accepted if Unicode addresses are requested (the default).
	 *     $email = WP_Email_Address::from_string( 'bücher@example.com' );
	 *     'bücher' === $email->get_localpart();
	 *
	 *     // Addresses with non-ASCII are rejected if ASCII-only addresses are requested.
	 *     null === WP_Email_Address::from_string( 'books@xn--bcher-kva.de', 'ascii' );
	 *     null === WP_Email_Address::from_string( 'bücher@xn--bcher-kva.de', 'ascii' );
	 *     null === WP_Email_Address::from_string( 'bücher@Bücher.de', 'ascii' );
	 *
	 *     // Some valid addresses (according to RFC 5322) are rejected.
	 *     null === WP_Email_Address::from_string( '"<iframe src=...>"@example.com' );
	 *
	 * Note! If an address contains punycode encodings but the required {@see idn_to_utf8()}
	 * function is missing (from the `intl` extension), this will reject that email address.
	 *
	 * @since 7.1.0
	 *
	 * @param string            $input         The email address string to parse.
	 * @param 'ascii'|'unicode' $character_set Allow only ASCII addresses or all valid Unicode addresses.
	 * @return WP_Email_Address|null A WP_Email_Address instance, or null if the input fails to validate.
	 */
	public static function from_string( string $input, string $character_set = 'unicode' ): ?WP_Email_Address {
		// There must be exactly one '@' sign.
		$at_pos = strpos( $input, '@' );
		if ( false === $at_pos || strrpos( $input, '@' ) !== $at_pos ) {
			return null;
		}

		$allow_unicode  = 'unicode' === $character_set;
		$localpart      = substr( $input, 0, $at_pos );
		$raw_domain     = substr( $input, $at_pos + 1 );
		$domain_labels  = explode( '.', $raw_domain );
		$local_pattern  = $allow_unicode ? self::LOCAL_PART_UNICODE_REGEX : self::LOCAL_PART_ASCII_REGEX;
		$domain_pattern = $allow_unicode ? self::DOMAIN_UNICODE_REGEX : self::DOMAIN_ASCII_REGEX;

		foreach ( $domain_labels as $label ) {
			// DNS limits each label to 63 octets.
			if ( strlen( $label ) > 63 ) {
				return null;
			}
		}

		/*
		 * Without support for decoding punycode it's not possible to validate
		 * the email address, so abort if any domain labels require decoding.
		 *
		 * The pattern detects `xn--` prefixes (in any letter case) and
		 * other, reserved, ACE-like prefixes.
		 */
		$needs_decoding = 1 === preg_match( '/(?:^|\.)..--/', $raw_domain );
		if ( $needs_decoding && ! function_exists( 'idn_to_utf8' ) ) {
			return null;
		}

		/*
		 * Decode any punycode labels to UTF-8 and reassemble the labels
		 * into $decoded_domain, which is validated further below.
		 */
		if ( $needs_decoding ) {
			$decoded_labels = array();
			foreach ( $domain_labels as $label ) {
				if ( 0 === strncasecmp( $label, 'xn--', 4 ) ) {
					// The ACE prefix is case-insensitive: "XN--" marks punycode just like "xn--".
					$label = idn_to_utf8( $label, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46 );
					if ( false === $label ) {
						return null;
					}
				} elseif ( 1 === preg_match( '/^..--/', $label ) ) {
					// Reject labels with a reserved ACE-like prefix (two chars followed by '--').
					return null;
				}
				$decoded_labels[] = $label;
			}
			$decoded_domain = implode( '.', $decoded_labels );
		} else {
			$decoded_domain = $raw_domain;
		}

		// Without Unicode support, reject any non-ASCII byte in either part.
		if (
			! $allow_unicode &&
			(
				1 === preg_match( '/[\x80-\xff]/', $input ) ||
				1 === preg_match( '/[\x80-\xff]/', $decoded_domain )
			)
		) {
			return null;
		}

		// All parts must be valid UTF-8, regardless of whether Unicode is requested. (A valid ASCII string is also valid UTF-8.)
		if (
			! wp_is_valid_utf8( $localpart ) ||
			! wp_is_valid_utf8( $raw_domain ) ||
			! wp_is_valid_utf8( $decoded_domain )
		) {
			return null;
		}

		// Validate the local part against the allowed character set.
		if ( 1 !== preg_match( $local_pattern, $localpart ) ) {
			/** This filter is documented in wp-includes/formatting.php */
			if ( ! apply_filters( 'is_email', false, $input, 'local_invalid_chars' ) ) {
				return null;
			}
		}

		// The domain must contain at least one dot.
		if ( ! str_contains( $raw_domain, '.' ) ) {
			/** This filter is documented in wp-includes/formatting.php */
			if ( ! apply_filters( 'is_email', false, $input, 'domain_no_periods' ) ) {
				return null;
			}
		}

		// Validate the domain against the allowed structure.
		if ( 1 !== preg_match( $domain_pattern, $decoded_domain ) ) {
			return null;
		}

		return new self( $localpart, self::to_ascii_domain( $raw_domain, $decoded_domain ), $decoded_domain );
	}

	/**
	 * Produces the ASCII (punycode) form of an already-validated domain.
	 *
	 * A domain supplied in pure ASCII is returned exactly as supplied. A domain
	 * supplied with Unicode characters is encoded with `idn_to_ascii()`; if that
	 * function is unavailable or the conversion fails, the supplied domain is
	 * returned unchanged so that the set of accepted addresses is unaffected.
	 *
	 * @since 7.1.0
	 *
	 * @param string $raw_domain     The domain exactly as it appeared in the input.
	 * @param string $decoded_domain The validated domain with any punycode labels decoded.
	 * @return string The domain for machine-readable contexts.
	 */
	private static function to_ascii_domain( string $raw_domain, string $decoded_domain ): string {
		if ( 1 !== preg_match( '/[\x80-\xff]/', $raw_domain ) ) {
			return $raw_domain;
		}

		if ( ! function_exists( 'idn_to_ascii' ) ) {
			return $raw_domain;
		}

		// Non-transitional processing keeps characters such as "ß" distinct instead of mapping them to "ss".
		$encoded = idn_to_ascii( $decoded_domain, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46 );

		return ( is_string( $encoded ) && '' !== $encoded ) ? $encoded : $raw_domain;
	}

	/**
	 * Returns the local part of the email address (the portion before the '@').
	 *
	 * Example:
	 *
	 *     $email = WP_Email_Address::from_string( 'checkout@bücher.tld' );
	 *     'checkout' === $email->get_localpart();
	 *
	 * @since 7.1.0
	 *
	 * @return string The local part of the email address.
	 */
	public function get_localpart(): string {
		return $this->localpart;
	}

	/**
	 * Returns the ASCII form of the domain, suitable for contexts in which
	 * other software will be reading and decoding it. May contain punycode.
	 *
	 * Example:
	 *
	 *     $email = WP_Email_Address::from_string( 'checkout@bücher.tld' );
	 *     'xn--bcher-kva.tld' === $email->get_ascii_domain();
	 *
	 * Note! Do not mix a Unicode local part with an ASCII domain part.
	 *       Prefer to keep the entire address in one form.
	 *
	 * @see self::get_unicode_domain()
	 *
	 * @since 7.1.0
	 *
	 * @return string Form of domain for machines, potentially containing
	 *                punycode translation of Unicode characters.
	 */
	public function get_ascii_domain(): string {
		return $this->encoded_domain;
	}

	/**
	 * Returns the Unicode form of the domain, suitable for contexts in which
	 * humans will be reading it. May contain Unicode characters.
	 *
	 * Example:
	 *
	 *     $email = WP_Email_Address::from_string( 'checkout@bücher.tld' );
	 *     'bücher.tld' === $email->get_unicode_domain();
	 *
	 * Note! Do not mix a Unicode local part with an ASCII domain part.
	 *       Prefer to keep the entire address in one form.
	 *
	 * @see self::get_ascii_domain()
	 *
	 * @since 7.1.0
	 *
	 * @return string The domain part of the email address.
	 */
	public function get_unicode_domain(): string {
		return $this->decoded_domain;
	}

	/**
	 * Returns the complete email address for contexts in which software
	 * will read it; may contain punycode transliterated Unicode characters.
	 *
	 * Use this method in places such as an `<a href>` attribute where other
	 * software will decode the address.
	 *
	 * The returned value can be passed back to {@see WP_Email_Address::from_string()}
	 * with the default character set to obtain an equivalent instance.
	 *
	 * @see self::get_unicode_address()
	 *
	 * @since 7.1.0
	 *
	 * @return string The complete email address with the machine-readable domain.
	 */
	public function get_ascii_address(): string {
		return $this->localpart . '@' . $this->encoded_domain;
	}

	/**
	 * Returns the complete email address for contexts in which humans
	 * will read it; may contain Unicode characters in the domain.
	 *
	 * Use this method in places such as HTML text nodes which visually
	 * show the email address and domain.
	 *
	 * The returned value can be passed back to {@see WP_Email_Address::from_string()}
	 * with the default character set to obtain an equivalent instance.
	 *
	 * @see self::get_ascii_address()
	 *
	 * @since 7.1.0
	 *
	 * @return string The complete email address.
	 */
	public function get_unicode_address(): string {
		return $this->localpart . '@' . $this->decoded_domain;
	}
}
+11
View File
@@ -87,6 +87,17 @@ foreach ( array(
add_filter( $filter, 'wp_filter_kses' );
}
// Email addresses: Allow Unicode if and only if the database can
// store them. This affects all addresses, including those entered
// into contact forms.
if ( 'utf8mb4' === $wpdb->charset ) {
add_filter( 'is_email', 'wp_is_unicode_email', 10, 3 );
add_filter( 'sanitize_email', 'wp_sanitize_unicode_email', 10, 3 );
} else {
add_filter( 'is_email', 'wp_is_ascii_email', 10, 3 );
add_filter( 'sanitize_email', 'wp_sanitize_ascii_email', 10, 3 );
}
// Display URL.
foreach ( array( 'user_url', 'link_url', 'link_image', 'link_rss', 'comment_url', 'post_guid' ) as $filter ) {
if ( is_admin() ) {
+126 -150
View File
@@ -2176,6 +2176,7 @@ function sanitize_user( $username, $strict = false ) {
return apply_filters( 'sanitize_user', $username, $raw_username, $strict );
}
/**
* Sanitizes a string key.
*
@@ -3589,7 +3590,14 @@ function convert_smilies( $text ) {
/**
* Verifies that an email is valid.
*
* Does not grok i18n domains. Not RFC compliant.
* This accepts the addresses that match the WHATWG specification,
* i.e. what browsers use for `<input type=email>`. It also accepts some
* additional addresses.
*
* By default this accepts addresses like info@grå.org, which Firefox
* also accepts in `<input type=email>`. You can disable Unicode support
* by using the wp_is_ascii_email filter instead of wp_is_unicode_email,
* which is the default.
*
* @since 0.71
*
@@ -3602,84 +3610,65 @@ function is_email( $email, $deprecated = false ) {
_deprecated_argument( __FUNCTION__, '3.0.0' );
}
// Test for the minimum length the email can be.
if ( strlen( $email ) < 6 ) {
/**
* Filters whether an email address is valid.
*
* This filter is evaluated under several different contexts, such as 'email_too_short',
* 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits',
* 'domain_no_periods', 'sub_hyphen_limits', 'sub_invalid_chars', or no specific context.
* This filter is evaluated under several different contexts, such as
* 'local_invalid_chars', 'domain_no_periods', or no specific context.
* Filters registered on this hook perform the actual validation; the
* default filter is registered in default-filters.php.
*
* @since 2.8.0
*
* @param string|false $is_email The email address if successfully passed the is_email() checks, false otherwise.
* @param string $email The email address being checked.
* @param string $context Context under which the email was tested.
* @param string|null $context Context under which the email was tested, or null for the initial call.
*/
return apply_filters( 'is_email', false, $email, 'email_too_short' );
}
return apply_filters( 'is_email', false, $email, null );
}
// Test for an @ character after the first position.
if ( false === strpos( $email, '@', 1 ) ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'is_email', false, $email, 'email_no_at' );
}
// Split out the local and domain parts.
list( $local, $domain ) = explode( '@', $email, 2 );
/*
* LOCAL PART
* Test for invalid characters.
/**
* Default is_email filter for databases that support Unicode (db charset is utf8mb4).
*
* Validates the email address using {@see WP_Email_Address::from_string()} with Unicode enabled.
* Only acts when $context is null (which it is in the initial validation call); later rescue-context calls are passed through.
*
* @since 7.1.0
*
* @param string|false $value The current filter value.
* @param string $email The email address being checked.
* @param string|null $context Validation context, or null for the initial call.
* @return string|false The email address if valid, false otherwise.
*/
if ( ! preg_match( '/^[a-zA-Z0-9!#$%&\'*+\/=?^_`{|}~\.-]+$/', $local ) ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'is_email', false, $email, 'local_invalid_chars' );
function wp_is_unicode_email( $value, $email, $context ) {
if ( null !== $context ) {
return $value;
}
/*
* DOMAIN PART
* Test for sequences of periods.
$result = WP_Email_Address::from_string( $email, 'unicode' );
return $result ? $result->get_unicode_address() : false;
}
/**
* Default is_email filter for databases that do not support Unicode (db charset is not utf8mb4).
*
* Validates the email address using {@see WP_Email_Address::from_string()} with Unicode disabled.
* Only acts when $context is null (which it is in the initial validation call); later rescue-context calls are passed through.
*
* @since 7.1.0
*
* @param string|false $value The current filter value.
* @param string $email The email address being checked.
* @param string|null $context Validation context, or null for the initial call.
* @return string|false The email address if valid, false otherwise.
*/
if ( preg_match( '/\.{2,}/', $domain ) ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'is_email', false, $email, 'domain_period_sequence' );
function wp_is_ascii_email( $value, $email, $context ) {
if ( null !== $context ) {
return $value;
}
// Test for leading and trailing periods and whitespace.
if ( trim( $domain, " \t\n\r\0\x0B." ) !== $domain ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'is_email', false, $email, 'domain_period_limits' );
}
// Split the domain into subs.
$subs = explode( '.', $domain );
// Assume the domain will have at least two subs.
if ( 2 > count( $subs ) ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'is_email', false, $email, 'domain_no_periods' );
}
// Loop through each sub.
foreach ( $subs as $sub ) {
// Test for leading and trailing hyphens and whitespace.
if ( trim( $sub, " \t\n\r\0\x0B-" ) !== $sub ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'is_email', false, $email, 'sub_hyphen_limits' );
}
// Test for invalid characters.
if ( ! preg_match( '/^[a-z0-9-]+$/i', $sub ) ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'is_email', false, $email, 'sub_invalid_chars' );
}
}
// Congratulations, your email made it!
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'is_email', $email, $email, null );
$result = WP_Email_Address::from_string( $email, 'ascii' );
return $result ? $result->get_unicode_address() : false;
}
/**
@@ -3808,109 +3797,96 @@ function iso8601_to_datetime( $date_string, $timezone = 'user' ) {
}
/**
* Strips out all characters that are not allowable in an email.
* Sanitizes an email address.
*
* Strips stray whitespace from the input, then strips trailing dots from the domain.
* This is designed to recover from cut/paste mistakes without any risk of transforming
* the input into a different address than the user intended.
*
* Validation and final form are determined by the 'sanitize_email' filter; the default
* filter is registered in default-filters.php and delegates to {@see WP_Email_Address::from_string()}.
*
* @since 1.5.0
* @since 7.1.0 Accepts Unicode email addresses on supporting platforms.
*
* @param string $email Email address to filter.
* @return string Filtered email address.
* @param string $email Email address to sanitize.
* @return string The sanitized email address, or an empty string if invalid.
*/
function sanitize_email( $email ) {
// Test for the minimum length the email can be.
if ( strlen( $email ) < 6 ) {
// Strip surrounding whitespace.
$email = trim( $email );
// Extract the address from "Display Name <username@domain>" format.
if ( 1 === preg_match( '/<([^>]+)>$/', $email, $matches ) ) {
$email = $matches[1];
}
/*
* Strip soft hyphens and whitespace adjacent to structural separators (dots and @),
* e.g. copy-paste artifacts like "info@example\u{00AD}.com" or "info@example .com".
*
* In some cases, e.g. autocorrect, some older software has been seen to add the
* space for unrecognized TLDs. This re-joins the parts for proper examination.
*/
$email = preg_replace( '/[\x{00AD}\s]*([.@])[\x{00AD}\s]*/u', '$1', $email ) ?? $email;
// Strip a trailing dot from the domain (e.g. if pasted from the end of a sentence).
if ( str_contains( $email, '@' ) ) {
list( $local, $domain ) = explode( '@', $email, 2 );
$domain = rtrim( $domain, '.' );
$email = $local . '@' . $domain;
}
/**
* Filters a sanitized email address.
*
* This filter is evaluated under several contexts, including 'email_too_short',
* 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits',
* 'domain_no_periods', 'domain_no_valid_subs', or no context.
* Filters registered on this hook perform the actual validation and return
* the canonical email string on success or an empty string on failure.
* The default filter is registered in default-filters.php.
*
* @since 2.8.0
*
* @param string $sanitized_email The sanitized email address.
* @param string $email The email address, as provided to sanitize_email().
* @param string|null $message A message to pass to the user. null if email is sanitized.
* @param string $sanitized_email The sanitized email address, or empty string.
* @param string $email The email address as provided to sanitize_email().
* @param string|null $context Validation context, or null for the initial call.
*/
return apply_filters( 'sanitize_email', '', $email, 'email_too_short' );
}
return apply_filters( 'sanitize_email', '', $email, null );
}
// Test for an @ character after the first position.
if ( false === strpos( $email, '@', 1 ) ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'sanitize_email', '', $email, 'email_no_at' );
}
// Split out the local and domain parts.
list( $local, $domain ) = explode( '@', $email, 2 );
/*
* LOCAL PART
* Test for invalid characters.
/**
* Default sanitize_email filter for databases that support Unicode (db charset is utf8mb4).
*
* Returns the canonical address from {@see WP_Email_Address::from_string()} with Unicode
* enabled, or an empty string if the address is invalid.
*
* @since 7.1.0
*
* @param string $value The current filter value.
* @param string $email The email address being sanitized.
* @param string|null $context Sanitization context, always null.
* @return string The canonical email address if valid, empty string otherwise.
*/
$local = preg_replace( '/[^a-zA-Z0-9!#$%&\'*+\/=?^_`{|}~\.-]/', '', $local );
if ( '' === $local ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'sanitize_email', '', $email, 'local_invalid_chars' );
}
function wp_sanitize_unicode_email( $value, $email, $context ) {
	// Parse with Unicode allowed; a null result means the address failed validation.
	$address = WP_Email_Address::from_string( $email, 'unicode' );
	if ( null === $address ) {
		return '';
	}

	// Hand back the human-readable form of the validated address.
	return $address->get_unicode_address();
}
/*
* DOMAIN PART
* Test for sequences of periods.
/**
* Default sanitize_email filter for databases that do not support Unicode (db charset is not utf8mb4).
*
* Returns the canonical address from {@see WP_Email_Address::from_string()} with Unicode
* disabled, or an empty string if the address is invalid.
*
* @since 7.1.0
*
* @param string $value The current filter value.
* @param string $email The email address being sanitized.
* @param string|null $context Sanitization context, always null.
* @return string The canonical email address if valid, empty string otherwise.
*/
$domain = preg_replace( '/\.{2,}/', '', $domain );
if ( '' === $domain ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'sanitize_email', '', $email, 'domain_period_sequence' );
}
// Test for leading and trailing periods and whitespace.
$domain = trim( $domain, " \t\n\r\0\x0B." );
if ( '' === $domain ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'sanitize_email', '', $email, 'domain_period_limits' );
}
// Split the domain into subs.
$subs = explode( '.', $domain );
// Assume the domain will have at least two subs.
if ( 2 > count( $subs ) ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'sanitize_email', '', $email, 'domain_no_periods' );
}
// Create an array that will contain valid subs.
$new_subs = array();
// Loop through each sub.
foreach ( $subs as $sub ) {
// Test for leading and trailing hyphens.
$sub = trim( $sub, " \t\n\r\0\x0B-" );
// Test for invalid characters.
$sub = preg_replace( '/[^a-z0-9-]+/i', '', $sub );
// If there's anything left, add it to the valid subs.
if ( '' !== $sub ) {
$new_subs[] = $sub;
}
}
// If there aren't 2 or more valid subs.
if ( 2 > count( $new_subs ) ) {
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'sanitize_email', '', $email, 'domain_no_valid_subs' );
}
// Join valid subs into the new domain.
$domain = implode( '.', $new_subs );
// Put the email back together.
$sanitized_email = $local . '@' . $domain;
// Congratulations, your email made it!
/** This filter is documented in wp-includes/formatting.php */
return apply_filters( 'sanitize_email', $sanitized_email, $email, null );
function wp_sanitize_ascii_email( $value, $email, $context ) {
	// Parse in ASCII-only mode; a null result means the address failed validation.
	$address = WP_Email_Address::from_string( $email, 'ascii' );
	if ( null === $address ) {
		return '';
	}

	// Return the address assembled from the validated local part and decoded domain.
	return $address->get_unicode_address();
}
/**
+1 -1
View File
@@ -16,7 +16,7 @@
*
* @global string $wp_version
*/
$wp_version = '7.1-alpha-62481';
$wp_version = '7.1-alpha-62482';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
+1
View File
@@ -112,6 +112,7 @@ wp_set_lang_dir();
require ABSPATH . WPINC . '/class-wp-list-util.php';
require ABSPATH . WPINC . '/class-wp-token-map.php';
require ABSPATH . WPINC . '/utf8.php';
require ABSPATH . WPINC . '/class-wp-email-address.php';
require ABSPATH . WPINC . '/formatting.php';
require ABSPATH . WPINC . '/meta.php';
require ABSPATH . WPINC . '/functions.php';