Drop email in favor of our own multipart generator. Fixes #76666 (#86687)

This commit is contained in:
sivel / Matt Martz
2026-04-30 14:54:54 -05:00
committed by GitHub
parent 776f90ae4b
commit f074c20929
6 changed files with 342 additions and 116 deletions
@@ -0,0 +1,4 @@
minor_changes:
- url ``multipart/form-data`` - Replace the Python ``email``-based multipart generator with a custom generator, allowing binary content to be sent without a ``Content-Transfer-Encoding`` header
bugfixes:
- uri - Enable multipart/form-data requests over 2GB (https://github.com/ansible/ansible/issues/76666)
+310 -104
View File
@@ -30,14 +30,10 @@ this code instead.
from __future__ import annotations
import base64
import email.encoders
import email.mime.application
import email.mime.multipart
import email.mime.nonmultipart
import email.parser
import email.policy
import email.utils
import binascii
import collections.abc as _c
import http.client
import io
import mimetypes
import netrc
import os
@@ -50,6 +46,7 @@ import types # pylint: disable=unused-import
import urllib.error
import urllib.request
from contextlib import contextmanager
from functools import partial
from http import cookiejar
from urllib.parse import unquote, urlparse, urlunparse
from urllib.request import BaseHandler
@@ -68,6 +65,7 @@ else:
from ansible.module_utils.basic import missing_required_lib
from ansible.module_utils.common.collections import Mapping, is_sequence
from ansible.module_utils.common.text.converters import to_bytes, to_native, to_text
from ansible.module_utils.compat import typing as _t
try:
import ssl
@@ -1005,17 +1003,310 @@ def open_url(url, data=None, headers=None, method=None, use_proxy=True,
unredirected_headers=unredirected_headers, decompress=decompress, ciphers=ciphers, use_netrc=use_netrc)
def prepare_multipart(fields):
# deprecated: description='TypedDict Required/NotRequired' python_version='3.11'
# TODO: When Python 3.11+ is the minimum, use Required/NotRequired to properly
# enforce that at least one of filename or content is required
class _MultipartField(_t.TypedDict, total=False):
    """TypedDict for multipart field configuration.

    All keys are optional to the type checker (``total=False``), but
    at least one of 'filename' or 'content' must be provided; this is
    enforced at runtime when the field mapping is processed.
    """

    # With no 'content', the path of the file whose bytes become the payload
    # (its basename is sent as the part filename). With 'content', the
    # filename advertised for that content.
    filename: str
    # Literal payload for the part.
    content: str
    # Content-Type of the part; guessed from 'filename' when omitted.
    mime_type: str
    # Optional selector for the part's Content-Transfer-Encoding.
    multipart_encoding: _t.Literal['base64', '7or8bit']
# Content-Transfer-Encoding header values that can be emitted for a part.
_CTE: _t.TypeAlias = _t.Literal[b'base64', b'7bit']
# Maps the user-facing ``multipart_encoding`` value to the header value to emit.
_EncToCTEMap: dict[str, _CTE] = {
    'base64': b'base64',
    '7or8bit': b'7bit',
}
# Constants for multipart generation
# Number of bytes requested per read when streaming file content.
_CHUNK_SIZE = 65536
# Content type used when a part does not supply one and none can be guessed.
_DEFAULT_MIME_TYPE = b'application/octet-stream'
# Content type used for fields given as plain strings.
_TEXT_CONTENT_TYPE = b'text/plain'
class _MultipartReader(io.RawIOBase):
"""File-like reader for streaming multipart content without materializing in memory.
Wraps a generator that yields multipart chunks and provides read()/readinto()
interface compatible with urllib and other file-like consumers.
"""
def __init__(self, generator: _t.Generator[bytes, None, None]) -> None:
self._generator = generator
self._buffer = bytearray()
self._exhausted = False
def readable(self) -> bool:
return True
def readinto(self, buffer: bytearray | memoryview) -> int: # type: ignore[override]
"""Read up to len(buffer) bytes into buffer. Returns number of bytes read."""
size = len(buffer)
data = self.read(size)
n = len(data)
buffer[:n] = data
return n
def read(self, size: int = -1) -> bytes:
"""Read up to size bytes. If size is -1 or None, read all remaining data."""
if self._exhausted and not self._buffer:
return b''
if size is None or size < 0:
all_data = bytearray(self._buffer)
if not self._exhausted:
for chunk in self._generator:
all_data.extend(chunk)
self._exhausted = True
self._buffer.clear()
return bytes(all_data)
while len(self._buffer) < size and not self._exhausted:
try:
chunk = next(self._generator)
self._buffer.extend(chunk)
except StopIteration:
self._exhausted = True
break
data = bytes(self._buffer[:size])
del self._buffer[:size]
return data
class MultipartProtocol(_t.Protocol):
    """Protocol for multipart form-data generators.

    This defines the public interface for multipart objects without
    exposing implementation details. Use this for type hints.
    """

    @property
    def content_type(self) -> str:
        """Content-Type header value including boundary."""

    def add(
        self,
        name: bytes,
        filename: bytes | None = None,
        filepath: bytes | None = None,
        content: bytes | None = None,
        cte: _CTE | None = None,
        ct: bytes = _DEFAULT_MIME_TYPE,
    ) -> None:
        """Add a field to the multipart body.

        :arg name: Form field name
        :arg filename: Optional filename to advertise for the part
        :arg filepath: Path of a file supplying the part payload
        :arg content: Literal payload for the part
        :arg cte: Optional Content-Transfer-Encoding for the part
        :arg ct: Content-Type for the part
        """

    def as_iter(self) -> _t.Generator[bytes, None, None]:
        """Generator yielding each multipart part as bytes, then the final boundary."""

    def as_fp(self) -> _MultipartReader:
        """Return a file-like reader that streams multipart content."""

    def as_bytes(self) -> bytes:
        """Return all multipart content as bytes."""
class _Multipart:
    """Assemble a ``multipart/form-data`` body from parts registered with ``add()``.

    ``add()`` only records a part. File content is opened and read when the
    body is generated through ``as_iter()``, ``as_fp()`` or ``as_bytes()``.
    Each instance uses its own random boundary.
    """

    class _Part(_t.TypedDict):
        """Internal record describing one part of the body."""

        name: bytes
        filename: bytes | None
        filepath: bytes | None
        content: bytes | None
        cte: _CTE | None
        ct: bytes

    def __init__(self) -> None:
        self._nl = b'\r\n'
        self._boundary = b'----AnsibleFormBoundary' + binascii.hexlify(os.urandom(16))
        self._parts: list[_Multipart._Part] = []

    @property
    def content_type(self) -> str:
        """Content-Type header value including boundary."""
        return 'multipart/form-data; boundary=%s' % self._boundary.decode()

    def add(
        self,
        name: bytes,
        filename: bytes | None = None,
        filepath: bytes | None = None,
        content: bytes | None = None,
        cte: _CTE | None = None,
        ct: bytes = _DEFAULT_MIME_TYPE,
    ) -> None:
        """Add a field to the multipart body.

        :arg name: Form field name
        :arg filename: Optional filename placed in ``Content-Disposition``
        :arg filepath: Path of a file to read the payload from when the body
            is generated
        :arg content: Literal payload; an empty bytestring is accepted and
            produces an empty part
        :arg cte: Optional ``Content-Transfer-Encoding`` to apply
        :arg ct: ``Content-Type`` of the part
        :raises ValueError: if both ``filepath`` and ``content`` are given, or
            if neither is given
        """
        # deprecated: description='TypedDict Required/NotRequired for _MultipartField' python_version='3.11'
        if filepath and content:
            raise ValueError('only one of filepath or content can be supplied')
        # Only a missing payload is an error: an empty one (b'') is a valid
        # form value, e.g. an empty text field.
        if not filepath and content is None:
            raise ValueError('one of filepath or content must be supplied')

        self._parts.append({
            'name': name,
            'filename': filename,
            'filepath': filepath,
            'content': content,
            'cte': cte,
            'ct': ct,
        })

    def as_iter(self) -> _t.Generator[bytes, None, None]:
        """Generator yielding each multipart part as bytes, then the final boundary."""
        for part in self._parts:
            yield self._generate_header(part)
            if part['cte']:
                if part['filepath']:
                    with open(part['filepath'], 'rb') as f:
                        yield from self._encode(f, part['cte'])
                else:
                    yield from self._encode(io.BytesIO(part['content']), part['cte'])
                # encoders are expected to return their own trailing nl
            else:
                if part['filepath']:
                    with open(part['filepath'], 'rb') as f:
                        yield from iter(partial(f.read, _CHUNK_SIZE), b'')
                else:
                    yield part['content']
                yield self._nl

        yield b'--' + self._boundary + b'--' + self._nl

    def as_fp(self) -> _MultipartReader:
        """Return a file-like reader that streams multipart content.

        The returned reader supports read() and readinto() for streaming the
        multipart body without materializing it entirely in memory. This is
        useful for large file uploads (>2GB).
        """
        return _MultipartReader(self.as_iter())

    def as_bytes(self) -> bytes:
        """Return all multipart content as bytes.

        Warning: This materializes the entire multipart body in memory.
        For large files, use as_fp() instead to stream the content.
        """
        return self.as_fp().read()

    def _generate_header(self, part: _Multipart._Part) -> bytes:
        """Return the boundary line and headers for ``part``, ending with a blank line."""
        # NOTE(review): name and filename are interpolated verbatim; a double
        # quote or CR/LF in either would corrupt the header. Confirm whether
        # callers sanitize these or whether escaping should happen here.
        disposition = b'form-data; name="%s"' % part['name']
        if part['filename']:
            disposition += b'; filename="%s"' % part['filename']

        lines = [b'--' + self._boundary]
        if part['cte']:
            lines.append(b'Content-Transfer-Encoding: ' + part['cte'])
        lines.append(b'Content-Type: ' + part['ct'])
        lines.append(b'Content-Disposition: ' + disposition)
        # One newline terminates the last header, the second is the blank
        # line separating headers from the payload.
        return self._nl.join(lines) + self._nl + self._nl

    def _encode_base64(self, f: io.RawIOBase | io.BufferedIOBase) -> _t.Generator[bytes, None, None]:
        """Encode file-like object content as base64, yielding chunks."""
        # 57 bytes encodes to exactly 76 base64 chars (one line)
        for chunk in iter(partial(f.read, 57), b''):
            yield binascii.b2a_base64(chunk, newline=False) + self._nl

    def _encode_passthru(self, f: io.RawIOBase | io.BufferedIOBase) -> _t.Generator[bytes, None, None]:
        """Read file-like object content as-is, yielding chunks."""
        yield from iter(partial(f.read, _CHUNK_SIZE), b'')
        yield self._nl

    def _encode(self, f: io.RawIOBase | io.BufferedIOBase, cte: _CTE) -> _t.Generator[bytes, None, None]:
        """Yield the content of ``f`` encoded according to ``cte``."""
        if cte == b'base64':
            yield from self._encode_base64(f)
        else:
            yield from self._encode_passthru(f)
def create_multipart(fields: _t.Mapping[str, str | _MultipartField]) -> MultipartProtocol:
    """Build a ``MultipartProtocol`` object from a mapping of form fields.

    Each entry of ``fields`` becomes one part, added in sorted key order. A
    string value becomes a ``text/plain`` part. A mapping value describes a
    file or literal content part, see ``prepare_multipart()`` for the
    accepted keys.

    Prefer this over ``prepare_multipart()`` when the body should be
    streamed (e.g., for large files) rather than built in memory.

    :arg fields: Mapping of field name to string or field mapping
    :returns: Object exposing ``content_type`` and the body generators
    :raises TypeError: if ``fields`` is not a mapping, or a value is neither
        a string nor a mapping
    :raises ValueError: if a field mapping has neither ``filename`` nor
        ``content``, or names an unknown ``multipart_encoding``
    """
    if not isinstance(fields, Mapping):
        raise TypeError(
            'Mapping is required, cannot be type %s' % fields.__class__.__name__
        )

    multipart = _Multipart()
    for name, spec in sorted(fields.items()):
        # Plain strings are simple text form fields.
        if isinstance(spec, str):
            multipart.add(
                name=to_bytes(name),
                content=to_bytes(spec),
                ct=_TEXT_CONTENT_TYPE,
            )
            continue

        if not isinstance(spec, Mapping):
            raise TypeError(
                'value must be a string, or mapping, cannot be type %s' % spec.__class__.__name__
            )

        path = spec.get('filename')
        payload = spec.get('content')
        if not (path or payload):
            raise ValueError('at least one of filename or content must be provided')

        # An explicit mime_type wins; otherwise guess from the filename and
        # fall back to the default when guessing yields nothing or fails.
        content_type: bytes = to_bytes(spec.get('mime_type'), nonstring='passthru')
        if not content_type:
            try:
                guessed = mimetypes.guess_type(path or '', strict=False)[0]
                content_type = to_bytes(guessed, nonstring='passthru') or _DEFAULT_MIME_TYPE
            except Exception:
                content_type = _DEFAULT_MIME_TYPE

        transfer_encoding: _CTE | None = None
        requested_encoding = spec.get('multipart_encoding')
        if requested_encoding:
            try:
                transfer_encoding = _EncToCTEMap[requested_encoding]
            except KeyError:
                raise ValueError('multipart_encoding must be one of %s.' % repr(tuple(_EncToCTEMap)))

        if path and not payload:
            # Payload comes from the file; only its basename is advertised.
            b_path = to_bytes(path, errors='surrogate_or_strict')
            multipart.add(
                name=to_bytes(name),
                filename=os.path.basename(b_path),
                filepath=b_path,
                cte=transfer_encoding,
                ct=content_type,
            )
        else:
            multipart.add(
                name=to_bytes(name),
                filename=to_bytes(path) if path else None,
                content=to_bytes(payload),
                cte=transfer_encoding,
                ct=content_type,
            )

    return multipart
def prepare_multipart(fields: _t.Mapping[str, str | _MultipartField]) -> tuple[str, bytes]:
"""Takes a mapping, and prepares a multipart/form-data body
:arg fields: Mapping
:returns: tuple of (content_type, body) where ``content_type`` is
the ``multipart/form-data`` ``Content-Type`` header including
``boundary`` and ``body`` is the prepared bytestring body
Payload content from a file will be base64 encoded and will include
the appropriate ``Content-Transfer-Encoding`` and ``Content-Type``
headers.
Payload content from a file can optionally be encoded when
``multipart_encoding`` is set to 'base64' or '7or8bit'. Without
encoding specified, files are sent as-is (binary). The appropriate
``Content-Transfer-Encoding`` and ``Content-Type`` headers will be
included.
Example:
{
@@ -1031,93 +1322,8 @@ def prepare_multipart(fields):
"text_form_field": "value"
}
"""
if not isinstance(fields, Mapping):
raise TypeError(
'Mapping is required, cannot be type %s' % fields.__class__.__name__
)
m = email.mime.multipart.MIMEMultipart('form-data')
for field, value in sorted(fields.items()):
if isinstance(value, str):
main_type = 'text'
sub_type = 'plain'
content = value
filename = None
elif isinstance(value, Mapping):
filename = value.get('filename')
multipart_encoding_str = value.get('multipart_encoding') or 'base64'
content = value.get('content')
if not any((filename, content)):
raise ValueError('at least one of filename or content must be provided')
mime = value.get('mime_type')
if not mime:
try:
mime = mimetypes.guess_type(filename or '', strict=False)[0] or 'application/octet-stream'
except Exception:
mime = 'application/octet-stream'
main_type, sep, sub_type = mime.partition('/')
else:
raise TypeError(
'value must be a string, or mapping, cannot be type %s' % value.__class__.__name__
)
if not content and filename:
multipart_encoding = set_multipart_encoding(multipart_encoding_str)
with open(to_bytes(filename, errors='surrogate_or_strict'), 'rb') as f:
part = email.mime.application.MIMEApplication(f.read(), _encoder=multipart_encoding)
del part['Content-Type']
part.add_header('Content-Type', '%s/%s' % (main_type, sub_type))
else:
part = email.mime.nonmultipart.MIMENonMultipart(main_type, sub_type)
part.set_payload(to_bytes(content))
part.add_header('Content-Disposition', 'form-data')
del part['MIME-Version']
part.set_param(
'name',
field,
header='Content-Disposition'
)
if filename:
part.set_param(
'filename',
to_native(os.path.basename(filename)),
header='Content-Disposition'
)
m.attach(part)
# Ensure headers are not split over multiple lines
# The HTTP policy also uses CRLF by default
b_data = m.as_bytes(policy=email.policy.HTTP)
del m
headers, sep, b_content = b_data.partition(b'\r\n\r\n')
del b_data
parser = email.parser.BytesHeaderParser().parsebytes
return (
parser(headers)['content-type'], # Message converts to native strings
b_content
)
def set_multipart_encoding(encoding):
    """Return the ``email.encoders`` function for a multipart encoding name.

    :arg encoding: Encoding name, one of ``'base64'`` or ``'7or8bit'``
    :returns: ``email.encoders.encode_base64`` or
        ``email.encoders.encode_7or8bit``
    :raises ValueError: if ``encoding`` is not a supported encoding name
    """
    encoders_dict = {
        "base64": email.encoders.encode_base64,
        "7or8bit": email.encoders.encode_7or8bit,
    }
    encoder = encoders_dict.get(encoding)
    if encoder is None:
        # List the names as a tuple so the message does not expose the
        # ``dict_keys([...])`` wrapper.
        raise ValueError("multipart_encoding must be one of %s." % repr(tuple(encoders_dict)))
    return encoder
m = create_multipart(fields)
return m.content_type, m.as_bytes()
#
@@ -1408,7 +1614,7 @@ def fetch_file(module, url, data=None, headers=None, method=None,
:returns: A string, the path to the downloaded file.
"""
# download file
bufsize = 65536
bufsize = _CHUNK_SIZE
parts = urlparse(url)
file_prefix, file_ext = _split_multiext(os.path.basename(parts.path), count=2)
fetch_temp_file = tempfile.NamedTemporaryFile(dir=module.tmpdir, prefix=file_prefix, suffix=file_ext, delete=False)
+4 -2
View File
@@ -445,10 +445,10 @@ from urllib.parse import urlencode, urljoin
from ansible.module_utils.basic import AnsibleModule, sanitize_keys
from ansible.module_utils.common.text.converters import to_native, to_text
from ansible.module_utils.urls import (
create_multipart,
fetch_url,
get_response_filename,
parse_content_type,
prepare_multipart,
url_argument_spec,
url_redirect_argument_spec,
)
@@ -648,7 +648,9 @@ def main():
dict_headers['Content-Type'] = 'application/x-www-form-urlencoded'
elif body_format == 'form-multipart':
try:
content_type, body = prepare_multipart(body)
multipart = create_multipart(body)
content_type = multipart.content_type
body = multipart.as_fp()
except (TypeError, ValueError) as e:
module.fail_json(msg='failed to parse body as form-multipart: %s' % to_native(e))
dict_headers['Content-Type'] = content_type
+6 -2
View File
@@ -444,6 +444,7 @@
body:
file1:
filename: formdata.txt
multipart_encoding: base64
file2:
content: text based file content
filename: fake.txt
@@ -451,6 +452,8 @@
file3:
filename: formdata.txt
multipart_encoding: '7or8bit'
file4:
filename: formdata.txt
text_form_field1: value1
text_form_field2:
content: value2
@@ -462,7 +465,8 @@
that:
- multipart.json.files.file1 | b64decode == '_multipart/form-data_\n'
- multipart.json.files.file2 == 'text based file content'
- multipart.json.files.file3 == '_multipart/form-data_\r\n'
- multipart.json.files.file3 == '_multipart/form-data_\n'
- multipart.json.files.file4 == '_multipart/form-data_\n'
- multipart.json.form.text_form_field1 == 'value1'
- multipart.json.form.text_form_field2 == 'value2'
@@ -493,7 +497,7 @@
- name: Assert multipart/form-data with file and retry
assert:
that:
- result.json.files.file | b64decode == '_multipart/form-data_\n'
- result.json.files.file == '_multipart/form-data_\n'
- result.attempts == 2
- name: Validate invalid method
@@ -97,7 +97,6 @@ V2N4OXhzcnpYbkNLRTNaaUdiV2YxWk1TemZSUGFqWlNtdEZIVTJuRXA4cGQycFZ3YlVkRHFXNQpU
emdXUEgyRnJ2OGpOTWNzOWhRUFZlKzRBSW54c29wMUZVR0JjdEJEcG9iUkJ1Yk9nWDVmTStiMEdk
WndBTHBJCmJnWDlURHpEVVJ1OVF4b2t4WG5xZXZDRnBVQVFoZWtqQ1FtQU9KMjhnVjVaakZwTldG
YTVjY0o0emZPdwotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==
--===============3996062709511591449==
Content-Transfer-Encoding: base64
Content-Type: application/octet-stream
@@ -133,7 +132,6 @@ dWV0SS9pSS9vM3dPekx2ekFvR0FJck9oMzBySHQ4d2l0N0VMQVJ5eAp3UGtwMkFSWVhyS2ZYM05F
UzRjNjd6U0FpKzNkQ2p4UnF5d3FUSTBnTGljeU1sajh6RXU5WUU5SXgvcmw4bFJaCm5ROUxabXF2
N1FIemhMVFVDUEdnWlluZW12QnpvN3IwZVc4T2FnNTJkYmNKTzZGQnN6ZldyeHNrbS9mWDI1UmIK
V1B4aWgydmRSeTgxNGROUFcyNXJnZHc9Ci0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K
--===============3996062709511591449==
Content-Transfer-Encoding: base64
Content-Type: text/plain
@@ -142,15 +140,22 @@ Content-Disposition: form-data; name="file6"; filename="client.txt"
Y2xpZW50LnBlbSBhbmQgY2xpZW50LmtleSB3ZXJlIHJldHJpZXZlZCBmcm9tIGh0dHB0ZXN0ZXIg
ZG9ja2VyIGltYWdlOgoKYW5zaWJsZS9hbnNpYmxlQHNoYTI1NjpmYTVkZWY4YzI5NGZjNTA4MTNh
ZjEzMWMwYjU3Mzc1OTRkODUyYWJhYzljYmU3YmEzOGUxN2JmMWM4NDc2ZjNmCg==
--===============3996062709511591449==
Content-Transfer-Encoding: 7bit
Content-Type: text/plain
Content-Disposition: form-data; name="file7"; filename="client.txt"
client.pem and client.key were retrieved from httptester docker image:
client.pem and client.key were retrieved from httptester docker image:
ansible/ansible@sha256:fa5def8c294fc50813af131c0b5737594d852abac9cbe7ba38e17bf1c8476f3f
ansible/ansible@sha256:fa5def8c294fc50813af131c0b5737594d852abac9cbe7ba38e17bf1c8476f3f
--===============3996062709511591449==
Content-Type: text/plain
Content-Disposition: form-data; name="file8"; filename="client.txt"
client.pem and client.key were retrieved from httptester docker image:
ansible/ansible@sha256:fa5def8c294fc50813af131c0b5737594d852abac9cbe7ba38e17bf1c8476f3f
--===============3996062709511591449==
Content-Type: text/plain
@@ -52,18 +52,23 @@ def test_prepare_multipart():
'file4': {
'filename': client_cert,
'mime_type': 'text/plain',
'multipart_encoding': 'base64',
},
'file5': {
'filename': client_key,
'mime_type': 'application/octet-stream'
'mime_type': 'application/octet-stream',
'multipart_encoding': 'base64',
},
'file6': {
'filename': client_txt,
'multipart_encoding': 'base64'
'multipart_encoding': 'base64',
},
'file7': {
'filename': client_txt,
'multipart_encoding': '7or8bit'
'multipart_encoding': '7or8bit',
},
'file8': {
'filename': client_txt,
},
}