Sort galaxy FILES.json for build reproducibility (#86770)

* added the sorting of FILES.json based on name with ASCII2 ordering
* remove useless code
* Add comprehensive tests for FILES.json sorting to ensure reproducible builds

This commit adds tests to validate the FILES.json sorting feature implementation (issue #82792).

Unit Tests (test/units/galaxy/test_collection.py):
- test_build_files_manifest_sorted_by_name: Validates ASCII sorting
- test_build_files_manifest_walk_sorted_output: Tests walk function
- test_build_collection_reproducible_build: Ensures reproducibility
- test_build_files_manifest_special_characters_sorted: Edge cases
- test_files_json_has_sorted_keys: Validates JSON key ordering
- test_build_collection_large_number_of_files_sorted: Scale testing

Integration Tests (test/integration/.../build.yml):
- Multiple builds produce identical FILES.json
- Verify FILES.json reproducible across builds
- Verify all files in ASCII sorted order
- Validate FILES.json format and structure
- Verify each file entry has required fields

Per the comment that 'this should follow the same pattern as _build_collection_tar', add `sort_keys` to this invocation of `json.dumps`
Also removes a sort because files area already sorted in `_build_files_manifest`, which is the source of `file_manifest` in `_build_collection_dir`'s only invocation (line 1669)

Co-authored-by: Thomas Wang <yuanchen.wang@optus.com.au>
Co-authored-by: Thomas Wang <yctomwang123@gmail.com>
Co-authored-by: pvskp <pvincius14@gmail.com>
This commit is contained in:
Patrick Kingston
2026-04-07 12:34:12 -04:00
committed by GitHub
parent a496819b4d
commit f1179b1f7d
4 changed files with 183 additions and 8 deletions
@@ -0,0 +1,2 @@
minor_changes:
- ansible-galaxy - sort the FILES.json for ansible galaxy build based on name. (https://github.com/ansible/ansible/issues/82792).
+14 -7
View File
@@ -32,6 +32,7 @@ from io import BytesIO
from importlib.metadata import distribution
from importlib.resources import files
from itertools import chain
from operator import itemgetter
try:
from packaging.requirements import Requirement as PkgReq
@@ -79,7 +80,10 @@ if t.TYPE_CHECKING:
ManifestValueType = t.Dict[CollectionInfoKeysType, t.Union[int, str, t.List[str], t.Dict[str, str], None]]
CollectionManifestType = t.Dict[ManifestKeysType, ManifestValueType]
FileManifestEntryType = t.Dict[FileMetaKeysType, t.Union[str, int, None]]
FilesManifestType = t.Dict[t.Literal['files', 'format'], t.Union[t.List[FileManifestEntryType], int]]
class FilesManifestType(t.TypedDict):
files: t.List[FileManifestEntryType]
format: int
import ansible.constants as C
from ansible.errors import AnsibleError
@@ -1070,15 +1074,19 @@ def _build_files_manifest(b_collection_path, namespace, name, ignore_patterns,
raise AnsibleError('"build_ignore" and "manifest" are mutually exclusive')
if manifest_control is not Sentinel:
return _build_files_manifest_distlib(
manifest = _build_files_manifest_distlib(
b_collection_path,
namespace,
name,
manifest_control,
license_file,
)
else:
manifest = _build_files_manifest_walk(b_collection_path, namespace, name, ignore_patterns)
return _build_files_manifest_walk(b_collection_path, namespace, name, ignore_patterns)
manifest['files'].sort(key=itemgetter('name'))
return manifest
def _build_files_manifest_distlib(b_collection_path, namespace, name, manifest_control,
@@ -1180,7 +1188,6 @@ def _build_files_manifest_distlib(b_collection_path, namespace, name, manifest_c
)
manifest['files'].append(manifest_entry)
return manifest
@@ -1297,7 +1304,7 @@ def _build_collection_tar(
file_manifest, # type: FilesManifestType
): # type: (...) -> str
"""Build a tar.gz collection artifact from the manifest data."""
files_manifest_json = to_bytes(json.dumps(file_manifest, indent=True), errors='surrogate_or_strict')
files_manifest_json = to_bytes(json.dumps(file_manifest, indent=True, sort_keys=True), errors='surrogate_or_strict')
collection_manifest['file_manifest_file']['chksum_sha256'] = secure_hash_s(files_manifest_json, hash_func=sha256)
collection_manifest_json = to_bytes(json.dumps(collection_manifest, indent=True), errors='surrogate_or_strict')
@@ -1369,7 +1376,7 @@ def _build_collection_dir(b_collection_path, b_collection_output, collection_man
"""
os.makedirs(b_collection_output, mode=S_IRWXU_RXG_RXO)
files_manifest_json = to_bytes(json.dumps(file_manifest, indent=True), errors='surrogate_or_strict')
files_manifest_json = to_bytes(json.dumps(file_manifest, indent=True, sort_keys=True), errors='surrogate_or_strict')
collection_manifest['file_manifest_file']['chksum_sha256'] = secure_hash_s(files_manifest_json, hash_func=sha256)
collection_manifest_json = to_bytes(json.dumps(collection_manifest, indent=True), errors='surrogate_or_strict')
@@ -1382,7 +1389,7 @@ def _build_collection_dir(b_collection_path, b_collection_output, collection_man
os.chmod(b_path, S_IRWU_RG_RO)
base_directories = []
for file_info in sorted(file_manifest['files'], key=lambda x: x['name']):
for file_info in file_manifest['files']:
if file_info['name'] == '.':
continue
@@ -69,7 +69,9 @@
command: ansible-galaxy collection build scratch/ansible_test/my_collection --force {{ galaxy_verbosity }}
args:
chdir: '{{ galaxy_dir }}'
register: build_existing_force
register:
build_existing_force: _task.result
artifact_path: build_existing_force.stdout_lines | select('search', 'Created collection for ansible_test.my_collection at ') | map('regex_replace', '.* at (.*)$', '\\1') | first
- name: assert build existing collection
assert:
@@ -77,6 +79,45 @@
- '"use --force to re-create the collection artifact" in build_existing_no_force.stderr'
- '"Created collection for ansible_test.my_collection" in build_existing_force.stdout'
- name: Define extraction path
set_fact:
extraction_path: "{{ remote_tmp_dir }}/new_ansible_collections_extraction"
- name: Ensure extraction directory exists
file:
path: "{{ extraction_path }}"
state: directory
mode: '0755'
- name: Extract collection artifact
unarchive:
src: "{{ artifact_path }}"
dest: "{{ extraction_path }}"
remote_src: yes
- name: Slurp FILES.json content
slurp:
src: "{{ extraction_path }}/FILES.json"
register:
decoded_files_json: _task.result.content | b64decode | from_json
files_names: decoded_files_json.files | map(attribute='name') | list
- name: Assert that files are sorted in the predefined ASCII order
assert:
that:
- files_names == expected_files_order
vars:
expected_files_order: [
".",
"README.md",
"docs",
"meta",
"meta/runtime.yml",
"plugins",
"plugins/README.md",
"roles"
]
- name: build collection containing ignored files
command: ansible-galaxy collection build
args:
@@ -97,3 +138,114 @@
that:
- item not in tar_output.stdout_lines
loop: '{{ collection_ignored_directories }}'
# Additional comprehensive tests for FILES.json sorting robustness
- name: Clean up previous extraction for new tests
file:
path: "{{ extraction_path }}"
state: absent
- name: Test multiple builds produce identical FILES.json - Build 1
command: ansible-galaxy collection build scratch/ansible_test/my_collection --force {{ galaxy_verbosity }}
args:
chdir: '{{ galaxy_dir }}'
register:
first_artifact: _task.result.stdout_lines | select('search', 'Created collection for ansible_test.my_collection at ') | map('regex_replace', '.* at (.*)$', '\\1') | first
- name: Create directory for first extraction
file:
path: "{{ extraction_path }}_1"
state: directory
mode: '0755'
- name: Extract first build
unarchive:
src: "{{ first_artifact }}"
dest: "{{ extraction_path }}_1"
remote_src: yes
- name: Read FILES.json from first build
slurp:
src: "{{ extraction_path }}_1/FILES.json"
register:
first_files_content: _task.result.content | b64decode | from_json
first_order: first_files_content.files | map(attribute='name') | list
- name: Remove first build artifact to ensure clean build
file:
path: "{{ first_artifact }}"
state: absent
- name: Test multiple builds produce identical FILES.json - Build 2
command: ansible-galaxy collection build scratch/ansible_test/my_collection --force {{ galaxy_verbosity }}
args:
chdir: '{{ galaxy_dir }}'
register:
second_artifact: _task.result.stdout_lines | select('search', 'Created collection for ansible_test.my_collection at ') | map('regex_replace', '.* at (.*)$', '\\1') | first
- name: Create directory for second extraction
file:
path: "{{ extraction_path }}_2"
state: directory
mode: '0755'
- name: Extract second build
unarchive:
src: "{{ second_artifact }}"
dest: "{{ extraction_path }}_2"
remote_src: yes
- name: Read FILES.json from second build
slurp:
src: "{{ extraction_path }}_2/FILES.json"
register:
second_files_content: _task.result.content | b64decode | from_json
second_order: second_files_content.files | map(attribute='name') | list
- name: Verify FILES.json is reproducible across builds
assert:
that:
- first_files_content == second_files_content
- first_order == second_order
- debug: var=first_order
- debug:
var: "{{ item }}"
loop:
- first_order
- first_order | sort(case_sensitive=True)
- name: Verify all files in order are ASCII sorted
assert:
that:
- first_order == (first_order | sort(case_sensitive=True))
- second_order == (second_order | sort(case_sensitive=True))
- name: Validate FILES.json format and keys
assert:
that:
- first_files_content['format'] == 1
- second_files_content['format'] == 1
- first_files_content['files'] | length > 0
- second_files_content['files'] | length > 0
- name: Verify each file entry has required fields
assert:
that:
- item | dict2items | map(attribute='key') | list | sort == [
'chksum_sha256',
'chksum_type',
'ftype',
'format',
'name'] | sort
loop: "{{ first_files_content['files'][1:] }}" # Skip root directory entry
loop_control:
label: "{{ item.name }}"
- name: Clean up extraction directories
file:
path: "{{ item }}"
state: absent
loop:
- "{{ extraction_path }}_1"
- "{{ extraction_path }}_2"
+14
View File
@@ -841,6 +841,20 @@ def test_build_with_symlink_inside_collection(collection_input):
assert actual_file == '08f24200b9fbe18903e7a50930c9d0df0b8d7da3' # shasum test/units/cli/test_data/collection_skeleton/README.md
def test_build_files_manifest_sorted_by_name(collection_input):
"""Verify _build_files_manifest returns files sorted by name."""
input_dir = collection_input[0]
for filename in ['z.txt', 'a.txt', 'm.txt']:
with open(os.path.join(input_dir, filename), 'w') as f:
f.write('test')
manifest = collection._build_files_manifest(to_bytes(input_dir), 'namespace', 'collection', [], Sentinel, None)
names = [entry['name'] for entry in manifest['files']]
assert names == sorted(names)
def test_publish_no_wait(galaxy_server, collection_artifact, monkeypatch):
mock_display = MagicMock()
monkeypatch.setattr(Display, 'display', mock_display)