restore: refuse archives containing more than one container

`restore` derived the container name per-member and tracked them in a
dict/set, so an archive with members for several distinct names would
lock, clear, and restore all of them. `backup` only ever writes a single
container, so such archives are only hand-crafted or legacy and silently
overwrite more than the user asked for.

Fix the first valid member as the sole target and reject any member that
names a different container. Also clamp a hardlink's source to that
target so a crafted linkname can't read out of an unrelated container.
This commit is contained in:
Sylirre
2026-06-09 07:59:55 +00:00
parent d6dff97aee
commit 98aff324b7
3 changed files with 63 additions and 17 deletions
+3
View File
@@ -621,6 +621,9 @@ HELP_PAGES = {
"file header. Supported: gzip, bzip2, xz, "
"uncompressed tar. Applies to both file and "
"stdin input."
"\n\n"
"Only one container is restored per archive. An "
"archive holding more than one container is rejected."
),
},
],
+41 -16
View File
@@ -20,6 +20,8 @@
# Architecture: Extracts a proot container backup from a TAR archive.
# Expected archive structure: <name>/manifest.json + <name>/rootfs/*.
# Exactly one container is restored per archive (all `backup` ever
# writes); a member naming a second container is rejected.
# Legacy archives with installed-rootfs/<name> layout are also accepted:
# contents are re-rooted to containers/<name>/rootfs/. Archives with no
# subdirectory are rejected. Compression is auto-detected via tarfile r|*
@@ -218,7 +220,14 @@ def _safe_dest(container_name: str, dest: str, *, follow_final: bool = False):
def command_restore(args) -> None:
"""Reinstate one or more containers from a tar backup."""
"""Reinstate a single container from a tar backup.
An archive is expected to hold exactly one container (this is all
`backup` ever produces). The first valid member fixes the target;
any member naming a different container makes the restore ambiguous
and is rejected, so a hand-crafted or legacy multi-container archive
can never silently overwrite more than the user asked for.
"""
archive = getattr(args, "archive", None)
verbose = getattr(args, "verbose", False)
@@ -247,7 +256,6 @@ def command_restore(args) -> None:
done_size = 0
total_size = 0
counter = None
cleared: set = set()
def _on_entry(member_size: int, member_name: str) -> None:
nonlocal done_size
@@ -268,9 +276,13 @@ def command_restore(args) -> None:
return len(parts) == 1 and not name.endswith('/')
raw_fh = None
# Per-container exclusive locks acquired lazily on first member encounter,
# before any modification to that container's rootfs.
pending_locks: dict = {}
# Restore targets exactly one container. The first valid member fixes
# the name; its exclusive lock is acquired lazily on that first sighting,
# before any modification to the container's rootfs. A member naming a
# different container is rejected (see the loop below).
restore_name = None
lock = None
cleared = False
# Dirs whose archived mode lacks owner rwx: temporarily widened so we
# can write into them, with the final chmod deferred until extraction
# finishes. Applied in reverse insertion order so children are sealed
@@ -303,10 +315,14 @@ def command_restore(args) -> None:
if container_name is None:
continue
# Acquire exclusive lock for this container before
# touching it. Restore archives are streamed so we can't
# pre-scan the names; lock lazily on first sighting.
if container_name not in pending_locks:
# Only one container may be restored per archive. The first
# valid member fixes the target and acquires its exclusive
# lock; a member naming a different container is rejected so a
# multi-container archive can't overwrite more than the user
# asked for. Archives are streamed, so this is detected on the
# fly rather than by pre-scanning the member names.
if restore_name is None:
restore_name = container_name
lock = ContainerLock(
container_name, exclusive=True, command="restore"
)
@@ -316,12 +332,18 @@ def command_restore(args) -> None:
log_error(f"Cannot restore: container "
f"'{container_name}' is busy{hint}.")
sys.exit(1)
pending_locks[container_name] = lock
elif container_name != restore_name:
clear_bar()
log_error(f"Cannot restore: archive contains more than "
f"one container ('{restore_name}' and "
f"'{container_name}'). Restore handles a single "
f"container at a time.")
sys.exit(1)
# On the first entry for a given container's rootfs, clear it.
if container_name not in cleared:
_clear_existing_rootfs(container_name)
cleared.add(container_name)
# On the first entry for the container's rootfs, clear it.
if not cleared:
_clear_existing_rootfs(restore_name)
cleared = True
# Resolve the parent through any symlink planted by an
# earlier member, clamped inside this container's dir, so
@@ -359,7 +381,10 @@ def command_restore(args) -> None:
# links on the host fs would share inodes across what the
# guest treats as independent files.
link_container, link_src = _dest_path(member.linkname)
if link_src is None:
if link_src is None or link_container != restore_name:
# Skip a linkname that resolves nowhere or points at
# a different container — it must not read out of an
# unrelated container's rootfs.
continue
# Clamp the read source inside the container too, so a
# linkname routed through a planted symlink can't copy
@@ -434,5 +459,5 @@ def command_restore(args) -> None:
finally:
if raw_fh is not None:
raw_fh.close()
for lock in pending_locks.values():
if lock is not None:
lock.release()
+19 -1
View File
@@ -89,7 +89,10 @@ def test_restore_hostile_archive_contained(tmp_path):
{"name": "box/rootfs/etc/hostname", "type": "file", "data": b"guest"},
{"name": "box/rootfs/bad", "type": "hardlink",
"linkname": "../../../../etc/shadow"},
{"name": "/abs/evil", "type": "file", "data": b"P"},
# Absolute-looking member must be re-rooted under the same container,
# never escape onto the host. (A second container name would be
# rejected outright — see test_restore_multiple_containers_rejected.)
{"name": "/box/evil", "type": "file", "data": b"P"},
])
# The good container was created inside the sandbox.
@@ -98,11 +101,26 @@ def test_restore_hostile_archive_contained(tmp_path):
)
# The hostile hard link was not materialised from the host.
assert not os.path.exists(os.path.join(container_rootfs("box"), "bad"))
# The absolute member landed inside the container, not on the host.
assert os.path.exists(os.path.join(container_rootfs("box"), "evil"))
# Nothing escaped to the host.
assert sentinel.read_text() == "SECRET"
assert not os.path.exists(os.path.join(os.path.dirname(str(tmp_path)), "escape"))
def test_restore_multiple_containers_rejected(tmp_path):
# An archive holding members for two distinct containers must be
# refused: restore handles a single container at a time.
with pytest.raises(SystemExit) as exc:
_run_restore(tmp_path, [
{"name": "box/rootfs/etc/hostname", "type": "file", "data": b"a"},
{"name": "other/rootfs/etc/hostname", "type": "file", "data": b"b"},
])
assert exc.value.code == 1
# The second container was never created.
assert not os.path.exists(container_dir("other"))
def test_restore_bare_root_archive_rejected(tmp_path):
from _builders import make_tar
arc = tmp_path / "bad.tar"