Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions e2e/cases/py-image-layer-symlinks-567/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
load("@aspect_rules_py//py:defs.bzl", "py_binary", "py_image_layer")
load("@rules_shell//shell:sh_test.bzl", "sh_test")

py_binary(
name = "main",
srcs = ["main.py"],
)

py_image_layer(
name = "layers",
binary = ":main",
)

# Extract the interpreter tar listing for the test to inspect.
genrule(
name = "interpreter_listing",
srcs = [":layers_interpreter"],
outs = ["interpreter_listing.txt"],
cmd = "tar -tvf $< 2>/dev/null > $@",
)

sh_test(
name = "test_resolve_symlinks",
srcs = ["test_resolve_symlinks.sh"],
data = [":interpreter_listing"],
)
2 changes: 2 additions & 0 deletions e2e/cases/py-image-layer-symlinks-567/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
"""Minimal py_binary for testing py_image_layer symlink preservation."""
print("hello")
64 changes: 64 additions & 0 deletions e2e/cases/py-image-layer-symlinks-567/test_resolve_symlinks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env bash
# Integration test: verify py_image_layer preserves symlinks in the interpreter tar.
# Validates the fix for https://github.com/aspect-build/rules_py/issues/567
#
# Builds a real py_image_layer from a py_binary and checks that the interpreter
# tar contains symlink entries (e.g. python -> python3.x) instead of full copies.

set -euo pipefail

PASS=0
FAIL=0

assert() {
local label="$1"
shift
if "$@"; then
PASS=$((PASS + 1))
else
echo "FAIL: $label"
FAIL=$((FAIL + 1))
fi
}

# Find the interpreter listing in runfiles
LISTING=""
for f in $(find "$TEST_SRCDIR" -name 'interpreter_listing.txt' 2>/dev/null); do
LISTING="$f"
break
done

if [ -z "$LISTING" ]; then
echo "FAIL: could not find interpreter_listing.txt in runfiles"
exit 1
fi

echo "Inspecting interpreter layer listing:"
grep '/bin/python' "$LISTING"
echo ""

# python and python3 should be symlinks (tar -tv shows 'l' prefix for symlinks)
assert "bin/python is a symlink to python3.x" \
grep -qE '^l.*bin/python -> python3\.' "$LISTING"

assert "bin/python3 is a symlink to python3.x" \
grep -qE '^l.*bin/python3 -> python3\.' "$LISTING"

# The real python3.X binary should be a regular file (not a symlink)
assert "bin/python3.X is a regular file" \
grep -qE '^-.*bin/python3\.[0-9]+ *$' "$LISTING"

# No symlink should have an absolute target (would break in containers)
assert "no absolute symlink targets" \
bash -c '! grep -qE "^l.* -> /." "$1"' _ "$LISTING"

# The python entry should NOT be a large regular file (would mean no dedup)
assert "bin/python is not a 100MB+ regular file copy" \
bash -c '! grep -qE "^-.* [0-9]{9,}.*bin/python *$" "$1"' _ "$LISTING"

# --- Summary ---

echo "Results: $PASS passed, $FAIL failed"
if [ "$FAIL" -gt 0 ]; then
exit 1
fi
71 changes: 62 additions & 9 deletions py/private/py_image_layer.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,54 @@ load("@bazel_lib//lib:transitions.bzl", "platform_transition_filegroup")
load("@tar.bzl//tar:mtree.bzl", "mtree_mutate", "mtree_spec")
load("@tar.bzl//tar:tar.bzl", "tar")

# Resolve filesystem-level symlinks in mtree specs so that tars preserve them.
#
# mtree_spec records every file as type=file with a distinct content= path, even
# when files are symlinks to each other (e.g. python/python3 -> python3.13 in the
# Python toolchain). Without this, the tar contains multiple full copies.
#
# Detection uses two-step readlink in the Bazel sandbox:
# 1. readlink(sandbox_path) -> absolute cache path (always succeeds in sandbox)
# 2. readlink(cache_path) -> non-empty only if the real file is a symlink
# Only relative targets from step 2 are converted; absolute targets are cross-repo
# ctx.actions.symlink references that would break in containers.
#
# A Starlark rule (not a genrule) is required because genrules cannot access
# runfiles from srcs — only DefaultInfo.default_runfiles gives sandbox access.
#
# See: https://github.com/aspect-build/rules_py/issues/567

_RESOLVE_SYMLINKS_SCRIPT = """\
/type=file/ && /content=/ {
match($0, /content=[^ ]+/); split(substr($0, RSTART, RLENGTH), p, "=")
c = ""; cmd = "readlink \\\"" p[2] "\\\""; cmd | getline c; close(cmd)
if (c != "") { t = ""; cmd2 = "readlink \\\"" c "\\\""; cmd2 | getline t; close(cmd2)
if (t != "" && t !~ /^\\//) { sub(/type=file/, "type=link"); sub(/content=[^ ]+/, "link=" t) }
}
} 1"""

def _resolve_symlinks_impl(ctx):
out = ctx.outputs.out
ctx.actions.run_shell(
command = "awk '%s' %s > %s" % (_RESOLVE_SYMLINKS_SCRIPT, ctx.file.mtree.path, out.path),
inputs = depset([ctx.file.mtree], transitive = [
s[DefaultInfo].default_runfiles.files
for s in ctx.attr.srcs
]),
outputs = [out],
)
return [DefaultInfo(files = depset([out]))]

_resolve_symlinks = rule(
doc = "Post-process an mtree spec to convert filesystem symlinks to type=link entries.",
implementation = _resolve_symlinks_impl,
attrs = {
"mtree": attr.label(mandatory = True, allow_single_file = True),
"srcs": attr.label_list(mandatory = True),
"out": attr.output(mandatory = True),
},
)

default_layer_groups = {
# match *only* external repositories containing a Python interpreter,
# by matching the interpreter repo naming convention:
Expand Down Expand Up @@ -158,23 +206,28 @@ def py_image_layer(
# Produce the manifest for a tar file of our py_binary, but don't tar it up yet, so we can split
# into fine-grained layers for better pull, push and remote cache performance.
manifest_name = name + ".manifest"
mtree_spec(
name = manifest_name + ".raw",
srcs = [binary],
**kwargs
)
_resolve_symlinks(
name = manifest_name + ".symlinks_resolved",
mtree = manifest_name + ".raw",
srcs = [binary],
out = manifest_name + ".symlinks_resolved.mtree",
)
if owner:
mtree_spec(
name = manifest_name + ".preprocessed",
srcs = [binary],
**kwargs
)
mtree_mutate(
name = manifest_name,
mtree = name + ".manifest.preprocessed",
mtree = manifest_name + ".symlinks_resolved",
owner = owner,
group = group,
)
else:
mtree_spec(
native.filegroup(
name = manifest_name,
srcs = [binary],
**kwargs
srcs = [manifest_name + ".symlinks_resolved"],
)

groups = dict(**layer_groups)
Expand Down