diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 80d10f5e..67a4f6d4 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -3,7 +3,13 @@ "allow": [ "Bash(gh issue *)", "Bash(buildifier *)", - "Bash(echo \"exit: $?\")" + "Bash(echo \"exit: $?\")", + "Bash(go get *)", + "Bash(go mod *)", + "Bash(cat datadog_test/dd_tar_writer/go.sum)", + "Bash(sort -u go.sum -o go.sum)", + "Bash(python3 *)", + "Bash(git -C /Users/tony.aiuto/ws/rules_pkg log --oneline tests/tar/pkg_tar_test.py)" ] } } diff --git a/MODULE.bazel b/MODULE.bazel index 2c7fef4c..e13f4f08 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -1,7 +1,6 @@ module( name = "rules_pkg", version = "", # set by release pipeline from version.bzl. - compatibility_level = 1, repo_name = "rules_pkg", ) @@ -42,3 +41,20 @@ local_repository( name = "mappings_test_external_repo", path = "tests/mappings/external_repo", ) + +bazel_dep(name = "rules_go", version = "0.60.0") +bazel_dep(name = "gazelle", version = "0.50.0") + +go_sdk = use_extension("@rules_go//go:extensions.bzl", "go_sdk") +use_repo(go_sdk, "go_host_compatible_sdk_label") + +go_deps = use_extension("@gazelle//:extensions.bzl", "go_deps") +go_deps.from_file(go_mod = "//:go.mod") +use_repo( + go_deps, + "com_github_davecgh_go_spew", + "com_github_pmezard_go_difflib", + "com_github_stretchr_testify", + "com_github_ulikunitz_xz", + "in_gopkg_yaml_v3", +) diff --git a/datadog_test/dd_tar_writer/BUILD.bazel b/datadog_test/dd_tar_writer/BUILD.bazel new file mode 100644 index 00000000..8039cd31 --- /dev/null +++ b/datadog_test/dd_tar_writer/BUILD.bazel @@ -0,0 +1,29 @@ +load("@rules_go//go:def.bzl", "go_binary", "go_library", "go_test") + +go_library( + name = "dd_tar_writer_lib", + srcs = ["main.go"], + importpath = "github.com/DataDog/datadog-agent/bazel/rules/dd_tar_writer", + visibility = ["//visibility:private"], + deps = [ + "@com_github_ulikunitz_xz//:xz", + ], +) + +go_binary( + name = "dd_tar_writer", + embed = [":dd_tar_writer_lib"], + visibility = ["//visibility:public"], +) + +go_test( + name = "dd_tar_writer_test", + srcs = ["main_test.go"], + embed = [":dd_tar_writer_lib"], + gotags = ["test"], + deps = [ + "@com_github_stretchr_testify//assert", + "@com_github_stretchr_testify//require", + "@com_github_ulikunitz_xz//:xz", + ], +) diff --git a/datadog_test/dd_tar_writer/REQUIREMENTS.md b/datadog_test/dd_tar_writer/REQUIREMENTS.md new file mode 100644 index 00000000..5b794aba --- /dev/null +++ b/datadog_test/dd_tar_writer/REQUIREMENTS.md @@ -0,0 +1,184 @@ +# Requirements: dd_tar_writer + +## Purpose + +`dd_tar_writer` is a Bazel-compatible tar archive builder used in the Datadog Agent +package build pipeline. It is a drop-in replacement for the `build_tar.py` tool +shipped with `rules_pkg`, with one additional capability: it produces a **sidecar +MD5 manifest file** listing the checksum of every regular file written into the +archive. + +This sidecar file is consumed by the `dd_tar` Bazel rule (see +`bazel/rules/dd_tar.bzl`) which exposes it via `OutputGroupInfo`, making it +available to downstream rules such as `dd_pkg_deb`. + +--- + +## Inputs + +### Positional + +None. All inputs are provided via flags. + +### Flags + +The following flags match `rules_pkg`'s `build_tar.py` exactly and must remain +compatible with however `rules_pkg` invokes the tool: + +| Flag | Type | Description | +|------|------|-------------| +| `--output` | string (required) | Path to the output tar file | +| `--manifest` | string | Path to the rules_pkg JSON manifest file | +| `--mode` | octal string | Default file mode applied to all files (e.g. `0755`) | +| `--mtime` | int or `"portable"` | Default mtime. `"portable"` = 946684800 (2000-01-01 UTC) | +| `--tar` | string (repeatable) | Existing tar file to merge into the output | +| `--deb` | string (repeatable) | Debian package whose `data.tar.*` to merge | +| `--directory` | string | Prefix prepended to all archive paths | +| `--compression` | `gz`, `bz2`, `xz` | Compression algorithm | +| `--compressor` | string | External compressor command, e.g. `pigz -p 4` | +| `--compression_level` | int | Compression level (0–9, or -1 for default) | +| `--modes` | `path=octal` (repeatable) | Per-file mode override | +| `--owners` | `path=uid.gid` (repeatable) | Per-file numeric owner override | +| `--owner` | `uid.gid` | Default numeric owner (default: `0.0`) | +| `--owner_name` | `user.group` | Default owner name | +| `--owner_names` | `path=user.group` (repeatable) | Per-file owner name override | +| `--stamp_from` | string | Path to Bazel volatile status file; overrides `--mtime` with `BUILD_TIMESTAMP` | +| `--create_parents` | bool flag | Auto-create implied parent directories | +| `--allow_dups_from_deps` | bool flag | Suppress duplicate-path warnings | +| `--preserve_mode` | bool flag | Use the source file's actual permissions | +| `--preserve_mtime` | bool flag | Use the source file's actual mtime | + +**New flag (dd_tar_writer extension, not in build_tar.py):** + +| Flag | Type | Description | +|------|------|-------------| +| `--md5sums_output` | string | If set, write the MD5 sidecar to this path | + +### Response files + +Arguments may be placed in a file and passed as `@path/to/file`, one argument +per line. This matches Python argparse `fromfile_prefix_chars='@'` semantics and +is required for Bazel's param-file mechanism. + +--- + +## Inputs from `--manifest` + +The manifest is a JSON array of objects, each describing one entry to add to the +archive. The schema comes from `rules_pkg`'s `pkg/private/manifest.py`. + +### Entry fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `type` | string | yes | Entry type (see below) | +| `dest` | string | yes | Destination path in the archive | +| `src` | string | no | Source path on disk | +| `mode` | string | no | Octal mode string, overrides default | +| `user` | string | no | Owner username | +| `group` | string | no | Owner group name | +| `uid` | int | no | Numeric user ID | +| `gid` | int | no | Numeric group ID | +| `origin` | string | no | Informational only | +| `repository` | string | no | Informational only | + +### Entry types + +| Value | Meaning | +|-------|---------| +| `"file"` | Regular file: content taken from `src` | +| `"symlink"` | Symbolic link: `dest` → `src` (literal) | +| `"raw_symlink"` | Symbolic link: `dest` → target read from `os.Readlink(src)` | +| `"dir"` | Empty directory at `dest` | +| `"tree"` | Directory tree rooted at `src`, placed under `dest` | +| `"empty-file"` | Zero-byte regular file at `dest` | + +### Attribute precedence (low → high) + +1. Tool defaults (`--owner 0.0`, mode derived from file executable bit) +2. Tool-level flag overrides (`--mode`, `--owner`, `--owner_name`) +3. Per-file flag overrides (`--modes`, `--owners`, `--owner_names`) +4. Manifest entry fields (`mode`, `uid`/`gid`, `user`/`group`) + +--- + +## Outputs + +### Tar archive (`--output`) + +- Format: **GNU tar** (`tar.FormatGNU` in Go / `tarfile.GNU_FORMAT` in Python) +- Entries are written in manifest order; merging tars come after manifest entries +- Parent directories are only auto-created if `--create_parents` is set +- Duplicate paths: first occurrence wins; subsequent occurrences are silently + dropped (with a warning to stderr unless `--allow_dups_from_deps`) +- Symlinks and directories included as tar entries but carry no file content + +### MD5 sidecar (`--md5sums_output`) + +Written only when the flag is provided. + +- One line per **regular file** (type `file`, `empty-file`, or regular file from + a merged tar) +- Symlinks and directories are **excluded** +- Format identical to `md5sum(1)`: + + ``` + <32-hex-chars> + ``` + + Note: two spaces between hash and path. Archive paths do not have a leading `/`. + +- Lines are written in the order files were added to the archive (manifest order + first, then merged tars, then merged debs) +- Empty files produce the MD5 of the empty string: + `d41d8cd98f00b204e9800998ecf8427e` + +--- + +## Invariants + +1. **Determinism**: given identical inputs and flags, the output tar and MD5 sidecar + are bit-for-bit identical. Callers must ensure `--mtime` or `--stamp_from` is + set consistently. Gzip output sets `Header.ModTime` to the same value as the + tar mtime to prevent the gzip header from being a source of non-determinism. + +2. **MD5 coverage**: every regular file whose content is written into the tar is + listed in the sidecar. The sidecar never lists a file that is not in the tar. + +3. **No self-reference**: the MD5 sidecar is a separate file; it is not included + in the tar it describes, so there is no circular dependency. + +4. **GNU format**: the tar output uses GNU format for compatibility with + `pkg_deb`'s `make_deb.py` and the broader Linux toolchain. + +5. **CLI compatibility**: all flags listed above that also exist in `build_tar.py` + must behave identically to `build_tar.py` so that the tool can be substituted + by changing only the `_tar_tool` attribute of `dd_tar`. + +--- + +## When rules_pkg changes its API + +The interface between `dd_tar.bzl` and this binary is derived from how +`rules_pkg`'s `tar.bzl` invokes `build_tar.py`. When upgrading rules_pkg, check: + +1. **New flags added to `build_tar.py`**: add the corresponding flag to this tool. + Check `pkg/private/tar/build_tar.py` in the new rules_pkg version. + +2. **Manifest schema changes**: compare `pkg/private/manifest.py`. New fields in + `ManifestEntry` or new entry types must be handled. Unknown types should produce + a clear error message. + +3. **Archive format changes**: if rules_pkg switches from GNU to PAX format, update + the `Format` field in every `tar.Header` written by this tool. + +4. **Output provider changes**: if `tar.bzl` changes what it returns (e.g., exposes + a new output group), update `dd_tar.bzl` correspondingly. The MD5 group must + remain in `OutputGroupInfo`. + +5. **Invocation changes**: check whether `build_tar.py` is now called differently + (param files, new arg order, new environment variables). Update this tool to + match. + +The requirements in this document should be re-read at upgrade time to verify the +implementation still satisfies them. diff --git a/datadog_test/dd_tar_writer/go.mod b/datadog_test/dd_tar_writer/go.mod new file mode 100644 index 00000000..1afe693c --- /dev/null +++ b/datadog_test/dd_tar_writer/go.mod @@ -0,0 +1,14 @@ +module github.com/bazelbuild/rules_pkg/dd_tar_writer + +go 1.24.2 + +require ( + github.com/stretchr/testify v1.11.1 + github.com/ulikunitz/xz v0.5.15 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/datadog_test/dd_tar_writer/go.sum b/datadog_test/dd_tar_writer/go.sum new file mode 100644 index 00000000..1b4946ef --- /dev/null +++ b/datadog_test/dd_tar_writer/go.sum @@ -0,0 +1,12 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/ulikunitz/xz v0.5.15 h1:9DNdB5s+SgV3bQ2ApL10xRc35ck0DuIX/isZvIk+ubY= +github.com/ulikunitz/xz v0.5.15/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/datadog_test/dd_tar_writer/main.go b/datadog_test/dd_tar_writer/main.go new file mode 100644 index 00000000..85b48ca7 --- /dev/null +++ b/datadog_test/dd_tar_writer/main.go @@ -0,0 +1,1144 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +// Package main implements dd_tar_writer: a Bazel-compatible tar archive builder +// that produces a sidecar MD5 checksum manifest alongside the tar output. +// +// It is a drop-in replacement for rules_pkg's build_tar.py. It accepts the same +// CLI flags and the same manifest JSON format, and adds one flag: +// +// --md5sums_output path to write the MD5 sidecar file +// +// See REQUIREMENTS.md for the full specification. +package main + +import ( + "archive/tar" + "bufio" + "compress/bzip2" + "compress/gzip" + "crypto/md5" + "encoding/hex" + "encoding/json" + "flag" + "fmt" + "io" + "os" + "os/exec" + "path" + "path/filepath" + "sort" + "strconv" + "strings" + "time" + + "github.com/ulikunitz/xz" +) + +// portableMtime is the deterministic timestamp used for reproducible builds. +// 2000-01-01 00:00:00.000 UTC — matches rules_pkg's PORTABLE_MTIME constant. +const portableMtime int64 = 946684800 + +// parseMode parses an octal mode string, accepting both "0o755" (Python-style) +// and "0755" / "755" (C-style) representations. +func parseMode(s string) (uint64, error) { + s = strings.TrimPrefix(strings.ToLower(s), "0o") + return strconv.ParseUint(s, 8, 32) +} + +// Entry type constants — must stay in sync with manifest.py in rules_pkg. +const ( + entryIsFile = "file" + entryIsLink = "symlink" + entryIsRawLink = "raw_symlink" + entryIsDir = "dir" + entryIsTree = "tree" + entryIsEmptyFile = "empty-file" +) + +// ManifestEntry matches one JSON record in a rules_pkg manifest file. +type ManifestEntry struct { + Type string `json:"type"` + Dest string `json:"dest"` + Src string `json:"src"` + Mode string `json:"mode"` // octal string, may be empty + User string `json:"user"` + Group string `json:"group"` + UID *int `json:"uid"` + GID *int `json:"gid"` + Origin string `json:"origin,omitempty"` + Repository string `json:"repository,omitempty"` +} + +// multiFlag is a flag.Value that accumulates repeated --flag occurrences. +type multiFlag []string + +func (f *multiFlag) String() string { return strings.Join(*f, ", ") } +func (f *multiFlag) Set(v string) error { + *f = append(*f, v) + return nil +} + +// Config holds all parsed CLI options. +type Config struct { + Output string + ManifestPath string + DefaultMode os.FileMode // meaningful only when HasDefaultMode is true + HasDefaultMode bool + DefaultMtime int64 + Tars []string + Debs []string + Directory string // archive-path prefix, without slashes + Compression string // "", "gz", "bz2", "xz" + Compressor string // external command, e.g. "pigz -p 4" + ModeMap map[string]os.FileMode + IDsMap map[string][2]int + NamesMap map[string][2]string + DefaultIDs [2]int + DefaultNames [2]string + CreateParents bool + AllowDupsFromDeps bool + PreserveMode bool + PreserveMtime bool + CompressionLevel int + Md5sumsOutput string +} + +// md5Entry records one file's checksum for the sidecar file. +type md5Entry struct { + archivePath string + hexSum string +} + +// TarWriter builds a tar archive and accumulates MD5 checksums of regular files. +type TarWriter struct { + cfg Config + outFile *os.File + tw *tar.Writer + closers []io.Closer // closed in order after tw is closed + written map[string]byte // archive path (no trailing /) → tar Typeflag + md5s []md5Entry +} + +// — — — entry point — — — + +func main() { + expanded, err := expandArgs(os.Args[1:]) + if err != nil { + fmt.Fprintf(os.Stderr, "dd_tar_writer: %v\n", err) + os.Exit(1) + } + cfg, err := parseFlags(expanded) + if err != nil { + fmt.Fprintf(os.Stderr, "dd_tar_writer: %v\n", err) + os.Exit(1) + } + if err := run(cfg); err != nil { + fmt.Fprintf(os.Stderr, "dd_tar_writer: %v\n", err) + os.Exit(1) + } +} + +// expandArgs expands @filename response-file arguments (same semantics as +// Python argparse fromfile_prefix_chars='@'). Each line in the file becomes +// one argument; blank lines are skipped. +func expandArgs(args []string) ([]string, error) { + var out []string + for _, arg := range args { + if !strings.HasPrefix(arg, "@") { + out = append(out, arg) + continue + } + data, err := os.ReadFile(arg[1:]) + if err != nil { + return nil, fmt.Errorf("reading param file %s: %w", arg[1:], err) + } + scanner := bufio.NewScanner(strings.NewReader(string(data))) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line != "" { + out = append(out, line) + } + } + } + return out, nil +} + +// parseFlags parses the CLI flags into a Config. +func parseFlags(args []string) (Config, error) { + fset := flag.NewFlagSet("dd_tar_writer", flag.ContinueOnError) + + output := fset.String("output", "", "The output file, mandatory.") + manifestPath := fset.String("manifest", "", "Manifest of contents to add to the layer.") + modeStr := fset.String("mode", "", "Force the mode on added files (octal).") + mtimeStr := fset.String("mtime", "", "Set mtime on tar entries: integer or 'portable'.") + directory := fset.String("directory", "", "Directory prefix for all archive paths.") + compression := fset.String("compression", "", "Compression: gz, bz2, or xz.") + compressor := fset.String("compressor", "", "External compressor command, e.g. 'pigz -p 4'.") + owner := fset.String("owner", "0.0", "Default numeric owner uid.gid.") + ownerName := fset.String("owner_name", "", "Default owner name user.group.") + stampFrom := fset.String("stamp_from", "", "File containing BUILD_TIMESTAMP.") + createParents := fset.Bool("create_parents", false, "Auto-create implied parent directories.") + allowDups := fset.Bool("allow_dups_from_deps", false, "Allow duplicate paths from deps.") + preserveMode := fset.Bool("preserve_mode", false, "Preserve original file permissions.") + preserveMtime := fset.Bool("preserve_mtime", false, "Preserve original file mtime.") + compressionLevel := fset.Int("compression_level", -1, "Compression level (0–9 or -1 for default).") + md5sumsOutput := fset.String("md5sums_output", "", "Path to write the MD5 sidecar file.") + + var tars, debs, modes, owners, ownerNames multiFlag + fset.Var(&tars, "tar", "A tar file to merge (repeatable).") + fset.Var(&debs, "deb", "A deb file to merge (repeatable).") + fset.Var(&modes, "modes", "Per-file mode: path=0755 (repeatable).") + fset.Var(&owners, "owners", "Per-file numeric owner: path=0.0 (repeatable).") + fset.Var(&ownerNames, "owner_names", "Per-file owner name: path=root.root (repeatable).") + + if err := fset.Parse(args); err != nil { + return Config{}, err + } + if *output == "" { + return Config{}, fmt.Errorf("--output is required") + } + + // --directory may be "@path/to/file" (package_dir_file indirection). + directoryValue := *directory + if strings.HasPrefix(directoryValue, "@") { + data, err := os.ReadFile(directoryValue[1:]) + if err != nil { + return Config{}, fmt.Errorf("reading directory file %s: %w", directoryValue[1:], err) + } + directoryValue = strings.TrimSpace(string(data)) + } + + cfg := Config{ + Output: *output, + ManifestPath: *manifestPath, + Tars: []string(tars), + Debs: []string(debs), + Directory: strings.Trim(directoryValue, "/"), + Compression: strings.ToLower(*compression), + Compressor: *compressor, + CreateParents: *createParents, + AllowDupsFromDeps: *allowDups, + PreserveMode: *preserveMode, + PreserveMtime: *preserveMtime, + CompressionLevel: *compressionLevel, + Md5sumsOutput: *md5sumsOutput, + } + + // Default file mode (optional). + if *modeStr != "" { + m, err := parseMode(*modeStr) + if err != nil { + return Config{}, fmt.Errorf("invalid --mode %q: %w", *modeStr, err) + } + cfg.DefaultMode = os.FileMode(m) + cfg.HasDefaultMode = true + } + + // Default mtime. + switch *mtimeStr { + case "portable": + cfg.DefaultMtime = portableMtime + case "": + cfg.DefaultMtime = 0 + default: + t, err := strconv.ParseInt(*mtimeStr, 10, 64) + if err != nil { + return Config{}, fmt.Errorf("invalid --mtime %q: %w", *mtimeStr, err) + } + cfg.DefaultMtime = t + } + if *stampFrom != "" { + ts, err := readTimestampFromStampFile(*stampFrom) + if err != nil { + return Config{}, fmt.Errorf("reading stamp file: %w", err) + } + cfg.DefaultMtime = ts + } + + // Default numeric owner (uid.gid). + uidgid := strings.SplitN(*owner, ".", 2) + if len(uidgid) != 2 { + return Config{}, fmt.Errorf("invalid --owner %q: expected uid.gid", *owner) + } + uid, err := strconv.Atoi(uidgid[0]) + if err != nil { + return Config{}, fmt.Errorf("invalid --owner uid: %w", err) + } + gid, err := strconv.Atoi(uidgid[1]) + if err != nil { + return Config{}, fmt.Errorf("invalid --owner gid: %w", err) + } + cfg.DefaultIDs = [2]int{uid, gid} + + // Default owner name. + if *ownerName != "" { + parts := strings.SplitN(*ownerName, ".", 2) + if len(parts) != 2 { + return Config{}, fmt.Errorf("invalid --owner_name %q: expected user.group", *ownerName) + } + cfg.DefaultNames = [2]string{parts[0], parts[1]} + } + + // Per-file mode overrides. + cfg.ModeMap = make(map[string]os.FileMode) + for _, entry := range modes { + k, v, ok := strings.Cut(entry, "=") + if !ok { + return Config{}, fmt.Errorf("invalid --modes entry %q: expected path=octal", entry) + } + k = strings.TrimPrefix(k, "/") + m, err := parseMode(v) + if err != nil { + return Config{}, fmt.Errorf("invalid mode in --modes %q: %w", entry, err) + } + cfg.ModeMap[k] = os.FileMode(m) + } + + // Per-file numeric owner overrides. + cfg.IDsMap = make(map[string][2]int) + for _, entry := range owners { + k, v, ok := strings.Cut(entry, "=") + if !ok { + return Config{}, fmt.Errorf("invalid --owners entry %q: expected path=uid.gid", entry) + } + k = strings.TrimPrefix(k, "/") + p := strings.SplitN(v, ".", 2) + if len(p) != 2 { + return Config{}, fmt.Errorf("invalid owner in --owners %q", entry) + } + u, _ := strconv.Atoi(p[0]) + g, _ := strconv.Atoi(p[1]) + cfg.IDsMap[k] = [2]int{u, g} + } + + // Per-file owner name overrides. + cfg.NamesMap = make(map[string][2]string) + for _, entry := range ownerNames { + k, v, ok := strings.Cut(entry, "=") + if !ok { + return Config{}, fmt.Errorf("invalid --owner_names entry %q: expected path=user.group", entry) + } + k = strings.TrimPrefix(k, "/") + p := strings.SplitN(v, ".", 2) + if len(p) != 2 { + return Config{}, fmt.Errorf("invalid owner_name in --owner_names %q", entry) + } + cfg.NamesMap[k] = [2]string{p[0], p[1]} + } + + return cfg, nil +} + +// run is the core logic, separated from main() for testability. +func run(cfg Config) (err error) { + tw, err := newTarWriter(cfg) + if err != nil { + return err + } + defer func() { + if closeErr := tw.close(); closeErr != nil && err == nil { + err = closeErr + } + }() + + // Process manifest entries. + if cfg.ManifestPath != "" { + entries, err := readManifest(cfg.ManifestPath) + if err != nil { + return fmt.Errorf("reading manifest: %w", err) + } + for _, e := range entries { + if err := tw.addManifestEntry(e); err != nil { + return fmt.Errorf("manifest entry %q: %w", e.Dest, err) + } + } + } + + // Merge --tar files. + for _, tarPath := range cfg.Tars { + if err := tw.mergeTar(tarPath); err != nil { + return fmt.Errorf("merging tar %s: %w", tarPath, err) + } + } + + // Merge --deb files. + for _, debPath := range cfg.Debs { + if err := tw.mergeDeb(debPath); err != nil { + return fmt.Errorf("merging deb %s: %w", debPath, err) + } + } + + // Write MD5 sidecar before closing (md5s are computed during writing). + if cfg.Md5sumsOutput != "" { + if err := tw.writeMd5sums(cfg.Md5sumsOutput); err != nil { + return fmt.Errorf("writing md5sums: %w", err) + } + } + + return nil +} + +// — — — TarWriter construction — — — + +// nopCloser wraps an io.Writer with a no-op Close method. +type nopCloser struct{ io.Writer } + +func (nopCloser) Close() error { return nil } + +// procCloser writes to the pipe writer and, on Close, signals EOF to the +// external compressor process and waits for it to exit. +type procCloser struct { + pw *io.PipeWriter + cmd *exec.Cmd +} + +func (p *procCloser) Write(b []byte) (int, error) { return p.pw.Write(b) } + +func (p *procCloser) Close() error { + if err := p.pw.Close(); err != nil { + return err + } + return p.cmd.Wait() +} + +func newTarWriter(cfg Config) (*TarWriter, error) { + outFile, err := os.Create(cfg.Output) + if err != nil { + return nil, err + } + + tw := &TarWriter{ + cfg: cfg, + outFile: outFile, + written: make(map[string]byte), + } + + var w io.WriteCloser + + switch { + case cfg.Compressor != "": + w, err = startExternalCompressor(outFile, cfg.Compressor) + case cfg.Compression == "gz" || cfg.Compression == "tgz": + w, err = setupGzip(outFile, cfg.CompressionLevel, cfg.DefaultMtime) + case cfg.Compression == "xz" || cfg.Compression == "lzma": + w, err = setupXz(outFile) + case cfg.Compression == "bz2" || cfg.Compression == "bzip2": + // bzip2 write is not in stdlib; use external bzip2 command. + w, err = startExternalCompressor(outFile, "bzip2") + case cfg.Compression == "": + w = nopCloser{outFile} + default: + outFile.Close() + return nil, fmt.Errorf("unsupported compression: %q", cfg.Compression) + } + if err != nil { + outFile.Close() + return nil, err + } + + tw.closers = append(tw.closers, w, outFile) + tw.tw = tar.NewWriter(w) + return tw, nil +} + +func setupGzip(dst io.Writer, level int, mtime int64) (io.WriteCloser, error) { + if level < 0 { + level = gzip.DefaultCompression + } + gw, err := gzip.NewWriterLevel(dst, level) + if err != nil { + return nil, err + } + // Set gzip header mtime for deterministic output. + gw.ModTime = time.Unix(mtime, 0).UTC() + return gw, nil +} + +func setupXz(dst io.Writer) (io.WriteCloser, error) { + return xz.NewWriter(dst) +} + +func startExternalCompressor(outFile *os.File, cmd string) (io.WriteCloser, error) { + parts := strings.Fields(cmd) + if len(parts) == 0 { + return nil, fmt.Errorf("empty compressor command") + } + c := exec.Command(parts[0], parts[1:]...) + c.Stdout = outFile + pr, pw := io.Pipe() + c.Stdin = pr + if err := c.Start(); err != nil { + pr.Close() + pw.Close() + return nil, fmt.Errorf("starting compressor %q: %w", cmd, err) + } + return &procCloser{pw: pw, cmd: c}, nil +} + +// close flushes and closes the tar writer, then all closers in order. +func (tw *TarWriter) close() error { + var errs []string + if err := tw.tw.Close(); err != nil { + errs = append(errs, fmt.Sprintf("flushing tar: %v", err)) + } + for _, c := range tw.closers { + if err := c.Close(); err != nil { + errs = append(errs, err.Error()) + } + } + if len(errs) > 0 { + return fmt.Errorf("closing tar writer: %s", strings.Join(errs, "; ")) + } + return nil +} + +// — — — path normalization — — — + +// normalizePath cleans an archive path and applies the directory prefix. +// Equivalent to build_tar.py's TarFile.normalize_path(). +func (tw *TarWriter) normalizePath(p string) string { + p = path.Clean(p) + p = strings.TrimPrefix(p, "./") + p = strings.Trim(p, "/") + if tw.cfg.Directory != "" { + prefix := tw.cfg.Directory + "/" + if !strings.HasPrefix(p, prefix) && p != tw.cfg.Directory { + p = tw.cfg.Directory + "/" + p + } + } + return p +} + +// — — — attribute resolution — — — + +// fileAttrs resolves the effective mode, ids, and names for an archive path, +// applying per-file overrides from ModeMap/IDsMap/NamesMap over the defaults. +// mode is meaningful only when hasMode is true. +func (tw *TarWriter) fileAttrs(archivePath string) (mode os.FileMode, hasMode bool, ids [2]int, names [2]string) { + key := strings.TrimPrefix(strings.TrimRight(archivePath, "/"), "/") + + if m, ok := tw.cfg.ModeMap[key]; ok { + mode, hasMode = m, true + } else { + mode, hasMode = tw.cfg.DefaultMode, tw.cfg.HasDefaultMode + } + + ids = tw.cfg.DefaultIDs + if i, ok := tw.cfg.IDsMap[key]; ok { + ids = i + } + + names = tw.cfg.DefaultNames + if n, ok := tw.cfg.NamesMap[key]; ok { + names = n + } + return +} + +// — — — manifest reading — — — + +func readManifest(manifestPath string) ([]ManifestEntry, error) { + raw, err := os.ReadFile(manifestPath) + if err != nil { + return nil, err + } + // Handle Windows UTF-16 LE encoding that older Bazel versions may produce. + if len(raw) >= 2 && raw[1] == 0x00 { + // Crude UTF-16 LE detection (matches manifest.py logic). + wide := make([]rune, len(raw)/2) + for i := range wide { + wide[i] = rune(raw[2*i]) | rune(raw[2*i+1])<<8 + } + raw = []byte(string(wide)) + } + var entries []ManifestEntry + if err := json.Unmarshal(raw, &entries); err != nil { + return nil, err + } + return entries, nil +} + +// — — — manifest entry dispatch — — — + +func (tw *TarWriter) addManifestEntry(entry ManifestEntry) error { + nonAbsPath := strings.Trim(entry.Dest, "/") + mode, hasMode, ids, names := tw.fileAttrs(nonAbsPath) + + // Manifest entry attributes take precedence over flags. + if entry.Mode != "" { + m, err := parseMode(entry.Mode) + if err == nil { + mode = os.FileMode(m) + hasMode = true + } + } + if entry.User != "" { + names[0] = entry.User + if entry.Group != "" { + names[1] = entry.Group + } + } + if entry.UID != nil { + ids[0] = *entry.UID + if entry.GID != nil { + ids[1] = *entry.GID + } else { + ids[1] = *entry.UID + } + } + + switch entry.Type { + case entryIsLink: + return tw.addSymlink(entry.Dest, entry.Src, ids, names) + case entryIsRawLink: + target, err := os.Readlink(entry.Src) + if err != nil { + return fmt.Errorf("reading symlink %s: %w", entry.Src, err) + } + return tw.addSymlink(entry.Dest, target, ids, names) + case entryIsDir: + return tw.addDir(tw.normalizePath(entry.Dest), mode, ids, names) + case entryIsTree: + return tw.addTree(entry.Src, entry.Dest, mode, hasMode, ids, names) + case entryIsEmptyFile: + return tw.addEmptyFile(tw.normalizePath(entry.Dest), mode, hasMode, ids, names) + case entryIsFile, "": + return tw.addFile(entry.Src, entry.Dest, mode, hasMode, ids, names) + default: + return fmt.Errorf("unknown manifest entry type %q", entry.Type) + } +} + +// — — — duplicate tracking — — — + +// isDuplicate returns true if the path has already been written and should be +// skipped. When AllowDupsFromDeps is set every entry is written unconditionally. +func (tw *TarWriter) isDuplicate(archivePath string, typeflag byte) bool { + if tw.cfg.AllowDupsFromDeps { + return false + } + key := strings.TrimRight(archivePath, "/") + existing, seen := tw.written[key] + if !seen { + return false + } + if typeflag == tar.TypeDir { + if existing != tar.TypeDir && existing != tar.TypeSymlink { + fmt.Fprintf(os.Stderr, "directory shadows archive member %s, picking first occurrence\n", archivePath) + } + } else { + fmt.Fprintf(os.Stderr, "duplicate file in archive: %s, picking first occurrence\n", archivePath) + } + return true +} + +// markWritten records that a path has been added to the archive. +func (tw *TarWriter) markWritten(archivePath string, typeflag byte) { + tw.written[strings.TrimRight(archivePath, "/")] = typeflag +} + +// — — — parent directory auto-creation — — — + +func (tw *TarWriter) conditionallyAddParents(archivePath string, ids [2]int, names [2]string, mtime int64) error { + if !tw.cfg.CreateParents { + return nil + } + // Split on the clean path, iterate over parent components. + parts := strings.Split(strings.TrimRight(archivePath, "/"), "/") + parentPath := "" + for i := 0; i < len(parts)-1; i++ { + if parentPath == "" { + parentPath = parts[i] + "/" + } else { + parentPath = parentPath + parts[i] + "/" + } + key := strings.TrimRight(parentPath, "/") + // Never write "." or "" as explicit tar entries (matches Python behavior). + if key == "." || key == "" { + continue + } + if _, exists := tw.written[key]; !exists { + hdr := &tar.Header{ + Typeflag: tar.TypeDir, + Name: parentPath, + Mode: 0o755, + ModTime: time.Unix(mtime, 0).UTC(), + Uid: ids[0], + Gid: ids[1], + Uname: names[0], + Gname: names[1], + Format: tar.FormatGNU, + } + if err := tw.tw.WriteHeader(hdr); err != nil { + return err + } + tw.markWritten(parentPath, tar.TypeDir) + } + } + return nil +} + +// — — — individual entry writers — — — + +func (tw *TarWriter) addFile(src, dest string, mode os.FileMode, hasMode bool, ids [2]int, names [2]string) error { + dest = tw.normalizePath(dest) + if tw.isDuplicate(dest, tar.TypeReg) { + return nil + } + + f, err := os.Open(src) + if err != nil { + return err + } + defer f.Close() + + fi, err := f.Stat() + if err != nil { + return err + } + + // Determine effective mode. + var finalMode os.FileMode + if tw.cfg.PreserveMode { + finalMode = fi.Mode() & os.ModePerm + } else if hasMode { + finalMode = mode + } else if fi.Mode()&0o111 != 0 { + finalMode = 0o755 + } else { + finalMode = 0o644 + } + + // Determine effective mtime. + mtime := tw.cfg.DefaultMtime + if tw.cfg.PreserveMtime { + mtime = fi.ModTime().Unix() + } + + if err := tw.conditionallyAddParents(dest, ids, names, mtime); err != nil { + return err + } + + hdr := &tar.Header{ + Typeflag: tar.TypeReg, + Name: dest, + Size: fi.Size(), + Mode: int64(finalMode), + ModTime: time.Unix(mtime, 0).UTC(), + Uid: ids[0], + Gid: ids[1], + Uname: names[0], + Gname: names[1], + Format: tar.FormatGNU, + } + + hasher := md5.New() + reader := io.TeeReader(f, hasher) + + if err := tw.tw.WriteHeader(hdr); err != nil { + return err + } + if _, err := io.Copy(tw.tw, reader); err != nil { + return err + } + + tw.md5s = append(tw.md5s, md5Entry{ + archivePath: dest, + hexSum: hex.EncodeToString(hasher.Sum(nil)), + }) + tw.markWritten(dest, tar.TypeReg) + return nil +} + +func (tw *TarWriter) addEmptyFile(dest string, mode os.FileMode, hasMode bool, ids [2]int, names [2]string) error { + if tw.isDuplicate(dest, tar.TypeReg) { + return nil + } + + var finalMode os.FileMode + if hasMode { + finalMode = mode + } else { + finalMode = 0o644 + } + + if err := tw.conditionallyAddParents(dest, ids, names, tw.cfg.DefaultMtime); err != nil { + return err + } + + hdr := &tar.Header{ + Typeflag: tar.TypeReg, + Name: dest, + Size: 0, + Mode: int64(finalMode), + ModTime: time.Unix(tw.cfg.DefaultMtime, 0).UTC(), + Uid: ids[0], + Gid: ids[1], + Uname: names[0], + Gname: names[1], + Format: tar.FormatGNU, + } + if err := tw.tw.WriteHeader(hdr); err != nil { + return err + } + + // MD5 of empty content. + hasher := md5.New() + tw.md5s = append(tw.md5s, md5Entry{ + archivePath: dest, + hexSum: hex.EncodeToString(hasher.Sum(nil)), + }) + tw.markWritten(dest, tar.TypeReg) + return nil +} + +func (tw *TarWriter) addDir(dest string, mode os.FileMode, ids [2]int, names [2]string) error { + if !strings.HasSuffix(dest, "/") { + dest += "/" + } + if tw.isDuplicate(dest, tar.TypeDir) { + return nil + } + + // Auto-create missing ancestor directories before writing this entry so + // that parents always precede their children in the archive stream. + if err := tw.conditionallyAddParents(dest, ids, names, tw.cfg.DefaultMtime); err != nil { + return err + } + + var finalMode os.FileMode = 0o755 + if mode != 0 { + finalMode = mode + } + + hdr := &tar.Header{ + Typeflag: tar.TypeDir, + Name: dest, + Mode: int64(finalMode), + ModTime: time.Unix(tw.cfg.DefaultMtime, 0).UTC(), + Uid: ids[0], + Gid: ids[1], + Uname: names[0], + Gname: names[1], + Format: tar.FormatGNU, + } + if err := tw.tw.WriteHeader(hdr); err != nil { + return err + } + tw.markWritten(dest, tar.TypeDir) + return nil +} + +func (tw *TarWriter) addSymlink(dest, target string, ids [2]int, names [2]string) error { + // Preserve leading "./" if present (matches Python behavior). + if !strings.HasPrefix(dest, "./") { + dest = tw.normalizePath(dest) + } + if tw.isDuplicate(dest, tar.TypeSymlink) { + return nil + } + + if err := tw.conditionallyAddParents(dest, ids, names, tw.cfg.DefaultMtime); err != nil { + return err + } + + hdr := &tar.Header{ + Typeflag: tar.TypeSymlink, + Name: dest, + Linkname: target, + Mode: 0o777, + ModTime: time.Unix(tw.cfg.DefaultMtime, 0).UTC(), + Uid: ids[0], + Gid: ids[1], + Uname: names[0], + Gname: names[1], + Format: tar.FormatGNU, + } + if err := tw.tw.WriteHeader(hdr); err != nil { + return err + } + // Symlinks are NOT added to md5s. + tw.markWritten(dest, tar.TypeSymlink) + return nil +} + +func (tw *TarWriter) addTree(srcDir, destPath string, mode os.FileMode, hasMode bool, ids [2]int, names [2]string) error { + srcDir = filepath.Clean(srcDir) + + // Build the archive destination prefix. + dest := strings.Trim(destPath, "/") + if tw.cfg.Directory != "" && !strings.HasPrefix(dest, tw.cfg.Directory+"/") { + dest = tw.cfg.Directory + "/" + dest + } + dest = path.Clean(dest) + if dest == "." { + dest = "" + } + + // Collect entries sorted for determinism (Python sorts within each dir level). + type treeEntry struct { + fullPath string + relPath string // / separated + isDir bool + } + var entries []treeEntry + err := filepath.Walk(srcDir, func(p string, info os.FileInfo, err error) error { + if err != nil { + return err + } + rel, _ := filepath.Rel(srcDir, p) + rel = filepath.ToSlash(rel) + if rel == "." { + return nil + } + entries = append(entries, treeEntry{ + fullPath: p, + relPath: rel, + isDir: info.IsDir(), + }) + return nil + }) + if err != nil { + return err + } + sort.Slice(entries, func(i, j int) bool { return entries[i].relPath < entries[j].relPath }) + + for _, e := range entries { + var archivePath string + if dest != "" { + archivePath = dest + "/" + e.relPath + } else { + archivePath = e.relPath + } + archivePath = path.Clean(archivePath) + + if e.isDir { + if err := tw.addDir(archivePath+"/", 0o755, ids, names); err != nil { + return err + } + } else { + if err := tw.addFile(e.fullPath, archivePath, mode, hasMode, ids, names); err != nil { + return err + } + } + } + return nil +} + +// — — — tar merging — — — + +func (tw *TarWriter) mergeTar(tarPath string) error { + tr, cleanup, err := openTarReader(tarPath) + if err != nil { + return err + } + defer cleanup() + return tw.mergeTarReader(tr) +} + +func (tw *TarWriter) mergeTarReader(tr *tar.Reader) error { + for { + hdr, err := tr.Next() + if err == io.EOF { + return nil + } + if err != nil { + return err + } + + // Apply directory prefix. + if tw.cfg.Directory != "" { + hdr.Name = path.Clean(tw.cfg.Directory + "/" + hdr.Name) + if hdr.Typeflag == tar.TypeDir && !strings.HasSuffix(hdr.Name, "/") { + hdr.Name += "/" + } + } + + // Strip owner names (numeric only, matching Python behavior). + hdr.Uname = "" + hdr.Gname = "" + + // Override mtime unless preserving. + if !tw.cfg.PreserveMtime { + hdr.ModTime = time.Unix(tw.cfg.DefaultMtime, 0).UTC() + } + + if err := tw.conditionallyAddParents(hdr.Name, [2]int{hdr.Uid, hdr.Gid}, [2]string{}, tw.cfg.DefaultMtime); err != nil { + return err + } + + if tw.isDuplicate(hdr.Name, hdr.Typeflag) { + continue + } + + isRegular := hdr.Typeflag == tar.TypeReg || hdr.Typeflag == tar.TypeRegA + if isRegular { + hasher := md5.New() + reader := io.TeeReader(tr, hasher) + + if err := tw.tw.WriteHeader(hdr); err != nil { + return err + } + if _, err := io.Copy(tw.tw, reader); err != nil { + return err + } + + archivePath := strings.TrimRight(hdr.Name, "/") + tw.md5s = append(tw.md5s, md5Entry{ + archivePath: archivePath, + hexSum: hex.EncodeToString(hasher.Sum(nil)), + }) + } else { + if err := tw.tw.WriteHeader(hdr); err != nil { + return err + } + if _, err := io.Copy(tw.tw, tr); err != nil { + return err + } + } + tw.markWritten(hdr.Name, hdr.Typeflag) + } +} + +// mergeDeb extracts the data.tar.* payload from a Debian .deb (ar archive) +// and merges it into the output tar. +func (tw *TarWriter) mergeDeb(debPath string) error { + f, err := os.Open(debPath) + if err != nil { + return err + } + defer f.Close() + + // Verify ar magic. + magic := make([]byte, 8) + if _, err := io.ReadFull(f, magic); err != nil { + return fmt.Errorf("%s: cannot read ar magic: %w", debPath, err) + } + if string(magic) != "!\n" { + return fmt.Errorf("%s: not a valid ar archive", debPath) + } + + // Scan ar members for data.tar.*. + const arHdrSize = 60 + for { + hdr := make([]byte, arHdrSize) + n, err := io.ReadFull(f, hdr) + if err == io.EOF || n == 0 { + break + } + if err != nil { + return fmt.Errorf("%s: reading ar header: %w", debPath, err) + } + + name := strings.TrimRight(string(hdr[0:16]), " ") + sizeStr := strings.TrimRight(string(hdr[48:58]), " ") + size, err := strconv.ParseInt(sizeStr, 10, 64) + if err != nil { + return fmt.Errorf("%s: invalid ar member size: %w", debPath, err) + } + + if !strings.HasPrefix(name, "data.") { + skip := size + if size%2 != 0 { + skip++ + } + if _, err := f.Seek(skip, io.SeekCurrent); err != nil { + return err + } + continue + } + + // Found the data member; open a tar reader over it. + lr := io.LimitReader(f, size) + tr, cleanup, err := openTarReaderFromStream(lr, name) + if err != nil { + return fmt.Errorf("%s: opening data member: %w", debPath, err) + } + defer cleanup() + return tw.mergeTarReader(tr) + } + return fmt.Errorf("%s: no data.tar.* member found", debPath) +} + +// — — — tar reader helpers — — — + +// openTarReader opens a (possibly compressed) tar file by extension. +func openTarReader(tarPath string) (*tar.Reader, func(), error) { + f, err := os.Open(tarPath) + if err != nil { + return nil, nil, err + } + tr, cleanup, err := openTarReaderFromStream(f, tarPath) + if err != nil { + f.Close() + return nil, nil, err + } + outer := cleanup + return tr, func() { outer(); f.Close() }, nil +} + +// openTarReaderFromStream wraps r with a decompressor inferred from name's extension. +func openTarReaderFromStream(r io.Reader, name string) (*tar.Reader, func(), error) { + lower := strings.ToLower(name) + switch { + case strings.HasSuffix(lower, ".tar.gz") || strings.HasSuffix(lower, ".tgz"): + gr, err := gzip.NewReader(r) + if err != nil { + return nil, nil, err + } + return tar.NewReader(gr), func() { gr.Close() }, nil + case strings.HasSuffix(lower, ".tar.xz") || strings.HasSuffix(lower, ".txz"): + xr, err := xz.NewReader(r) + if err != nil { + return nil, nil, err + } + return tar.NewReader(xr), func() {}, nil + case strings.HasSuffix(lower, ".tar.bz2") || strings.HasSuffix(lower, ".tbz2"): + br := bzip2.NewReader(r) + return tar.NewReader(br), func() {}, nil + default: + return tar.NewReader(r), func() {}, nil + } +} + +// — — — MD5 sidecar output — — — + +func (tw *TarWriter) writeMd5sums(outPath string) error { + f, err := os.Create(outPath) + if err != nil { + return err + } + defer f.Close() + + w := bufio.NewWriter(f) + for _, entry := range tw.md5s { + // Two-space separator matches md5sum(1) output format. + fmt.Fprintf(w, "%s %s\n", entry.hexSum, entry.archivePath) + } + return w.Flush() +} + +// — — — stamp file helper — — — + +// readTimestampFromStampFile reads BUILD_TIMESTAMP from a Bazel volatile +// status file. Returns 0 if the key is not present. +func readTimestampFromStampFile(path string) (int64, error) { + data, err := os.ReadFile(path) + if err != nil { + return 0, err + } + for _, line := range strings.Split(string(data), "\n") { + k, v, ok := strings.Cut(line, " ") + if ok && k == "BUILD_TIMESTAMP" { + ts, err := strconv.ParseInt(strings.TrimSpace(v), 10, 64) + if err != nil { + return 0, fmt.Errorf("invalid BUILD_TIMESTAMP %q: %w", v, err) + } + return ts, nil + } + } + return 0, nil +} diff --git a/datadog_test/dd_tar_writer/main_test.go b/datadog_test/dd_tar_writer/main_test.go new file mode 100644 index 00000000..3897d628 --- /dev/null +++ b/datadog_test/dd_tar_writer/main_test.go @@ -0,0 +1,739 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +package main + +import ( + "archive/tar" + "bufio" + "compress/gzip" + "crypto/md5" + "encoding/hex" + "io" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/ulikunitz/xz" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// — — — test helpers — — — + +// writeFile creates a file in dir with the given content and returns its path. +func writeFile(t *testing.T, dir, name, content string) string { + t.Helper() + p := filepath.Join(dir, name) + require.NoError(t, os.MkdirAll(filepath.Dir(p), 0o755)) + require.NoError(t, os.WriteFile(p, []byte(content), 0o644)) + return p +} + +// writeExec creates an executable file and returns its path. +func writeExec(t *testing.T, dir, name, content string) string { + t.Helper() + p := writeFile(t, dir, name, content) + require.NoError(t, os.Chmod(p, 0o755)) + return p +} + +// tarEntries reads all headers from a plain (uncompressed) tar file. +func tarEntries(t *testing.T, tarPath string) []*tar.Header { + t.Helper() + f, err := os.Open(tarPath) + require.NoError(t, err) + defer f.Close() + return readTarEntries(t, tar.NewReader(f)) +} + +// gzipTarEntries reads all headers from a .tar.gz file. +func gzipTarEntries(t *testing.T, tarPath string) []*tar.Header { + t.Helper() + f, err := os.Open(tarPath) + require.NoError(t, err) + defer f.Close() + gr, err := gzip.NewReader(f) + require.NoError(t, err) + defer gr.Close() + return readTarEntries(t, tar.NewReader(gr)) +} + +// xzTarEntries reads all headers from a .tar.xz file. +func xzTarEntries(t *testing.T, tarPath string) []*tar.Header { + t.Helper() + f, err := os.Open(tarPath) + require.NoError(t, err) + defer f.Close() + xr, err := xz.NewReader(f) + require.NoError(t, err) + return readTarEntries(t, tar.NewReader(xr)) +} + +func readTarEntries(t *testing.T, tr *tar.Reader) []*tar.Header { + t.Helper() + var hdrs []*tar.Header + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + require.NoError(t, err) + h := *hdr + hdrs = append(hdrs, &h) + } + return hdrs +} + +// tarContent reads the content of a named entry from a plain tar file. +func tarContent(t *testing.T, tarPath, entryName string) string { + t.Helper() + f, err := os.Open(tarPath) + require.NoError(t, err) + defer f.Close() + tr := tar.NewReader(f) + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + require.NoError(t, err) + if hdr.Name == entryName { + data, err := io.ReadAll(tr) + require.NoError(t, err) + return string(data) + } + } + t.Fatalf("entry %q not found in %s", entryName, tarPath) + return "" +} + +// parseMd5sums reads a md5sums file into a map[path]hash. +func parseMd5sums(t *testing.T, path string) map[string]string { + t.Helper() + f, err := os.Open(path) + require.NoError(t, err) + defer f.Close() + + result := make(map[string]string) + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Text() + if line == "" { + continue + } + // Format: " " + parts := strings.SplitN(line, " ", 2) + require.Len(t, parts, 2, "invalid md5sums line: %q", line) + result[parts[1]] = parts[0] + } + require.NoError(t, scanner.Err()) + return result +} + +// md5OfString computes the hex MD5 of a string. +func md5OfString(s string) string { + h := md5.New() + h.Write([]byte(s)) + return hex.EncodeToString(h.Sum(nil)) +} + +// defaultConfig returns a minimal Config for testing. +func defaultConfig(output, md5Out string) Config { + return Config{ + Output: output, + Md5sumsOutput: md5Out, + DefaultMtime: portableMtime, + DefaultIDs: [2]int{0, 0}, + DefaultNames: [2]string{"", ""}, + ModeMap: make(map[string]os.FileMode), + IDsMap: make(map[string][2]int), + NamesMap: make(map[string][2]string), + CompressionLevel: -1, + } +} + +// — — — tests — — — + +func TestAddFile_BasicContent(t *testing.T) { + dir := t.TempDir() + src := writeFile(t, dir, "hello.txt", "hello world") + out := filepath.Join(dir, "out.tar") + md5Out := filepath.Join(dir, "out.md5sums") + + cfg := defaultConfig(out, md5Out) + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"opt/agent/hello.txt","src":"`+src+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + // Verify tar entry. + hdrs := tarEntries(t, out) + require.Len(t, hdrs, 1) + assert.Equal(t, "opt/agent/hello.txt", hdrs[0].Name) + assert.Equal(t, tar.TypeReg, rune(hdrs[0].Typeflag)) + assert.Equal(t, int64(11), hdrs[0].Size) + assert.Equal(t, portableMtime, hdrs[0].ModTime.Unix(), "mtime should be portable mtime") + assert.Equal(t, int64(0o644), hdrs[0].Mode) + + // Verify content. + assert.Equal(t, "hello world", tarContent(t, out, "opt/agent/hello.txt")) + + // Verify MD5 sidecar. + sums := parseMd5sums(t, md5Out) + assert.Equal(t, md5OfString("hello world"), sums["opt/agent/hello.txt"]) +} + +func TestAddFile_ExecutableModeDerivation(t *testing.T) { + dir := t.TempDir() + src := writeExec(t, dir, "agent", "#!/bin/sh\n") + out := filepath.Join(dir, "out.tar") + + cfg := defaultConfig(out, "") + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"bin/agent","src":"`+src+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + require.Len(t, hdrs, 1) + assert.Equal(t, int64(0o755), hdrs[0].Mode, "executable file should get mode 0755") +} + +func TestAddFile_ModeOverride(t *testing.T) { + dir := t.TempDir() + src := writeExec(t, dir, "agent", "#!/bin/sh\n") + out := filepath.Join(dir, "out.tar") + + cfg := defaultConfig(out, "") + cfg.ModeMap = map[string]os.FileMode{"bin/agent": 0o750} + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"bin/agent","src":"`+src+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + require.Len(t, hdrs, 1) + assert.Equal(t, int64(0o750), hdrs[0].Mode) +} + +func TestAddFile_ManifestModeOverride(t *testing.T) { + dir := t.TempDir() + src := writeFile(t, dir, "config.yaml", "key: val") + out := filepath.Join(dir, "out.tar") + + cfg := defaultConfig(out, "") + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + // Manifest mode takes highest precedence. + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"etc/config.yaml","src":"`+src+`","mode":"0600","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + assert.Equal(t, int64(0o600), hdrs[0].Mode) +} + +func TestAddFile_OwnerOverride(t *testing.T) { + dir := t.TempDir() + src := writeFile(t, dir, "f.txt", "data") + out := filepath.Join(dir, "out.tar") + + cfg := defaultConfig(out, "") + cfg.IDsMap = map[string][2]int{"data/f.txt": {1000, 1001}} + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"data/f.txt","src":"`+src+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + assert.Equal(t, 1000, hdrs[0].Uid) + assert.Equal(t, 1001, hdrs[0].Gid) +} + +func TestAddSymlink_ExcludedFromMd5(t *testing.T) { + dir := t.TempDir() + out := filepath.Join(dir, "out.tar") + md5Out := filepath.Join(dir, "out.md5sums") + + cfg := defaultConfig(out, md5Out) + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"symlink","dest":"opt/agent/current","src":"/opt/agent/7.0.0","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + require.Len(t, hdrs, 1) + assert.Equal(t, tar.TypeSymlink, rune(hdrs[0].Typeflag)) + assert.Equal(t, "/opt/agent/7.0.0", hdrs[0].Linkname) + + // Symlink must NOT appear in md5sums. + sums := parseMd5sums(t, md5Out) + assert.Empty(t, sums) +} + +func TestAddDir_ExcludedFromMd5(t *testing.T) { + dir := t.TempDir() + out := filepath.Join(dir, "out.tar") + md5Out := filepath.Join(dir, "out.md5sums") + + cfg := defaultConfig(out, md5Out) + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"dir","dest":"opt/agent/logs","src":"","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + require.Len(t, hdrs, 1) + assert.Equal(t, tar.TypeDir, rune(hdrs[0].Typeflag)) + assert.True(t, strings.HasSuffix(hdrs[0].Name, "/")) + + // Directory must NOT appear in md5sums. + sums := parseMd5sums(t, md5Out) + assert.Empty(t, sums) +} + +func TestAddEmptyFile_Md5OfEmpty(t *testing.T) { + dir := t.TempDir() + out := filepath.Join(dir, "out.tar") + md5Out := filepath.Join(dir, "out.md5sums") + + cfg := defaultConfig(out, md5Out) + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"empty-file","dest":"opt/agent/placeholder","src":"","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + sums := parseMd5sums(t, md5Out) + require.Contains(t, sums, "opt/agent/placeholder") + assert.Equal(t, "d41d8cd98f00b204e9800998ecf8427e", sums["opt/agent/placeholder"], "md5 of empty string") +} + +func TestDirectoryPrefix(t *testing.T) { + dir := t.TempDir() + src := writeFile(t, dir, "agent", "binary") + out := filepath.Join(dir, "out.tar") + + cfg := defaultConfig(out, "") + cfg.Directory = "opt/datadog-agent" + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"bin/agent","src":"`+src+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + require.Len(t, hdrs, 1) + assert.Equal(t, "opt/datadog-agent/bin/agent", hdrs[0].Name) +} + +func TestCreateParents(t *testing.T) { + dir := t.TempDir() + src := writeFile(t, dir, "agent", "binary") + out := filepath.Join(dir, "out.tar") + + cfg := defaultConfig(out, "") + cfg.CreateParents = true + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"opt/agent/bin/agent","src":"`+src+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + names := make([]string, len(hdrs)) + for i, h := range hdrs { + names[i] = h.Name + } + assert.Contains(t, names, "opt/") + assert.Contains(t, names, "opt/agent/") + assert.Contains(t, names, "opt/agent/bin/") + assert.Contains(t, names, "opt/agent/bin/agent") +} + +func TestDuplicateFile_FirstWins(t *testing.T) { + dir := t.TempDir() + src1 := writeFile(t, dir, "v1.txt", "version one") + src2 := writeFile(t, dir, "v2.txt", "version two") + out := filepath.Join(dir, "out.tar") + md5Out := filepath.Join(dir, "out.md5sums") + + cfg := defaultConfig(out, md5Out) + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"data/file.txt","src":"`+src1+`","mode":"","user":"","group":""}, + {"type":"file","dest":"data/file.txt","src":"`+src2+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + // Only one entry in the tar. + hdrs := tarEntries(t, out) + count := 0 + for _, h := range hdrs { + if h.Name == "data/file.txt" { + count++ + } + } + assert.Equal(t, 1, count) + + // Content should be the first file. + assert.Equal(t, "version one", tarContent(t, out, "data/file.txt")) + + // MD5 should also be for the first file. + sums := parseMd5sums(t, md5Out) + assert.Equal(t, md5OfString("version one"), sums["data/file.txt"]) +} + +func TestGzipCompression(t *testing.T) { + dir := t.TempDir() + src := writeFile(t, dir, "data.txt", "compressed content") + out := filepath.Join(dir, "out.tar.gz") + md5Out := filepath.Join(dir, "out.md5sums") + + cfg := defaultConfig(out, md5Out) + cfg.Compression = "gz" + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"data.txt","src":"`+src+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + // Read back via gzip. + hdrs := gzipTarEntries(t, out) + require.Len(t, hdrs, 1) + assert.Equal(t, "data.txt", hdrs[0].Name) + + sums := parseMd5sums(t, md5Out) + assert.Equal(t, md5OfString("compressed content"), sums["data.txt"]) +} + +func TestXzCompression(t *testing.T) { + dir := t.TempDir() + src := writeFile(t, dir, "data.txt", "xz content") + out := filepath.Join(dir, "out.tar.xz") + md5Out := filepath.Join(dir, "out.md5sums") + + cfg := defaultConfig(out, md5Out) + cfg.Compression = "xz" + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"data.txt","src":"`+src+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := xzTarEntries(t, out) + require.Len(t, hdrs, 1) + assert.Equal(t, "data.txt", hdrs[0].Name) + + sums := parseMd5sums(t, md5Out) + assert.Equal(t, md5OfString("xz content"), sums["data.txt"]) +} + +func TestMergeTar(t *testing.T) { + dir := t.TempDir() + + // Create a small tar to merge. + mergeSrc := filepath.Join(dir, "merge.tar") + func() { + f, err := os.Create(mergeSrc) + require.NoError(t, err) + defer f.Close() + tw := tar.NewWriter(f) + defer tw.Close() + content := []byte("from merged tar") + require.NoError(t, tw.WriteHeader(&tar.Header{ + Typeflag: tar.TypeReg, + Name: "merged/file.txt", + Size: int64(len(content)), + Mode: 0o644, + ModTime: time.Unix(portableMtime, 0).UTC(), + })) + _, err = tw.Write(content) + require.NoError(t, err) + }() + + src := writeFile(t, dir, "local.txt", "local content") + out := filepath.Join(dir, "out.tar") + md5Out := filepath.Join(dir, "out.md5sums") + + cfg := defaultConfig(out, md5Out) + cfg.Tars = []string{mergeSrc} + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"local.txt","src":"`+src+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + names := make(map[string]bool) + for _, h := range hdrs { + names[h.Name] = true + } + assert.True(t, names["local.txt"]) + assert.True(t, names["merged/file.txt"]) + + sums := parseMd5sums(t, md5Out) + assert.Equal(t, md5OfString("local content"), sums["local.txt"]) + assert.Equal(t, md5OfString("from merged tar"), sums["merged/file.txt"]) +} + +func TestAddTree(t *testing.T) { + dir := t.TempDir() + + // Create a tree of files. + treeRoot := filepath.Join(dir, "tree") + writeFile(t, treeRoot, "a.txt", "aaa") + writeFile(t, treeRoot, "sub/b.txt", "bbb") + require.NoError(t, os.MkdirAll(filepath.Join(treeRoot, "emptydir"), 0o755)) + + out := filepath.Join(dir, "out.tar") + md5Out := filepath.Join(dir, "out.md5sums") + + cfg := defaultConfig(out, md5Out) + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"tree","dest":"opt/agent","src":"`+treeRoot+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + names := make(map[string]bool) + for _, h := range hdrs { + names[h.Name] = true + } + assert.True(t, names["opt/agent/a.txt"]) + assert.True(t, names["opt/agent/sub/b.txt"]) + + sums := parseMd5sums(t, md5Out) + assert.Equal(t, md5OfString("aaa"), sums["opt/agent/a.txt"]) + assert.Equal(t, md5OfString("bbb"), sums["opt/agent/sub/b.txt"]) + // Directories not in md5sums. + assert.NotContains(t, sums, "opt/agent/emptydir") +} + +func TestGnuTarFormat(t *testing.T) { + dir := t.TempDir() + src := writeFile(t, dir, "f.txt", "x") + out := filepath.Join(dir, "out.tar") + + cfg := defaultConfig(out, "") + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"f.txt","src":"`+src+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + require.Len(t, hdrs, 1) + // FormatGNU or FormatUnknown (unknown after round-trip read is acceptable). + assert.True(t, hdrs[0].Format == tar.FormatGNU || hdrs[0].Format == tar.FormatUnknown, + "expected GNU format, got %v", hdrs[0].Format) +} + +func TestExpandArgs_ResponseFile(t *testing.T) { + dir := t.TempDir() + paramFile := filepath.Join(dir, "args.txt") + require.NoError(t, os.WriteFile(paramFile, []byte("--output\nout.tar\n\n--compression\ngz\n"), 0o644)) + + expanded, err := expandArgs([]string{"@" + paramFile, "--manifest", "m.json"}) + require.NoError(t, err) + assert.Equal(t, []string{"--output", "out.tar", "--compression", "gz", "--manifest", "m.json"}, expanded) +} + +func TestPortableMtime(t *testing.T) { + dir := t.TempDir() + src := writeFile(t, dir, "f.txt", "data") + out := filepath.Join(dir, "out.tar") + + cfg := defaultConfig(out, "") + cfg.DefaultMtime = portableMtime + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"f.txt","src":"`+src+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + require.Len(t, hdrs, 1) + assert.Equal(t, portableMtime, hdrs[0].ModTime.Unix(), "mtime should be portable mtime") +} + +func TestMd5sumsFormat(t *testing.T) { + dir := t.TempDir() + src := writeFile(t, dir, "f.txt", "hello") + out := filepath.Join(dir, "out.tar") + md5Out := filepath.Join(dir, "out.md5sums") + + cfg := defaultConfig(out, md5Out) + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"opt/f.txt","src":"`+src+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + raw, err := os.ReadFile(md5Out) + require.NoError(t, err) + line := strings.TrimSpace(string(raw)) + // Must be "<32hexchars> " with exactly two spaces. + assert.Regexp(t, `^[0-9a-f]{32} opt/f\.txt$`, line) +} + +func TestNoMd5sumsOutputFlag(t *testing.T) { + dir := t.TempDir() + src := writeFile(t, dir, "f.txt", "data") + out := filepath.Join(dir, "out.tar") + + cfg := defaultConfig(out, "") // no md5sums output + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"file","dest":"f.txt","src":"`+src+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + // tar should exist; no md5sums file should be created. + _, err := os.Stat(out) + assert.NoError(t, err) +} + +// indexOf returns the position of name in names, or -1 if absent. +func indexOf(names []string, name string) int { + for i, n := range names { + if n == name { + return i + } + } + return -1 +} + +// TestAddDir_AutoCreatesParents verifies that a "dir" manifest entry whose +// parent directories do not yet exist in the archive causes those parents to be +// written first when create_parents is set. +func TestAddDir_AutoCreatesParents(t *testing.T) { + dir := t.TempDir() + out := filepath.Join(dir, "out.tar") + + cfg := defaultConfig(out, "") + cfg.CreateParents = true + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"dir","dest":"a/b/c","src":"","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + names := make([]string, len(hdrs)) + for i, h := range hdrs { + names[i] = h.Name + } + + require.Contains(t, names, "a/") + require.Contains(t, names, "a/b/") + require.Contains(t, names, "a/b/c/") + assert.Less(t, indexOf(names, "a/"), indexOf(names, "a/b/"), "a/ must precede a/b/") + assert.Less(t, indexOf(names, "a/b/"), indexOf(names, "a/b/c/"), "a/b/ must precede a/b/c/") +} + +// TestParentOrdering_InstallerLike reproduces the sort-order regression seen +// with the installer package. The manifest has two dir entries that sort +// before a file entry; the dir entries introduce an implied parent +// (embedded/) that was not otherwise listed. Without the fix, embedded/ +// appears after its children embedded/bin/ and embedded/lib/. +// +// Manifest (as rules_pkg write_manifest would sort it): +// +// opt/datadog-installer/embedded/bin ← dir (from pkg_mkdirs) +// opt/datadog-installer/embedded/lib ← dir (from pkg_mkdirs) +// opt/datadog-installer/version-manifest.json ← file +func TestParentOrdering_InstallerLike(t *testing.T) { + dir := t.TempDir() + vmanifest := writeFile(t, dir, "version-manifest.json", `{"version":"7"}`) + out := filepath.Join(dir, "out.tar") + md5Out := filepath.Join(dir, "out.md5sums") + + cfg := defaultConfig(out, md5Out) + cfg.CreateParents = true + cfg.ManifestPath = filepath.Join(dir, "manifest.json") + // Entries in the order write_manifest would produce (lexicographic by dest). + require.NoError(t, os.WriteFile(cfg.ManifestPath, []byte(`[ + {"type":"dir", "dest":"opt/datadog-installer/embedded/bin","src":"","mode":"","user":"","group":""}, + {"type":"dir", "dest":"opt/datadog-installer/embedded/lib","src":"","mode":"","user":"","group":""}, + {"type":"file","dest":"opt/datadog-installer/version-manifest.json","src":"`+vmanifest+`","mode":"","user":"","group":""} + ]`), 0o644)) + + require.NoError(t, run(cfg)) + + hdrs := tarEntries(t, out) + names := make([]string, len(hdrs)) + for i, h := range hdrs { + names[i] = h.Name + } + + // All expected entries are present. + assert.Contains(t, names, "opt/") + assert.Contains(t, names, "opt/datadog-installer/") + assert.Contains(t, names, "opt/datadog-installer/embedded/") + assert.Contains(t, names, "opt/datadog-installer/embedded/bin/") + assert.Contains(t, names, "opt/datadog-installer/embedded/lib/") + assert.Contains(t, names, "opt/datadog-installer/version-manifest.json") + + // Parent directories precede their children. + assert.Less(t, indexOf(names, "opt/"), indexOf(names, "opt/datadog-installer/"), + "opt/ must precede opt/datadog-installer/") + assert.Less(t, indexOf(names, "opt/datadog-installer/"), indexOf(names, "opt/datadog-installer/embedded/"), + "opt/datadog-installer/ must precede opt/datadog-installer/embedded/") + assert.Less(t, indexOf(names, "opt/datadog-installer/embedded/"), indexOf(names, "opt/datadog-installer/embedded/bin/"), + "embedded/ must precede embedded/bin/") + assert.Less(t, indexOf(names, "opt/datadog-installer/embedded/"), indexOf(names, "opt/datadog-installer/embedded/lib/"), + "embedded/ must precede embedded/lib/") + + // md5sums contains only the regular file, not directories. + sums := parseMd5sums(t, md5Out) + assert.Contains(t, sums, "opt/datadog-installer/version-manifest.json") + assert.Len(t, sums, 1) +} + +func TestParseFlags_RequiresOutput(t *testing.T) { + _, err := parseFlags([]string{"--manifest", "m.json"}) + assert.ErrorContains(t, err, "--output") +} + +func TestParseFlags_InvalidMode(t *testing.T) { + _, err := parseFlags([]string{"--output", "out.tar", "--mode", "9999"}) + assert.Error(t, err) +} + +func TestParseFlags_InvalidOwner(t *testing.T) { + _, err := parseFlags([]string{"--output", "out.tar", "--owner", "notanumber"}) + assert.Error(t, err) +} diff --git a/datadog_test/rules_pkg/generate_patches.sh b/datadog_test/rules_pkg/generate_patches.sh new file mode 100755 index 00000000..4234f7b4 --- /dev/null +++ b/datadog_test/rules_pkg/generate_patches.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Generate patch files for third_party/rules_pkg. +# +# Downloads the upstream source file at the exact commit pinned in MODULE.bazel, +# diffs it against our modified version, and writes the result to patches/. +# +# Usage (from repo root): +# bash third_party/rules_pkg/generate_patches.sh +# +# When updating the upstream commit in MODULE.bazel: +# 1. Update COMMIT below to match the new commit. +# 2. Update the modified source file(s) in third_party/rules_pkg/ to reflect +# any upstream changes while preserving our modifications. +# 3. Re-run this script to regenerate the patch(es). +# 4. Commit the updated source and patch files together. + +set -euo pipefail + +# Must match the commit in the rules_pkg git_override() in MODULE.bazel. +COMMIT="401969d4367c42dcbb45d33a637eae87788d025e" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PATCH_DIR="${SCRIPT_DIR}/patches" + +generate_patch() { + local upstream_path="$1" # path within the rules_pkg repo, e.g. pkg/private/tar/tar.bzl + local local_file="$2" # path to our modified version (absolute) + local patch_name="$3" # output patch filename (no directory) + + local upstream_url="https://raw.githubusercontent.com/bazelbuild/rules_pkg/${COMMIT}/${upstream_path}" + local tmp + tmp="$(mktemp)" + trap "rm -f '${tmp}'" EXIT + + echo "Downloading ${upstream_url} ..." + curl -fsSL "${upstream_url}" -o "${tmp}" + + echo "Generating ${patch_name} ..." + # diff exits 1 when files differ (which is expected here). + diff -u \ + --label "a/${upstream_path}" \ + --label "b/${upstream_path}" \ + "${tmp}" \ + "${local_file}" \ + > "${PATCH_DIR}/${patch_name}" || true + + if [ ! -s "${PATCH_DIR}/${patch_name}" ]; then + echo "WARNING: patch is empty — local file matches upstream exactly." + else + echo " -> ${PATCH_DIR}/${patch_name}" + fi +} + +generate_patch \ + "pkg/private/tar/tar.bzl" \ + "${SCRIPT_DIR}/pkg/private/tar/tar.bzl" \ + "tar_bzl.patch" + +echo "Done." diff --git a/datadog_test/rules_pkg/patches/BUILD.bazel b/datadog_test/rules_pkg/patches/BUILD.bazel new file mode 100644 index 00000000..4e409814 --- /dev/null +++ b/datadog_test/rules_pkg/patches/BUILD.bazel @@ -0,0 +1,2 @@ +# Package delimiter — patch files in this directory are referenced as Bazel +# labels by the git_override() in MODULE.bazel. diff --git a/datadog_test/rules_pkg/patches/tar_bzl.patch b/datadog_test/rules_pkg/patches/tar_bzl.patch new file mode 100644 index 00000000..d902728c --- /dev/null +++ b/datadog_test/rules_pkg/patches/tar_bzl.patch @@ -0,0 +1,48 @@ +--- a/pkg/private/tar/tar.bzl ++++ b/pkg/private/tar/tar.bzl +@@ -57,9 +57,13 @@ + files = [] + outputs, output_file, _ = setup_output_files(ctx) + ++ # Declare the md5sums sidecar output. ++ md5sums_file = ctx.actions.declare_file(ctx.label.name + ".md5sums") ++ + # Start building the arguments. + args = ctx.actions.args() + args.add("--output", output_file.path) ++ args.add("--md5sums_output", md5sums_file.path) + args.add("--mode", ctx.attr.mode) + args.add("--owner", ctx.attr.owner) + args.add("--owner_name", ctx.attr.ownername) +@@ -198,7 +202,7 @@ + tools = [ctx.executable.compressor] if ctx.executable.compressor else [], + executable = ctx.executable._build_tar, + arguments = [args], +- outputs = [output_file], ++ outputs = [output_file, md5sums_file], + env = { + "LANG": "en_US.UTF-8", + "LC_CTYPE": "UTF-8", +@@ -218,6 +222,7 @@ + # Depend on it at your own risk! + OutputGroupInfo( + manifest = [manifest_file], ++ md5sums = depset([md5sums_file]), + ), + ] + +@@ -327,8 +332,13 @@ + "private_stamp_detect": attr.bool(default = False), + + # Implicit dependencies. ++ # Points to dd_tar_writer, a Go binary that replaces the upstream Python ++ # build_tar tool and additionally emits a .md5sums sidecar file. ++ # Label uses @@// (canonical root-module prefix in bzlmod) so that this ++ # reference resolves to the main repository even though this file lives ++ # inside the rules_pkg module. + "_build_tar": attr.label( +- default = Label("//pkg/private/tar:build_tar"), ++ default = Label("@@//bazel/rules/dd_tar_writer:dd_tar_writer"), + cfg = "exec", + executable = True, + allow_files = True, diff --git a/datadog_test/rules_pkg/pkg/private/tar/tar.bzl b/datadog_test/rules_pkg/pkg/private/tar/tar.bzl new file mode 100644 index 00000000..ac017058 --- /dev/null +++ b/datadog_test/rules_pkg/pkg/private/tar/tar.bzl @@ -0,0 +1,376 @@ +# Copyright 2015 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Rules for making .tar files.""" + +load("//pkg:providers.bzl", "PackageVariablesInfo") +load( + "//pkg/private:pkg_files.bzl", + "add_directory", + "add_empty_file", + "add_label_list", + "add_single_file", + "add_symlink", + "create_mapping_context_from_ctx", + "write_manifest", +) +load("//pkg/private:util.bzl", "get_stamp_detect", "setup_output_files", "substitute_package_variables") + +# TODO(aiuto): Figure out how to get this from the python toolchain. +# See check for lzma in archive.py for a hint at a method. +HAS_XZ_SUPPORT = True + +# Filetype to restrict inputs +tar_filetype = ( + [".tar", ".tar.gz", ".tgz", ".tar.bz2", "tar.xz", ".txz"] if HAS_XZ_SUPPORT else [".tar", ".tar.gz", ".tgz", ".tar.bz2"] +) +SUPPORTED_TAR_COMPRESSIONS = ( + ["", "gz", "bz2", "xz"] if HAS_XZ_SUPPORT else ["", "gz", "bz2"] +) +_DEFAULT_MTIME = -1 + +def _remap(remap_paths, path): + """If path starts with a key in remap_paths, rewrite it.""" + for prefix, replacement in remap_paths.items(): + if path.startswith(prefix): + return replacement + path[len(prefix):] + return path + +def _quote(filename, protect = "="): + """Quote the filename, by escaping = by \\= and \\ by \\\\""" + return filename.replace("\\", "\\\\").replace(protect, "\\" + protect) + +def _pkg_tar_impl(ctx): + """Implementation of the pkg_tar rule.""" + + # Files needed by rule implementation at runtime + files = [] + outputs, output_file, _ = setup_output_files(ctx) + + # Declare the md5sums sidecar output. + md5sums_file = ctx.actions.declare_file(ctx.label.name + ".md5sums") + + # Start building the arguments. + args = ctx.actions.args() + args.add("--output", output_file.path) + args.add("--md5sums_output", md5sums_file.path) + args.add("--mode", ctx.attr.mode) + args.add("--owner", ctx.attr.owner) + args.add("--owner_name", ctx.attr.ownername) + + # Package dir can be specified by a file or inlined. + if ctx.attr.package_dir_file: + if ctx.attr.package_dir: + fail("Both package_dir and package_dir_file attributes were specified") + args.add("--directory", "@" + ctx.file.package_dir_file.path) + files.append(ctx.file.package_dir_file) + else: + package_dir_expanded = substitute_package_variables(ctx, ctx.attr.package_dir) + args.add("--directory", package_dir_expanded or "/") + + if ctx.executable.compressor: + args.add("--compressor", "%s %s" % (ctx.executable.compressor.path, ctx.attr.compressor_args)) + else: + extension = ctx.attr.extension + if extension and extension != "tar": + compression = None + dot_pos = ctx.attr.extension.rfind(".") + if dot_pos >= 0: + compression = ctx.attr.extension[dot_pos + 1:] + else: + compression = ctx.attr.extension + if compression == "tgz": + compression = "gz" + if compression == "txz": + compression = "xz" + if compression: + if compression in SUPPORTED_TAR_COMPRESSIONS: + args.add("--compression", compression) + else: + fail("Unsupported compression: '%s'" % compression) + + if ctx.attr.mtime != _DEFAULT_MTIME: + if ctx.attr.portable_mtime: + fail("You may not set both mtime and portable_mtime") + args.add("--mtime", "%d" % ctx.attr.mtime) + if ctx.attr.portable_mtime: + args.add("--mtime", "portable") + if ctx.attr.modes: + for key in ctx.attr.modes: + args.add("--modes", "%s=%s" % (_quote(key), ctx.attr.modes[key])) + if ctx.attr.owners: + for key in ctx.attr.owners: + args.add("--owners", "%s=%s" % (_quote(key), ctx.attr.owners[key])) + if ctx.attr.ownernames: + for key in ctx.attr.ownernames: + args.add( + "--owner_names", + "%s=%s" % (_quote(key), ctx.attr.ownernames[key]), + ) + if ctx.attr.compression_level >= 0: + args.add("--compression_level", str(ctx.attr.compression_level)) + + # Now we begin processing the files. + path_mapper = None + if ctx.attr.remap_paths: + path_mapper = lambda path: _remap(ctx.attr.remap_paths, path) + + mapping_context = create_mapping_context_from_ctx( + ctx, + label = ctx.label, + include_runfiles = ctx.attr.include_runfiles, + strip_prefix = ctx.attr.strip_prefix, + # build_tar does the default modes. Consider moving attribute mapping + # into mapping_context. + default_mode = None, + path_mapper = path_mapper, + ) + + add_label_list(mapping_context, srcs = ctx.attr.srcs) + + # The files attribute is a map of labels to destinations. We can add them + # directly to the content map. + for target, f_dest_path in ctx.attr.files.items(): + target_files = target[DefaultInfo].files.to_list() + if len(target_files) != 1: + fail("Each input must describe exactly one file.", attr = "files") + mapping_context.file_deps_direct.append(target_files[0]) + add_single_file( + mapping_context, + f_dest_path, + target_files[0], + target.label, + ) + + for empty_file in ctx.attr.empty_files: + add_empty_file(mapping_context, empty_file, ctx.label) + for empty_dir in ctx.attr.empty_dirs or []: + add_directory(mapping_context, empty_dir, ctx.label) + for f in ctx.files.deps: + args.add("--tar", f.path) + for link in ctx.attr.symlinks: + add_symlink( + mapping_context, + link, + ctx.attr.symlinks[link], + ctx.label, + ) + if ctx.attr.stamp == 1 or (ctx.attr.stamp == -1 and + ctx.attr.private_stamp_detect): + args.add("--stamp_from", ctx.version_file.path) + files.append(ctx.version_file) + + manifest_file = ctx.actions.declare_file(ctx.label.name + ".manifest") + files.append(manifest_file) + write_manifest(ctx, manifest_file, mapping_context.content_map) + args.add("--manifest", manifest_file.path) + + args.set_param_file_format("flag_per_line") + args.use_param_file("@%s", use_always = False) + + if ctx.attr.create_parents: + args.add("--create_parents") + + if ctx.attr.allow_duplicates_from_deps: + args.add("--allow_dups_from_deps") + + if ctx.attr.preserve_mode: + args.add("--preserve_mode") + + if ctx.attr.preserve_mtime: + args.add("--preserve_mtime") + + inputs = depset( + direct = mapping_context.file_deps_direct + ctx.files.deps + files, + transitive = mapping_context.file_deps_transitive, + ) + + ctx.actions.run( + mnemonic = "PackageTar", + progress_message = "Writing: %s" % output_file.path, + inputs = inputs, + tools = [ctx.executable.compressor] if ctx.executable.compressor else [], + executable = ctx.executable._build_tar, + arguments = [args], + outputs = [output_file, md5sums_file], + env = { + "LANG": "en_US.UTF-8", + "LC_CTYPE": "UTF-8", + "PYTHONIOENCODING": "UTF-8", + "PYTHONUTF8": "1", + }, + use_default_shell_env = True, + ) + return [ + DefaultInfo( + files = depset([output_file]), + runfiles = ctx.runfiles(files = outputs), + ), + # NB: this is not a committed public API. + # The format of this file is subject to change without notice, + # or this OutputGroup might be totally removed. + # Depend on it at your own risk! + OutputGroupInfo( + manifest = [manifest_file], + md5sums = depset([md5sums_file]), + ), + ] + +# A rule for creating a tar file, see README.md +pkg_tar_impl = rule( + implementation = _pkg_tar_impl, + attrs = { + "strip_prefix": attr.string( + doc = """(note: Use strip_prefix = "." to strip path to the package but preserve relative paths of sub directories beneath the package.)""", + ), + "package_dir": attr.string( + doc = """Prefix to be prepend to all paths written. + + This is applied as a final step, while writing to the archive. + Any other attributes (e.g. symlinks) which specify a path, must do so relative to package_dir. + The value may contain variables. See [package_file_name](#package_file_name) for examples. + """, + ), + "package_dir_file": attr.label(allow_single_file = True), + "deps": attr.label_list( + doc = """tar files which will be unpacked and repacked into the archive.""", + allow_files = tar_filetype, + ), + "srcs": attr.label_list( + doc = """Inputs which will become part of the tar archive.""", + allow_files = True, + ), + "files": attr.label_keyed_string_dict( + doc = """Obsolete. Do not use.""", + allow_files = True, + ), + "mode": attr.string(default = "0555"), + "modes": attr.string_dict(), + "mtime": attr.int(default = _DEFAULT_MTIME), + "portable_mtime": attr.bool(default = True), + "owner": attr.string( + doc = """Default numeric owner.group to apply to files when not set via pkg_attributes.""", + default = "0.0", + ), + "ownername": attr.string(default = "."), + "owners": attr.string_dict(), + "ownernames": attr.string_dict(), + "extension": attr.string( + default = "tar", + doc = """The extension of the generated file. If `"gz"`, `"bz2"`, or `"xz"`, the +tarball will also be compressed using that tool, and is mutually exclusive with `compressor`. +Note that `xz` may not be supported based on the Python toolchain. +""", + ), + "symlinks": attr.string_dict(), + "empty_files": attr.string_list(), + "include_runfiles": attr.bool( + doc = ("""Include runfiles for executables. These appear as they would in bazel-bin.""" + + """ For example: 'path/to/myprog.runfiles/path/to/my_data.txt'."""), + ), + "empty_dirs": attr.string_list(), + "remap_paths": attr.string_dict(), + "compressor": attr.label( + doc = """External tool which can compress the archive.""", + executable = True, + cfg = "exec", + ), + "compressor_args": attr.string( + doc = """Arg list for `compressor`.""", + ), + "create_parents": attr.bool(default = True), + "allow_duplicates_from_deps": attr.bool(default = False), + "compression_level": attr.int( + doc = """Specify the numeric compression level in gzip mode; may be 0-9 or -1 (default to 6).""", + default = -1, + ), + + # Common attributes + "out": attr.output(mandatory = True), + "package_file_name": attr.string(doc = "See [Common Attributes](#package_file_name)"), + "package_variables": attr.label( + doc = "See [Common Attributes](#package_variables)", + providers = [PackageVariablesInfo], + ), + "allow_duplicates_with_different_content": attr.bool( + default = True, + doc = """If true, will allow you to reference multiple pkg_* which conflict +(writing different content or metadata to the same destination). +Such behaviour is always incorrect, but we provide a flag to support it in case old +builds were accidentally doing it. Never explicitly set this to true for new code. +""", + ), + "preserve_mode": attr.bool( + default = False, + doc = """If true, will add file to archive with preserved file permissions.""", + ), + "preserve_mtime": attr.bool( + default = False, + doc = """If true, will add file to archive with preserved file mtime.""", + ), + "stamp": attr.int( + doc = """Enable file time stamping. Possible values: +
  • stamp = 1: Use the time of the build as the modification time of each file in the archive. +
  • stamp = 0: Use an "epoch" time for the modification time of each file. This gives good build result caching. +
  • stamp = -1: Control the chosen modification time using the --[no]stamp flag. +@since(0.5.0) +""", + default = 0, + ), + # Is --stamp set on the command line? + # TODO(https://github.com/bazelbuild/rules_pkg/issues/340): Remove this. + "private_stamp_detect": attr.bool(default = False), + + # Implicit dependencies. + # Points to dd_tar_writer, a Go binary that replaces the upstream Python + # build_tar tool and additionally emits a .md5sums sidecar file. + # Label uses @@// (canonical root-module prefix in bzlmod) so that this + # reference resolves to the main repository even though this file lives + # inside the rules_pkg module. + "_build_tar": attr.label( + default = Label("@@//bazel/rules/dd_tar_writer:dd_tar_writer"), + cfg = "exec", + executable = True, + allow_files = True, + ), + }, +) + +# buildifier: disable=function-docstring-args +def pkg_tar(name, **kwargs): + """Creates a .tar file. See pkg_tar_impl. + + @wraps(pkg_tar_impl) + """ + + # Compatibility with older versions of pkg_tar that define files as + # a flat list of labels. + if "srcs" not in kwargs: + if "files" in kwargs: + if not hasattr(kwargs["files"], "items"): + label = "%s//%s:%s" % (native.repository_name(), native.package_name(), name) + + # buildifier: disable=print + print("%s: you provided a non dictionary to the pkg_tar `files` attribute. " % (label,) + + "This attribute was renamed to `srcs`. " + + "Consider renaming it in your BUILD file.") + kwargs["srcs"] = kwargs.pop("files") + extension = kwargs.get("extension") or "tar" + if extension[0] == ".": + extension = extension[1:] + pkg_tar_impl( + name = name, + out = kwargs.pop("out", None) or (name + "." + extension), + private_stamp_detect = get_stamp_detect(kwargs.get("stamp", 0)), + **kwargs + ) diff --git a/go.mod b/go.mod new file mode 100644 index 00000000..554daa25 --- /dev/null +++ b/go.mod @@ -0,0 +1,11 @@ +module github.com/bazelbuild/rules_pkg + +go 1.24.2 + +require ( + github.com/davecgh/go-spew v1.1.1 + github.com/pmezard/go-difflib v1.0.0 + github.com/stretchr/testify v1.11.1 + github.com/ulikunitz/xz v0.5.15 + gopkg.in/yaml.v3 v3.0.1 +) diff --git a/go.sum b/go.sum new file mode 100644 index 00000000..1b4946ef --- /dev/null +++ b/go.sum @@ -0,0 +1,12 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/ulikunitz/xz v0.5.15 h1:9DNdB5s+SgV3bQ2ApL10xRc35ck0DuIX/isZvIk+ubY= +github.com/ulikunitz/xz v0.5.15/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/private/tar/tar.bzl b/pkg/private/tar/tar.bzl index e2b23a39..d399397a 100644 --- a/pkg/private/tar/tar.bzl +++ b/pkg/private/tar/tar.bzl @@ -57,9 +57,13 @@ def _pkg_tar_impl(ctx): files = [] outputs, output_file, _ = setup_output_files(ctx) + # Declare the md5sums sidecar output. + md5sums_file = ctx.actions.declare_file(ctx.label.name + ".md5sums") + # Start building the arguments. args = ctx.actions.args() args.add("--output", output_file.path) + args.add("--md5sums_output", md5sums_file.path) args.add("--mode", ctx.attr.mode) args.add("--owner", ctx.attr.owner) args.add("--owner_name", ctx.attr.ownername) @@ -198,7 +202,7 @@ def _pkg_tar_impl(ctx): tools = [ctx.executable.compressor] if ctx.executable.compressor else [], executable = ctx.executable._build_tar, arguments = [args], - outputs = [output_file], + outputs = [output_file, md5sums_file], env = { "LANG": "en_US.UTF-8", "LC_CTYPE": "UTF-8", @@ -218,6 +222,7 @@ def _pkg_tar_impl(ctx): # Depend on it at your own risk! OutputGroupInfo( manifest = [manifest_file], + md5sums = depset([md5sums_file]), ), ] @@ -327,8 +332,13 @@ builds were accidentally doing it. Never explicitly set this to true for new cod "private_stamp_detect": attr.bool(default = False), # Implicit dependencies. + # Points to dd_tar_writer, a Go binary that replaces the upstream Python + # build_tar tool and additionally emits a .md5sums sidecar file. + # Label uses @@// (canonical root-module prefix in bzlmod) so that this + # reference resolves to the main repository even though this file lives + # inside the rules_pkg module. "_build_tar": attr.label( - default = Label("//pkg/private/tar:build_tar"), + default = Label("//datadog_test/dd_tar_writer:dd_tar_writer"), cfg = "exec", executable = True, allow_files = True, diff --git a/tests/tar/pkg_tar_test.py b/tests/tar/pkg_tar_test.py index 7923ab67..900d07c5 100644 --- a/tests/tar/pkg_tar_test.py +++ b/tests/tar/pkg_tar_test.py @@ -308,7 +308,9 @@ def test_compression_level(self): for file_name, expected_size in cases: file_path = runfiles.Create().Rlocation('rules_pkg/tests/tar/' + file_name) file_size = os.stat(file_path).st_size - self.assertEqual(file_size, expected_size, 'size error for ' + file_name) + # Fuzzy test for size because compression can vary by compression library. + # We want to be less than expected or no more that 1% larger. + self.assertLessEqual(file_size, expected_size * 1.01, 'size error for ' + file_name) def test_preserve_mode(self): if os.name == 'nt':