From 0b35736843db8669067e9c940024dd1fc1f1b58a Mon Sep 17 00:00:00 2001 From: bfjelds Date: Thu, 28 May 2026 12:58:38 -0700 Subject: [PATCH] e2e: add UsePreviouslyBuiltVHD for AB update test Add UsePreviouslyBuiltVHD flag to scenario Config that resolves the VHD to the most recent prior main-branch build in the build gallery, excluding the current pipeline run. This avoids the UUID collision where the current build's VHD and COSI share filesystem UUIDs. New PreviouslyBuiltSIGVersion function on AzureClient: - Lists all versions in the build gallery for the image definition - Filters to branch=refs/heads/main, provisioning Succeeded/Updating - Excludes current build by matching Config.BuildID against buildId tag - Sorts by parsed 'now' tag (unix epoch) for deterministic ordering - Supports PREVIOUSLY_BUILT_VHD_BUILD_ID env var override for debugging Test_ACL_ABUpdate now sets UsePreviouslyBuiltVHD: true to boot from a prior build before applying the current pipeline's COSI. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- e2e/cache.go | 7 +-- e2e/config/azure.go | 122 +++++++++++++++++++++++++++++++++++++++++++ e2e/config/config.go | 4 ++ e2e/config/vhd.go | 9 +++- e2e/scenario_test.go | 5 +- e2e/test_helpers.go | 5 +- e2e/types.go | 11 +++- 7 files changed, 153 insertions(+), 10 deletions(-) diff --git a/e2e/cache.go b/e2e/cache.go index 1b07d383815..91a2d64d5e1 100644 --- a/e2e/cache.go +++ b/e2e/cache.go @@ -214,14 +214,15 @@ func isNotFoundErr(err error) bool { var CachedPrepareVHD = cachedFunc(prepareVHD) type GetVHDRequest struct { - Location string - Image config.Image + Location string + Image config.Image + UsePreviouslyBuiltVHD bool } // prepareVHD retrieves the Azure resource ID for a VHD image. A gallery is scanned for the correct version // and replicated to the location specified in the request if it does not already exist. func prepareVHD(ctx context.Context, request GetVHDRequest) (config.VHDResourceID, error) { - return config.GetVHDResourceID(ctx, request.Image, request.Location) + return config.GetVHDResourceID(ctx, request.Image, request.Location, request.UsePreviouslyBuiltVHD) } var CachedEnsureResourceGroup = cachedFunc(ensureResourceGroup) diff --git a/e2e/config/azure.go b/e2e/config/azure.go index d0de6f04619..ba7e0970d35 100644 --- a/e2e/config/azure.go +++ b/e2e/config/azure.go @@ -557,6 +557,128 @@ func (a *AzureClient) LatestSIGImageVersionByTag(ctx context.Context, image *Ima return VHDResourceID(*latestVersion.ID), nil } +// getTagValueCI performs a case-insensitive lookup of a tag value from a gallery image version's tags. +func getTagValueCI(tags map[string]*string, key string) (string, bool) { + for k, v := range tags { + if strings.EqualFold(k, key) && v != nil { + return *v, true + } + } + return "", false +} + +// PreviouslyBuiltSIGVersion resolves the most recent prior main-branch build +// in the build gallery, excluding the current pipeline run. If +// Config.PreviouslyBuiltVHDBuildID is set, it pins to that specific build instead. +func (a *AzureClient) PreviouslyBuiltSIGVersion(ctx context.Context, image *Image, location string) (VHDResourceID, error) { + toolkit.Logf(ctx, "Looking up previously built SIG version in %s", image.azurePortalImageUrl()) + + imageVersionsClient, err := armcompute.NewGalleryImageVersionsClient(image.Gallery.SubscriptionID, a.Credential, a.ArmOptions) + if err != nil { + return "", fmt.Errorf("failed to create image versions client: %w", err) + } + + pager := imageVersionsClient.NewListByGalleryImagePager(image.Gallery.ResourceGroupName, image.Gallery.Name, image.Name, nil) + + type candidate struct { + version *armcompute.GalleryImageVersion + nowTag int64 + } + var candidates []candidate + + overrideBuildID := Config.PreviouslyBuiltVHDBuildID + currentBuildID := Config.BuildID + + for pager.More() { + page, err := pager.NextPage(ctx) + if err != nil { + return "", fmt.Errorf("failed to get next page: %w", err) + } + for _, version := range page.Value { + if version.Properties == nil || version.Properties.ProvisioningState == nil { + continue + } + state := *version.Properties.ProvisioningState + if state != armcompute.GalleryProvisioningStateSucceeded && state != armcompute.GalleryProvisioningStateUpdating { + continue + } + + // skip versions tagged for no-selection + if _, ok := version.Tags[noSelectionTagName]; ok { + toolkit.Logf(ctx, "Skipping version %s: has %s tag", *version.ID, noSelectionTagName) + continue + } + + // if override is set, find the exact match + if overrideBuildID != "" { + bid, ok := getTagValueCI(version.Tags, "buildId") + if ok && bid == overrideBuildID { + toolkit.Logf(ctx, "Found pinned version by PreviouslyBuiltVHDBuildID=%s: %s", overrideBuildID, *version.ID) + image.Version = *version.Name + if err := a.ensureReplication(ctx, image, version, location); err != nil { + return "", fmt.Errorf("failed ensuring image replication: %w", err) + } + return VHDResourceID(*version.ID), nil + } + continue + } + + // filter to main-branch builds only + branch, ok := getTagValueCI(version.Tags, "branch") + if !ok || branch != "refs/heads/main" { + continue + } + + // exclude current build + bid, _ := getTagValueCI(version.Tags, "buildId") + if bid == currentBuildID { + toolkit.Logf(ctx, "Skipping version %s: matches current build ID %s", *version.ID, currentBuildID) + continue + } + + // parse "now" tag (unix epoch) for sorting + nowStr, ok := getTagValueCI(version.Tags, "now") + if !ok { + continue + } + nowVal, err := strconv.ParseInt(nowStr, 10, 64) + if err != nil { + toolkit.Logf(ctx, "Skipping version %s: malformed 'now' tag %q", *version.ID, nowStr) + continue + } + + candidates = append(candidates, candidate{version: version, nowTag: nowVal}) + } + } + + if overrideBuildID != "" { + return "", fmt.Errorf("could not find version with buildId=%s in %s: %w", overrideBuildID, image.azurePortalImageUrl(), ErrNotFound) + } + + if len(candidates) == 0 { + return "", fmt.Errorf("no previously built main-branch versions found in %s: %w", image.azurePortalImageUrl(), ErrNotFound) + } + + // sort descending by now tag (most recent first) + slices.SortFunc(candidates, func(a, b candidate) int { + return cmp.Compare(b.nowTag, a.nowTag) + }) + + selected := candidates[0] + toolkit.Logf(ctx, "Selected previously built version: %s (now=%d, buildId=%s)", + *selected.version.ID, + selected.nowTag, + func() string { v, _ := getTagValueCI(selected.version.Tags, "buildId"); return v }(), + ) + + image.Version = *selected.version.Name + if err := a.ensureReplication(ctx, image, selected.version, location); err != nil { + return "", fmt.Errorf("failed ensuring image replication: %w", err) + } + + return VHDResourceID(*selected.version.ID), nil +} + func (a *AzureClient) ensureReplication(ctx context.Context, image *Image, version *armcompute.GalleryImageVersion, location string) error { // Wait for any ongoing update operations to complete first if err := a.waitForVersionOperationCompletion(ctx, image, version); err != nil { diff --git a/e2e/config/config.go b/e2e/config/config.go index d61db484c6e..5bfd6bf4c58 100644 --- a/e2e/config/config.go +++ b/e2e/config/config.go @@ -88,6 +88,10 @@ type Configuration struct { TestTimeoutCluster time.Duration `env:"TEST_TIMEOUT_CLUSTER" envDefault:"20m"` TestTimeoutVMSS time.Duration `env:"TEST_TIMEOUT_VMSS" envDefault:"17m"` WindowsAdminPassword string `env:"WINDOWS_ADMIN_PASSWORD"` + // PreviouslyBuiltVHDBuildID, when set, pins the "previously built VHD" lookup + // to a specific ADO build ID (matched against the "buildId" tag on gallery versions). + // When empty, the lookup selects the most recent main-branch build excluding the current one. + PreviouslyBuiltVHDBuildID string `env:"PREVIOUSLY_BUILT_VHD_BUILD_ID"` } func (c *Configuration) BlobStorageAccount() string { diff --git a/e2e/config/vhd.go b/e2e/config/vhd.go index 621f2f82287..b23ee56da8c 100644 --- a/e2e/config/vhd.go +++ b/e2e/config/vhd.go @@ -352,7 +352,7 @@ func (i *Image) SupportsScriptless() bool { return !i.Flatcar && !i.Distro.IsWindowsDistro() } -func GetVHDResourceID(ctx context.Context, i Image, location string) (VHDResourceID, error) { +func GetVHDResourceID(ctx context.Context, i Image, location string, usePreviouslyBuiltVHD bool) (VHDResourceID, error) { switch { case i.Version != "": vhd, err := Azure.EnsureSIGImageVersion(ctx, &i, location) @@ -361,6 +361,13 @@ func GetVHDResourceID(ctx context.Context, i Image, location string) (VHDResourc } toolkit.Logf(ctx, "Got image by version: %s", i.azurePortalImageVersionUrl()) return vhd, nil + case usePreviouslyBuiltVHD: + vhd, err := Azure.PreviouslyBuiltSIGVersion(ctx, &i, location) + if err != nil { + return "", fmt.Errorf("failed to get previously built VHD for %s: %w", i.Name, err) + } + toolkit.Logf(ctx, "got previously built VHD: %s", i.azurePortalImageVersionUrl()) + return vhd, nil default: vhd, err := Azure.LatestSIGImageVersionByTag(ctx, &i, Config.SIGVersionTagName, Config.SIGVersionTagValue, location) if err != nil { diff --git a/e2e/scenario_test.go b/e2e/scenario_test.go index 1c7efa1bc71..374af39140e 100644 --- a/e2e/scenario_test.go +++ b/e2e/scenario_test.go @@ -464,8 +464,9 @@ func Test_ACL_ABUpdate(t *testing.T) { ABUpdate: true, }, Config: Config{ - Cluster: ClusterKubenet, - VHD: config.VHDACLGen2TL, + Cluster: ClusterKubenet, + VHD: config.VHDACLGen2TL, + UsePreviouslyBuiltVHD: true, VMConfigMutator: func(vmss *armcompute.VirtualMachineScaleSet) { vmss.Properties = addTrustedLaunchToVMSS(vmss.Properties) }, diff --git a/e2e/test_helpers.go b/e2e/test_helpers.go index 374bb80598d..3381b2a660d 100644 --- a/e2e/test_helpers.go +++ b/e2e/test_helpers.go @@ -377,8 +377,9 @@ func maybeSkipScenario(ctx context.Context, t testing.TB, s *Scenario) { } _, err := CachedPrepareVHD(ctx, GetVHDRequest{ - Image: *s.VHD, - Location: s.Location, + Image: *s.VHD, + Location: s.Location, + UsePreviouslyBuiltVHD: s.UsePreviouslyBuiltVHD, }) if err != nil { if config.Config.IgnoreScenariosWithMissingVHD && errors.Is(err, config.ErrNotFound) { diff --git a/e2e/types.go b/e2e/types.go index 4cbae5487fa..4db315ec0ca 100644 --- a/e2e/types.go +++ b/e2e/types.go @@ -218,6 +218,12 @@ type Config struct { // This prevents the Guest Agent from sweeping events before they can be read. // Only set this on CSE performance test scenarios. EagerCSETimingExtraction bool + + // UsePreviouslyBuiltVHD when true resolves the VHD to the most recent prior + // main-branch build in the build gallery, excluding the current pipeline run. + // This is used by the A/B update test to avoid the UUID collision where the + // current build's VHD and COSI share filesystem UUIDs. + UsePreviouslyBuiltVHD bool } func (s *Scenario) PrepareAKSNodeConfig() { @@ -228,8 +234,9 @@ func (s *Scenario) PrepareAKSNodeConfig() { // This method will also use the scenario's configured VHD selector to modify the input VMSS to reference the correct VHD resource. func (s *Scenario) PrepareVMSSModel(ctx context.Context, t testing.TB, vmss *armcompute.VirtualMachineScaleSet) { resourceID, err := CachedPrepareVHD(ctx, GetVHDRequest{ - Image: *s.VHD, - Location: s.Location, + Image: *s.VHD, + Location: s.Location, + UsePreviouslyBuiltVHD: s.UsePreviouslyBuiltVHD, }) require.NoError(t, err) require.NotEmpty(t, resourceID, "VHDSelector.ResourceID")