diff --git a/e2e/cache.go b/e2e/cache.go
index 1b07d383815..91a2d64d5e1 100644
--- a/e2e/cache.go
+++ b/e2e/cache.go
@@ -214,14 +214,15 @@ func isNotFoundErr(err error) bool {
 var CachedPrepareVHD = cachedFunc(prepareVHD)
 
 type GetVHDRequest struct {
-	Location string
-	Image    config.Image
+	Location              string
+	Image                 config.Image
+	UsePreviouslyBuiltVHD bool
 }
 
 // prepareVHD retrieves the Azure resource ID for a VHD image. A gallery is scanned for the correct version
 // and replicated to the location specified in the request if it does not already exist.
 func prepareVHD(ctx context.Context, request GetVHDRequest) (config.VHDResourceID, error) {
-	return config.GetVHDResourceID(ctx, request.Image, request.Location)
+	return config.GetVHDResourceID(ctx, request.Image, request.Location, request.UsePreviouslyBuiltVHD)
 }
 
 var CachedEnsureResourceGroup = cachedFunc(ensureResourceGroup)
diff --git a/e2e/config/azure.go b/e2e/config/azure.go
index d0de6f04619..ba7e0970d35 100644
--- a/e2e/config/azure.go
+++ b/e2e/config/azure.go
@@ -557,6 +557,143 @@ func (a *AzureClient) LatestSIGImageVersionByTag(ctx context.Context, image *Ima
 	return VHDResourceID(*latestVersion.ID), nil
 }
 
+// getTagValueCI returns the value of the tag named key, matching tag names
+// case-insensitively. An exact-case key is preferred so the result does not
+// depend on map iteration order; entries with a nil value are ignored.
+func getTagValueCI(tags map[string]*string, key string) (string, bool) {
+	if v, ok := tags[key]; ok && v != nil {
+		return *v, true
+	}
+	for k, v := range tags {
+		if strings.EqualFold(k, key) && v != nil {
+			return *v, true
+		}
+	}
+	return "", false
+}
+
+// PreviouslyBuiltSIGVersion resolves a previously built version of image in the
+// build gallery and ensures it is replicated to location.
+//
+// When Config.PreviouslyBuiltVHDBuildID is set, the lookup pins to the version
+// whose "buildId" tag equals it. Otherwise it picks the version with the
+// largest "now" tag (unix epoch) among versions tagged branch=refs/heads/main,
+// excluding the one whose "buildId" tag equals Config.BuildID.
+//
+// Only versions whose provisioning state is Succeeded or Updating and that do
+// not carry the no-selection tag are considered. On success image.Version is
+// set to the selected version's name. If nothing matches, the returned error
+// wraps ErrNotFound.
+func (a *AzureClient) PreviouslyBuiltSIGVersion(ctx context.Context, image *Image, location string) (VHDResourceID, error) {
+	toolkit.Logf(ctx, "Looking up previously built SIG version in %s", image.azurePortalImageUrl())
+
+	imageVersionsClient, err := armcompute.NewGalleryImageVersionsClient(image.Gallery.SubscriptionID, a.Credential, a.ArmOptions)
+	if err != nil {
+		return "", fmt.Errorf("failed to create image versions client: %w", err)
+	}
+
+	pager := imageVersionsClient.NewListByGalleryImagePager(image.Gallery.ResourceGroupName, image.Gallery.Name, image.Name, nil)
+
+	overrideBuildID := Config.PreviouslyBuiltVHDBuildID
+	currentBuildID := Config.BuildID
+
+	// Only the most recent candidate is ever used, so track the running best
+	// while paging instead of collecting every candidate and sorting.
+	var (
+		selected        *armcompute.GalleryImageVersion
+		selectedNow     int64
+		selectedBuildID string
+	)
+
+pages:
+	for pager.More() {
+		page, err := pager.NextPage(ctx)
+		if err != nil {
+			return "", fmt.Errorf("failed to get next page: %w", err)
+		}
+		for _, version := range page.Value {
+			// ID and Name are dereferenced below; skip entries missing either
+			// rather than panicking on a nil pointer.
+			if version == nil || version.ID == nil || version.Name == nil {
+				continue
+			}
+			if version.Properties == nil || version.Properties.ProvisioningState == nil {
+				continue
+			}
+			state := *version.Properties.ProvisioningState
+			if state != armcompute.GalleryProvisioningStateSucceeded && state != armcompute.GalleryProvisioningStateUpdating {
+				continue
+			}
+
+			// skip versions tagged for no-selection
+			if _, ok := version.Tags[noSelectionTagName]; ok {
+				toolkit.Logf(ctx, "Skipping version %s: has %s tag", *version.ID, noSelectionTagName)
+				continue
+			}
+
+			bid, hasBuildID := getTagValueCI(version.Tags, "buildId")
+
+			// if override is set, stop at the first exact match
+			if overrideBuildID != "" {
+				if hasBuildID && bid == overrideBuildID {
+					toolkit.Logf(ctx, "Found pinned version by PreviouslyBuiltVHDBuildID=%s: %s", overrideBuildID, *version.ID)
+					selected = version
+					break pages
+				}
+				continue
+			}
+
+			// filter to main-branch builds only
+			branch, ok := getTagValueCI(version.Tags, "branch")
+			if !ok || branch != "refs/heads/main" {
+				continue
+			}
+
+			// exclude current build; when no current build ID is configured there
+			// is nothing to exclude, and versions without a buildId tag must not
+			// match the empty string
+			if currentBuildID != "" && bid == currentBuildID {
+				toolkit.Logf(ctx, "Skipping version %s: matches current build ID %s", *version.ID, currentBuildID)
+				continue
+			}
+
+			// parse "now" tag (unix epoch) used to rank candidates
+			nowStr, ok := getTagValueCI(version.Tags, "now")
+			if !ok {
+				continue
+			}
+			nowVal, err := strconv.ParseInt(nowStr, 10, 64)
+			if err != nil {
+				toolkit.Logf(ctx, "Skipping version %s: malformed 'now' tag %q", *version.ID, nowStr)
+				continue
+			}
+
+			// keep the most recent candidate; the first one seen wins ties
+			if selected == nil || nowVal > selectedNow {
+				selected, selectedNow, selectedBuildID = version, nowVal, bid
+			}
+		}
+	}
+
+	if selected == nil {
+		if overrideBuildID != "" {
+			return "", fmt.Errorf("could not find version with buildId=%s in %s: %w", overrideBuildID, image.azurePortalImageUrl(), ErrNotFound)
+		}
+		return "", fmt.Errorf("no previously built main-branch versions found in %s: %w", image.azurePortalImageUrl(), ErrNotFound)
+	}
+
+	if overrideBuildID == "" {
+		toolkit.Logf(ctx, "Selected previously built version: %s (now=%d, buildId=%s)", *selected.ID, selectedNow, selectedBuildID)
+	}
+
+	image.Version = *selected.Name
+	if err := a.ensureReplication(ctx, image, selected, location); err != nil {
+		return "", fmt.Errorf("failed ensuring image replication: %w", err)
+	}
+
+	return VHDResourceID(*selected.ID), nil
+}
+
 func (a *AzureClient) ensureReplication(ctx context.Context, image *Image, version *armcompute.GalleryImageVersion, location string) error {
 	// Wait for any ongoing update operations to complete first
 	if err := a.waitForVersionOperationCompletion(ctx, image, version); err != nil {
diff --git a/e2e/config/config.go b/e2e/config/config.go
index d61db484c6e..5bfd6bf4c58 100644
--- a/e2e/config/config.go
+++ b/e2e/config/config.go
@@ -88,6 +88,10 @@ type Configuration struct {
 	TestTimeoutCluster        time.Duration `env:"TEST_TIMEOUT_CLUSTER" envDefault:"20m"`
 	TestTimeoutVMSS           time.Duration `env:"TEST_TIMEOUT_VMSS" envDefault:"17m"`
 	WindowsAdminPassword      string        `env:"WINDOWS_ADMIN_PASSWORD"`
+	// PreviouslyBuiltVHDBuildID, when set, pins the "previously built VHD" lookup
+	// to a specific ADO build ID (matched against the "buildId" tag on gallery versions).
+	// When empty, the lookup selects the most recent main-branch build excluding the current one.
+	PreviouslyBuiltVHDBuildID string        `env:"PREVIOUSLY_BUILT_VHD_BUILD_ID"`
 }
 
 func (c *Configuration) BlobStorageAccount() string {
diff --git a/e2e/config/vhd.go b/e2e/config/vhd.go
index 621f2f82287..b23ee56da8c 100644
--- a/e2e/config/vhd.go
+++ b/e2e/config/vhd.go
@@ -352,7 +352,7 @@ func (i *Image) SupportsScriptless() bool {
 	return !i.Flatcar && !i.Distro.IsWindowsDistro()
 }
 
-func GetVHDResourceID(ctx context.Context, i Image, location string) (VHDResourceID, error) {
+func GetVHDResourceID(ctx context.Context, i Image, location string, usePreviouslyBuiltVHD bool) (VHDResourceID, error) {
 	switch {
 	case i.Version != "":
 		vhd, err := Azure.EnsureSIGImageVersion(ctx, &i, location)
@@ -361,6 +361,13 @@ func GetVHDResourceID(ctx context.Context, i Image, location string) (VHDResourc
 		}
 		toolkit.Logf(ctx, "Got image by version: %s", i.azurePortalImageVersionUrl())
 		return vhd, nil
+	case usePreviouslyBuiltVHD:
+		vhd, err := Azure.PreviouslyBuiltSIGVersion(ctx, &i, location)
+		if err != nil {
+			return "", fmt.Errorf("failed to get previously built VHD for %s: %w", i.Name, err)
+		}
+		toolkit.Logf(ctx, "got previously built VHD: %s", i.azurePortalImageVersionUrl())
+		return vhd, nil
 	default:
 		vhd, err := Azure.LatestSIGImageVersionByTag(ctx, &i, Config.SIGVersionTagName, Config.SIGVersionTagValue, location)
 		if err != nil {
diff --git a/e2e/scenario_test.go b/e2e/scenario_test.go
index 1c7efa1bc71..374af39140e 100644
--- a/e2e/scenario_test.go
+++ b/e2e/scenario_test.go
@@ -464,8 +464,9 @@ func Test_ACL_ABUpdate(t *testing.T) {
 			ABUpdate: true,
 		},
 		Config: Config{
-			Cluster: ClusterKubenet,
-			VHD:     config.VHDACLGen2TL,
+			Cluster:               ClusterKubenet,
+			VHD:                   config.VHDACLGen2TL,
+			UsePreviouslyBuiltVHD: true,
 			VMConfigMutator: func(vmss *armcompute.VirtualMachineScaleSet) {
 				vmss.Properties = addTrustedLaunchToVMSS(vmss.Properties)
 			},
diff --git a/e2e/test_helpers.go b/e2e/test_helpers.go
index 374bb80598d..3381b2a660d 100644
--- a/e2e/test_helpers.go
+++ b/e2e/test_helpers.go
@@ -377,8 +377,9 @@ func maybeSkipScenario(ctx context.Context, t testing.TB, s *Scenario) {
 	}
 
 	_, err := CachedPrepareVHD(ctx, GetVHDRequest{
-		Image:    *s.VHD,
-		Location: s.Location,
+		Image:                 *s.VHD,
+		Location:              s.Location,
+		UsePreviouslyBuiltVHD: s.UsePreviouslyBuiltVHD,
 	})
 	if err != nil {
 		if config.Config.IgnoreScenariosWithMissingVHD && errors.Is(err, config.ErrNotFound) {
diff --git a/e2e/types.go b/e2e/types.go
index 4cbae5487fa..4db315ec0ca 100644
--- a/e2e/types.go
+++ b/e2e/types.go
@@ -218,6 +218,12 @@ type Config struct {
 	// This prevents the Guest Agent from sweeping events before they can be read.
 	// Only set this on CSE performance test scenarios.
 	EagerCSETimingExtraction bool
+
+	// UsePreviouslyBuiltVHD when true resolves the VHD to the most recent prior
+	// main-branch build in the build gallery, excluding the current pipeline run.
+	// This is used by the A/B update test to avoid the UUID collision where the
+	// current build's VHD and COSI share filesystem UUIDs.
+	UsePreviouslyBuiltVHD bool
 }
 
 func (s *Scenario) PrepareAKSNodeConfig() {
@@ -228,8 +234,9 @@ func (s *Scenario) PrepareAKSNodeConfig() {
 // This method will also use the scenario's configured VHD selector to modify the input VMSS to reference the correct VHD resource.
 func (s *Scenario) PrepareVMSSModel(ctx context.Context, t testing.TB, vmss *armcompute.VirtualMachineScaleSet) {
 	resourceID, err := CachedPrepareVHD(ctx, GetVHDRequest{
-		Image:    *s.VHD,
-		Location: s.Location,
+		Image:                 *s.VHD,
+		Location:              s.Location,
+		UsePreviouslyBuiltVHD: s.UsePreviouslyBuiltVHD,
 	})
 	require.NoError(t, err)
 	require.NotEmpty(t, resourceID, "VHDSelector.ResourceID")