-
Notifications
You must be signed in to change notification settings - Fork 314
[DO NOT MERGE] Revert "Only apply 5-minute sleep workaround for known multi-arch repos" #5055
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
df878a3
3ed7a4c
bc5fecd
543ae8e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -467,31 +467,13 @@ func isBuildPhaseTerminated(phase buildapi.BuildPhase) bool { | |
| } | ||
|
|
||
| type ImageBuildOptions struct { | ||
| Architectures []string | ||
| NeedsMultiArchWorkaround func() bool | ||
| Architectures []string | ||
| } | ||
|
|
||
| func newImageBuildOptions(archs []string) ImageBuildOptions { | ||
| return ImageBuildOptions{Architectures: archs} | ||
| } | ||
|
|
||
| // multiArchRepos are repos that need the sleep workaround for multi-arch builds. | ||
| var multiArchRepos = sets.New[string]( | ||
| "openshift/ci-tools", | ||
| "openshift/kueue-operator", | ||
| "openshift/loki", | ||
| "openshift/multiarch-tuning-operator", | ||
| "openshift/oadp-must-gather", | ||
| "openshift/oadp-operator", | ||
| "openshift/openshift-velero-plugin", | ||
| "openshift/velero", | ||
| "openshift/velero-plugin-for-aws", | ||
| "openshift/velero-plugin-for-gcp", | ||
| "openshift/velero-plugin-for-legacy-aws", | ||
| "openshift/velero-plugin-for-microsoft-azure", | ||
| "openshift-eng/baremetal-qe-infra", | ||
| ) | ||
|
|
||
| func handleBuilds(ctx context.Context, buildClient BuildClient, podClient kubernetes.PodClient, build buildapi.Build, metricsAgent *metrics.MetricsAgent, opts ...ImageBuildOptions) error { | ||
| var wg sync.WaitGroup | ||
|
|
||
|
|
@@ -500,17 +482,6 @@ func handleBuilds(ctx context.Context, buildClient BuildClient, podClient kubern | |
| o = opts[0] | ||
| } | ||
|
|
||
| // Create closure that checks if this build is for a multi-arch repo | ||
| needsMultiArchWorkaround := o.NeedsMultiArchWorkaround | ||
| if needsMultiArchWorkaround == nil { | ||
| needsMultiArchWorkaround = func() bool { | ||
| if org, repo := build.Labels[LabelMetadataOrg], build.Labels[LabelMetadataRepo]; org != "" && repo != "" { | ||
| return multiArchRepos.Has(fmt.Sprintf("%s/%s", org, repo)) | ||
| } | ||
| return false | ||
| } | ||
| } | ||
|
|
||
| builds := constructMultiArchBuilds(build, o.Architectures) | ||
| errChan := make(chan error, len(builds)) | ||
|
|
||
|
|
@@ -519,7 +490,7 @@ func handleBuilds(ctx context.Context, buildClient BuildClient, podClient kubern | |
| go func(b buildapi.Build) { | ||
| defer wg.Done() | ||
| metricsAgent.AddNodeWorkload(ctx, b.Namespace, fmt.Sprintf("%s-build", b.Name), b.Name, podClient) | ||
| if err := handleBuild(ctx, buildClient, podClient, b, needsMultiArchWorkaround); err != nil { | ||
| if err := handleBuild(ctx, buildClient, podClient, b, true); err != nil { | ||
| errChan <- fmt.Errorf("error occurred handling build %s: %w", b.Name, err) | ||
| } | ||
| metricsAgent.RemoveNodeWorkload(b.Name) | ||
|
|
@@ -577,15 +548,14 @@ func constructMultiArchBuilds(build buildapi.Build, stepArchitectures []string) | |
| return ret | ||
| } | ||
|
|
||
| func handleBuild(ctx context.Context, client BuildClient, podClient kubernetes.PodClient, build buildapi.Build, needsMultiArchWorkaround func() bool) error { | ||
| func handleBuild(ctx context.Context, client BuildClient, podClient kubernetes.PodClient, build buildapi.Build, waitOnMultiArch bool) error { | ||
| const attempts = 5 | ||
| ns, name := build.Namespace, build.Name | ||
| var errs []error | ||
| if err := wait.ExponentialBackoff(wait.Backoff{Duration: time.Minute, Factor: 1.5, Steps: attempts}, func() (bool, error) { | ||
| var attempt buildapi.Build | ||
| // TODO: This is a workaround to avoid race condition with multiple ci-operator instances building different architectures | ||
| // See https://issues.redhat.com/browse/OCPBUGS-65845 | ||
| if needsMultiArchWorkaround != nil && needsMultiArchWorkaround() { | ||
| if waitOnMultiArch { | ||
| // builds are using older src image, adding wait to avoid race condition | ||
| time.Sleep(5 * time.Minute) | ||
| } | ||
| build.DeepCopyInto(&attempt) | ||
|
Comment on lines
+551
to
559
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commented-out race condition fix is a concern if merged. Lines 557-558 contain dead code with a comment acknowledging a race condition ("builds are using older src image"). The sleep that previously mitigated this is now commented out. Given the PR title "DO NOT MERGE" and
Consider removing the dead code and comment, or implementing a proper solution before any future merge. 🤖 Prompt for AI Agents |
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unconditional 5-minute delay applied to all multi-arch builds.
Passing
truehere means every build routed throughhandleBuildswill now sleep 5 minutes, regardless of whether the repository actually needs the multi-arch workaround. Previously, this was gated by a repository allowlist check.Given the PR title indicates "DO NOT MERGE," this appears intentional for testing/investigation, but merging this would introduce a significant build-time regression.
🤖 Prompt for AI Agents