Skip to content

Commit 0bb705d

Browse files
authored
Add cleanup job support for pre-deletion hooks via Kubernetes Jobs (#48)
* feat: Add cleanup job feature Signed-off-by: Magnus Ullberg <magnus@ullberg.us>
1 parent 44ddee6 commit 0bb705d

20 files changed

Lines changed: 2107 additions & 105 deletions

File tree

.devcontainer/Dockerfile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,14 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
66
# update PATH for local pip installs
77
ENV PATH="$PATH:~/.local/bin"
88

9+
# Install golangci-lint
10+
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates curl git xz-utils && rm -rf /var/lib/apt/lists/* \
11+
&& GOLANGCI_LINT_VERSION=2.6.2 \
12+
&& curl -sSLO "https://github.com/golangci/golangci-lint/releases/download/v${GOLANGCI_LINT_VERSION}/golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64.tar.gz" \
13+
&& tar -xzf golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64.tar.gz -C /tmp \
14+
&& mv /tmp/golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64/golangci-lint /usr/local/bin/golangci-lint \
15+
&& rm -rf /tmp/golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64* golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64.tar.gz
16+
917
CMD sleep infinity
1018

1119
ENTRYPOINT []

.devcontainer/post-install.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,17 @@ set -eux
33

44
# initialize pre-commit
55
git config --global --add safe.directory /workspaces
6+
7+
# Install golangci-lint if not present (useful when devcontainer not rebuilt).
# The install is best-effort: each step tolerates failure so that a network or
# apt problem does not abort the rest of this script under `set -e`. The final
# check reports whether the binary is actually available.
8+
if ! command -v golangci-lint >/dev/null 2>&1; then
9+
echo "golangci-lint not found, installing v2 via github release tarball"
10+
# We separate update and install to avoid shellcheck SC2015 warnings
11+
apt-get update || true
12+
apt-get install -y --no-install-recommends ca-certificates curl xz-utils || true
13+
GOLANGCI_LINT_VERSION=2.6.2
14+
# -f makes curl fail on HTTP errors instead of saving the error page as the tarball
curl -fsSLO "https://github.com/golangci/golangci-lint/releases/download/v${GOLANGCI_LINT_VERSION}/golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64.tar.gz" || true
15+
tar -xzf golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64.tar.gz -C /tmp || true
16+
mv /tmp/golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64/golangci-lint /usr/local/bin/golangci-lint || true
17+
rm -rf /tmp/golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64* golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64.tar.gz || true
18+
# Only claim success when the binary is really on PATH
if command -v golangci-lint >/dev/null 2>&1; then
echo "golangci-lint installed"
else
echo "WARNING: golangci-lint installation failed; install it manually before running 'make lint'" >&2
fi
19+
fi

.github/workflows/pr-check.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,15 @@ jobs:
1414
runs-on: ubuntu-latest
1515
steps:
1616
- name: Install pre-commit
17-
run: sudo apt-get update -y && sudo apt-get install -y pre-commit
17+
run: sudo apt-get update -y && sudo apt-get install -y pre-commit codespell gitleaks
18+
19+
- name: Install golangci/golangci-lint
20+
run: |
21+
export GOLANGCI_LINT_VERSION=2.6.2
22+
curl -sSLO "https://github.com/golangci/golangci-lint/releases/download/v${GOLANGCI_LINT_VERSION}/golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64.tar.gz"
23+
tar -xzf golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64.tar.gz -C /tmp
24+
mv /tmp/golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64/golangci-lint /usr/local/bin/golangci-lint
25+
rm -rf /tmp/golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64* golangci-lint-${GOLANGCI_LINT_VERSION}-linux-amd64.tar.gz
1826
1927
- name: Clone the code
2028
uses: actions/checkout@v5

.gitleaks.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
title = "gitleaks: project custom allowlist"
2+
3+
useDefault = true
4+
5+
[[allowlists]]
6+
description = "Ignore known example keys in examples/cleanup"
7+
paths = [ '''examples/cleanup/backup-to-s3.yaml''' ]
8+
targetRules = [ "aws-access-token" ]

.pre-commit-config.yaml

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,28 +4,54 @@ repos:
44
hooks:
55
- id: trailing-whitespace
66
- id: end-of-file-fixer
7-
# - id: check-yaml
8-
# args: [--allow-multiple-documents]
7+
- id: check-yaml
8+
args: [--allow-multiple-documents]
9+
# Don't try to validate Helm chart templates — they contain Go
10+
# templating and are not valid YAML until rendered.
11+
exclude: ^object-lease-operator/helm-charts/
912
- id: check-json
10-
# - id: check-added-large-files
1113
- id: detect-private-key
1214
- id: no-commit-to-branch
13-
# - repo: https://github.com/golangci/golangci-lint
14-
# rev: v2.2.2
15-
# hooks:
16-
# - id: golangci-lint
17-
# name: golangci-lint
18-
# description: Fast linters runner for Go. Note that only modified files are linted, so linters like 'unused' that need to scan all files won't work as expected.
19-
# entry: golangci-lint run --fix
20-
# types: [go]
21-
# language: golang
22-
# require_serial: true
23-
# pass_filenames: false
24-
# - id: golangci-lint-fmt
25-
# name: golangci-lint-fmt
26-
# description: Fast linters runner for Go. Note that only modified files are linted, so linters like 'unused' that need to scan all files won't work as expected.
27-
# entry: golangci-lint fmt
28-
# types: [go]
29-
# language: golang
30-
# require_serial: true
31-
# pass_filenames: false
15+
16+
- repo: local
17+
hooks:
18+
- id: shellcheck
19+
name: Shellcheck
20+
entry: shellcheck -x
21+
language: system
22+
files: ^(scripts/|run.sh$|.*\.sh$)
23+
24+
- id: gitleaks
25+
name: Gitleaks (secrets scanner)
26+
entry: gitleaks detect
27+
pass_filenames: false
28+
always_run: true
29+
language: system
30+
31+
- id: codespell
32+
name: Codespell (spell check docs)
33+
entry: codespell
34+
language: system
35+
# Limit to README.md, docs/ and object-lease-console-plugin/ to avoid noisy suggestions elsewhere in the repository
36+
files: ^(README.md|docs/|object-lease-console-plugin/)
37+
38+
- id: make-vet
39+
name: Run `go vet`
40+
entry: make vet
41+
language: system
42+
pass_filenames: false
43+
always_run: true
44+
45+
- id: make-fmt
46+
name: Run repository formatter (go fmt)
47+
entry: make fmt
48+
language: system
49+
pass_filenames: false
50+
always_run: true
51+
52+
- id: make-lint
53+
name: Run repository linter
54+
entry: make lint
55+
language: system
56+
pass_filenames: false
57+
always_run: true

Makefile

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ fmt: ## Format Go code
3636
vet: ## Vet Go code
3737
go vet ./...
3838

39+
.PHONY: lint
40+
lint: ## Run golangci-lint (requires golangci-lint installed)
41+
golangci-lint run
42+
3943
.PHONY: test
4044
test: tidy fmt vet ## Run tests with coverage
4145
go test ./... -race -coverprofile=coverage.out
@@ -50,11 +54,12 @@ run: build ## Run the application locally
5054
./$(BUILD_DIR)/$(BINARY_NAME) \
5155
-group startpunkt.ullberg.us \
5256
-kind Application \
53-
-version v1alpha2 \
57+
-version v1alpha4 \
5458
-leader-elect \
5559
-leader-elect-namespace default \
56-
-opt-in-label-key "object-lease-controller.ullberg.io/enabled" \
57-
-opt-in-label-value true
60+
-zap-log-level debug
61+
# Opt-in label filtering is disabled for local runs. To enable it, append the
# flags below to the command above. Do not comment flags out in the middle of
# the backslash-continued command: the shell joins the continued lines and
# treats everything after '#' as a comment, which silently drops every flag
# that follows (including -zap-log-level).
62+
#   -opt-in-label-key "object-lease-controller.ullberg.io/enabled" -opt-in-label-value true
5863

5964
# =============================================================================
6065
# Docker Targets - Main Controller

README.md

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@ This project implements a Kubernetes operator that allows you to specify a TTL (
1313
## Features
1414
- Deploys as an operator.
1515
- Dynamically deploys a controller for each configured GVK.
16-
- Controllers are only managing one GVK each, increasing scaleability.
16+
- Controllers are only managing one GVK each, increasing scalability.
1717
- Leader election support for high availability.
18+
- Custom cleanup scripts via Kubernetes Jobs before object deletion.
1819

1920
## Architecture
2021
The operator is designed to be highly extensible and scalable. Once deployed, the operator looks for CRDs and for each GVK specified in a CRD, a dedicated controller is launched.
@@ -134,6 +135,64 @@ Set by the controller. RFC3339 UTC timestamp for when the object will expire. Sa
134135

135136
Set by the controller. Human readable status or validation errors.
136137

138+
### Cleanup Job Annotations
139+
140+
The controller supports running custom cleanup scripts via Kubernetes Jobs before deleting expired objects. This is useful for backing up data, notifying external systems, or cleaning up related resources.
141+
142+
#### object-lease-controller.ullberg.io/on-delete-job
143+
144+
**Required for cleanup jobs**. Specifies the ConfigMap and script key in the format `configmap-name/script-key`.
145+
146+
Example:
147+
```bash
148+
kubectl annotate application my-app object-lease-controller.ullberg.io/on-delete-job=cleanup-scripts/backup.sh
149+
```
150+
151+
#### object-lease-controller.ullberg.io/job-service-account
152+
153+
**Optional** (default: `default`). ServiceAccount the cleanup Job runs as. Set this to a ServiceAccount whose RBAC grants the permissions and Secret access the cleanup script needs.
154+
155+
#### object-lease-controller.ullberg.io/job-image
156+
157+
**Optional** (default: `bitnami/kubectl:latest`). Container image for running the cleanup script.
158+
159+
#### object-lease-controller.ullberg.io/job-wait
160+
161+
**Optional** (default: `false`). If `true`, the controller waits for the Job to complete before deleting the object. If `false`, the Job runs in fire-and-forget mode.
162+
163+
#### object-lease-controller.ullberg.io/job-timeout
164+
165+
**Optional** (default: `5m`). Maximum time to wait for Job completion when `job-wait` is `true`. Supports flexible duration format (e.g., `10m`, `1h`, `30s`).
166+
167+
#### object-lease-controller.ullberg.io/job-ttl
168+
169+
**Optional** (default: `300`). Number of seconds a finished Job is kept before Kubernetes deletes it; used as the Job's `ttlSecondsAfterFinished`.
170+
171+
#### object-lease-controller.ullberg.io/job-backoff-limit
172+
173+
**Optional** (default: `3`). Number of retries before the cleanup Job is considered failed.
174+
175+
### Cleanup Job Environment Variables
176+
177+
Cleanup scripts receive these environment variables:
178+
179+
- `OBJECT_NAME` - Name of the object being deleted
180+
- `OBJECT_NAMESPACE` - Namespace of the object
181+
- `OBJECT_KIND` - Kind (e.g., "Application")
182+
- `OBJECT_GROUP` - API group (e.g., "startpunkt.ullberg.us")
183+
- `OBJECT_VERSION` - API version (e.g., "v1alpha2")
184+
- `OBJECT_UID` - UID of the object
185+
- `OBJECT_RESOURCE_VERSION` - Resource version
186+
- `LEASE_STARTED_AT` - RFC3339 timestamp when lease started
187+
- `LEASE_EXPIRED_AT` - RFC3339 timestamp when lease expired
188+
- `OBJECT_LABELS` - JSON-encoded labels
189+
- `OBJECT_ANNOTATIONS` - JSON-encoded annotations
190+
191+
See [examples/cleanup/](examples/cleanup/) for complete examples including:
192+
- Backing up to S3
193+
- Webhook notifications
194+
- Cleaning up related Kubernetes resources
195+
137196
### Removing TTL
138197

139198
Remove `ttl` to stop lease management. The controller clears `lease-start`, `expire-at`, and `lease-status`.
@@ -146,6 +205,12 @@ kubectl annotate pod test object-lease-controller.ullberg.io/ttl-
146205
- Automatically manage leases for custom resources (e.g., Applications, Databases, Services)
147206
- Enforce expiration policies
148207
- Integrate with external systems for lease validation or renewal
208+
- Execute custom cleanup scripts before object deletion:
209+
- Back up data to external storage (S3, GCS, etc.)
210+
- Notify external systems or webhooks
211+
- Clean up dependent resources not covered by owner references
212+
- Archive logs or metrics
213+
- Run graceful shutdown procedures
149214

150215
## Usage
151216

cmd/main.go

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"strconv"
1010
"strings"
1111

12+
batchv1 "k8s.io/api/batch/v1"
1213
corev1 "k8s.io/api/core/v1"
1314
apimeta "k8s.io/apimachinery/pkg/api/meta"
1415
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
@@ -32,6 +33,16 @@ const (
3233
AnnLeaseStart = "object-lease-controller.ullberg.io/lease-start" // RFC3339 UTC
3334
AnnExpireAt = "object-lease-controller.ullberg.io/expire-at"
3435
AnnStatus = "object-lease-controller.ullberg.io/lease-status"
36+
37+
// Cleanup job annotation keys
38+
AnnOnDeleteJob = "object-lease-controller.ullberg.io/on-delete-job"
39+
AnnJobServiceAccount = "object-lease-controller.ullberg.io/job-service-account"
40+
AnnJobImage = "object-lease-controller.ullberg.io/job-image"
41+
AnnJobWait = "object-lease-controller.ullberg.io/job-wait"
42+
AnnJobTimeout = "object-lease-controller.ullberg.io/job-timeout"
43+
AnnJobTTL = "object-lease-controller.ullberg.io/job-ttl"
44+
AnnJobBackoffLimit = "object-lease-controller.ullberg.io/job-backoff-limit"
45+
AnnJobEnvSecrets = "object-lease-controller.ullberg.io/job-env-secrets"
3546
)
3647

3748
// ParseParams holds runtime configuration parsed from flags and environment.
@@ -57,10 +68,19 @@ var statFn = os.Stat
5768
var readFileFn = os.ReadFile
5869

5970
func main() {
60-
ctrl.SetLogger(zap.New())
71+
// Bind zap logging flags (e.g., -zap-log-level) to the global flag set
72+
// so callers (and the Makefile) can adjust verbosity. Don't set the
73+
// logger until after flags are parsed so the selected level is applied.
74+
var zapOpts zap.Options
75+
zapOpts.BindFlags(flag.CommandLine)
6176

6277
params := parseParameters()
6378

79+
// Set logger using the parsed zap options (this reads values parsed by
80+
// parseParameters which calls flag.Parse()). This allows callers to pass
81+
// flags like -zap-log-level=debug to control verbosity.
82+
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&zapOpts)))
83+
6484
enableLeaderElection, leaderElectionNamespace, errE := parseLeaderElectionConfig(params.LeaderElectionEnabled, params.LeaderElectionNamespace)
6585
if errE != nil {
6686
fmt.Printf("%v\n", errE)
@@ -75,6 +95,7 @@ func main() {
7595

7696
scheme := runtime.NewScheme()
7797
_ = corev1.AddToScheme(scheme)
98+
_ = batchv1.AddToScheme(scheme)
7899

79100
gvk := schema.GroupVersionKind{
80101
Group: params.Group,
@@ -266,7 +287,11 @@ func buildManagerOptions(scheme *runtime.Scheme, group, version, kind string, me
266287
Metrics: metricsServerOptions,
267288
HealthProbeBindAddress: probeAddr,
268289
Cache: cache.Options{
269-
DefaultTransform: util.MinimalObjectTransform(AnnTTL, AnnLeaseStart, AnnExpireAt, AnnStatus),
290+
DefaultTransform: util.MinimalObjectTransform(
291+
AnnTTL, AnnLeaseStart, AnnExpireAt, AnnStatus,
292+
AnnOnDeleteJob, AnnJobServiceAccount, AnnJobImage, AnnJobWait,
293+
AnnJobTimeout, AnnJobTTL, AnnJobBackoffLimit,
294+
),
270295
},
271296
}
272297
if pprofAddr != "" {
@@ -284,10 +309,18 @@ func newLeaseWatcher(mgr ctrl.Manager, gvk schema.GroupVersionKind, leaderElecti
284309
GVK: gvk,
285310
Recorder: mgr.GetEventRecorderFor(leaderElectionID),
286311
Annotations: controllers.Annotations{
287-
TTL: AnnTTL,
288-
LeaseStart: AnnLeaseStart,
289-
ExpireAt: AnnExpireAt,
290-
Status: AnnStatus,
312+
TTL: AnnTTL,
313+
LeaseStart: AnnLeaseStart,
314+
ExpireAt: AnnExpireAt,
315+
Status: AnnStatus,
316+
OnDeleteJob: AnnOnDeleteJob,
317+
JobServiceAccount: AnnJobServiceAccount,
318+
JobImage: AnnJobImage,
319+
JobWait: AnnJobWait,
320+
JobTimeout: AnnJobTimeout,
321+
JobTTL: AnnJobTTL,
322+
JobBackoffLimit: AnnJobBackoffLimit,
323+
JobEnvSecrets: AnnJobEnvSecrets,
291324
},
292325
Metrics: ometrics.NewLeaseMetrics(gvk),
293326
}

examples/cleanup/README.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Cleanup Job Examples
2+
3+
This directory contains example configurations for using cleanup jobs with the object-lease-controller.
4+
5+
## Examples
6+
7+
1. **backup-to-s3.yaml** - Complete example of backing up an object to S3 before deletion
8+
2. **notify-webhook.yaml** - Example of notifying an external webhook when an object expires
9+
3. **cleanup-related-resources.yaml** - Example of cleaning up related Kubernetes resources
10+
11+
## Quick Start
12+
13+
1. Create a ConfigMap with your cleanup script
14+
2. Create a ServiceAccount with necessary permissions
15+
3. Annotate your resource with cleanup job configuration
16+
4. When the lease expires, the controller runs the cleanup Job automatically before deleting the object
17+
18+
## Annotations
19+
20+
| Annotation | Required | Default | Description |
21+
|------------|----------|---------|-------------|
22+
| `object-lease-controller.ullberg.io/on-delete-job` | Yes (enables the cleanup job) | - | ConfigMap reference in the format `configmap-name/script-key` |
23+
| `object-lease-controller.ullberg.io/job-service-account` | No | `default` | ServiceAccount to run the Job as |
24+
| `object-lease-controller.ullberg.io/job-image` | No | `bitnami/kubectl:latest` | Container image for running the script |
25+
| `object-lease-controller.ullberg.io/job-env-secrets` | No | - | Comma-separated list of Secret names to mount as environment variables |
26+
| `object-lease-controller.ullberg.io/job-wait` | No | `false` | Wait for Job completion before deleting object |
27+
| `object-lease-controller.ullberg.io/job-timeout` | No | `5m` | Maximum time to wait for Job completion |
28+
| `object-lease-controller.ullberg.io/job-ttl` | No | `300` | TTL in seconds for Job cleanup |
29+
| `object-lease-controller.ullberg.io/job-backoff-limit` | No | `3` | Number of retries for failed Jobs |
30+
31+
## Environment Variables
32+
33+
The cleanup script receives these environment variables:
34+
35+
- `OBJECT_NAME` - Name of the object being deleted
36+
- `OBJECT_NAMESPACE` - Namespace of the object
37+
- `OBJECT_KIND` - Kind (e.g., "Application")
38+
- `OBJECT_GROUP` - API group
39+
- `OBJECT_VERSION` - API version
40+
- `OBJECT_UID` - UID of the object
41+
- `OBJECT_RESOURCE_VERSION` - Resource version
42+
- `LEASE_STARTED_AT` - RFC3339 timestamp when lease started
43+
- `LEASE_EXPIRED_AT` - RFC3339 timestamp when lease expired
44+
- `OBJECT_LABELS` - JSON-encoded labels
45+
- `OBJECT_ANNOTATIONS` - JSON-encoded annotations

0 commit comments

Comments
 (0)