feat: end-to-end CI tests for backup/restore flow (#22) #20
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity, triggers, run de-duplication and default token scope.
name: Deployment Verification

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  schedule:
    # Weekly rebuild (Mondays, 06:00) to catch upstream image drift: new
    # Traefik, Keycloak, or Postgres patch releases that break deployment.
    - cron: '0 6 * * 1'
  workflow_dispatch:

# One concurrency group per ref. Only pull_request runs cancel a run that is
# already in progress; for every other event the expression is false.
concurrency:
  group: deployment-verification-${{ github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

# Default token scope for the workflow: read-only repository contents.
permissions:
  contents: read
jobs:
  # Static checks. Both compose-based jobs below declare `needs: lint`.
  lint:
    name: Lint shell scripts and workflow YAML
    runs-on: ubuntu-latest
    timeout-minutes: 5
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
        with:
          # No later step in this job runs git against the remote, so do
          # not keep the token configured after checkout.
          persist-credentials: false

      - name: ShellCheck
        # Uses the official koalaman/shellcheck-alpine image directly rather
        # than an intermediate GitHub Action, so there is one less
        # supply-chain layer to pin and review. Covers both repo-root scripts
        # and the tests/ directory so the backup-restore-e2e runner is linted
        # too.
        # NOTE(review): `stable` is a floating tag, unlike the version-tagged
        # actionlint image below; consider pinning it as well.
        run: |
          docker run --rm -v "$PWD:/mnt" -w /mnt \
            koalaman/shellcheck-alpine:stable \
            shellcheck ./*.sh tests/*.sh

      - name: actionlint (GitHub Actions workflow linting)
        # Uses the rhysd/actionlint image directly, pinned to a specific
        # version. Surfaces workflow typos, invalid references to jobs/
        # outputs, and common GitHub Actions footguns the YAML parser
        # doesn't catch. actionlint itself is a single Go binary.
        run: |
          docker run --rm -v "$PWD:/mnt" -w /mnt \
            rhysd/actionlint:1.7.12 \
            -color

  scan-trivy:
    name: Scan pinned upstream image with Trivy
    runs-on: ubuntu-latest
    timeout-minutes: 10
    # Trivy findings don't block the pipeline: they surface in the Security
    # tab where they can be triaged and fixed via Dependabot upstream-digest
    # bumps. A hard block here would cause CI failures on every new CVE
    # disclosure, which isn't actionable inside this PR.
    continue-on-error: true
    permissions:
      contents: read
      security-events: write
    strategy:
      # One job per upstream image, so findings show up separately in the
      # GitHub Security tab under distinct categories (trivy-postgres,
      # trivy-traefik, trivy-keycloak).
      fail-fast: false
      matrix:
        # These image references are repeated in the generated .env files of
        # deploy-and-test and backup-restore-e2e; update all three together.
        include:
          - name: postgres
            image: "postgres:16@sha256:71e27bf60b70bded003791b5573f8b808365613f341df20ffcf0c1ed7bc13ddf"
          - name: traefik
            image: "traefik:3.2@sha256:e561a37f8710d9cf41c78bdf421d822b2c0b48267ec0552e644565fb55466ea9"
          - name: keycloak
            image: "quay.io/keycloak/keycloak:26.2.5@sha256:4883630ef9db14031cde3e60700c9a9a8eaf1b5c24db1589d6a2d43de38ba2a9"
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
        with:
          # The scan and the SARIF upload do not run git against the remote.
          persist-credentials: false

      - name: Trivy scan of ${{ matrix.name }}
        uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1  # v0.35.0
        with:
          image-ref: ${{ matrix.image }}
          format: sarif
          output: trivy-${{ matrix.name }}.sarif
          severity: CRITICAL,HIGH
          ignore-unfixed: true

      - name: Upload Trivy SARIF (${{ matrix.name }}) to GitHub Security
        uses: github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225  # v4.35.2
        with:
          sarif_file: trivy-${{ matrix.name }}.sarif
          category: trivy-${{ matrix.name }}

  deploy-and-test:
    name: docker compose up + HTTPS + Traefik dashboard smoke
    runs-on: ubuntu-latest
    # Wait for lint to pass so we don't burn the 15-minute compose-up slot
    # on a workflow that has shellcheck/actionlint errors. scan-trivy runs
    # in parallel (not a dependency) since findings don't block deployment.
    needs: lint
    timeout-minutes: 15
    permissions:
      contents: read
    env:
      NETWORK_ONE: keycloak-network
      NETWORK_TWO: traefik-network
      DOCKER_COMPOSE_FILE: keycloak-traefik-letsencrypt-docker-compose.yml
      APP_HOSTNAME: keycloak.heyvaldemar.net
      APP_TRAEFIK_HOSTNAME: traefik.keycloak.heyvaldemar.net
      COMPOSE_PROJECT_NAME: keycloak
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
        with:
          # Nothing after checkout runs git against the remote.
          persist-credentials: false

      - name: Create necessary Docker networks
        # Create each network only when it is missing, so a genuine
        # `docker network create` failure still fails the step instead of
        # being swallowed.
        run: |
          for network in "$NETWORK_ONE" "$NETWORK_TWO"; do
            docker network inspect "$network" > /dev/null 2>&1 \
              || docker network create "$network"
          done

      - name: Generate test .env with ephemeral credentials
        # The real .env is gitignored. CI generates throwaway credentials so
        # `docker compose up` succeeds without committing secrets to the repo.
        # The heredoc delimiter is unquoted on purpose: the shell expands the
        # hostname variables and the openssl substitutions, and turns each
        # `\$` into a literal `$`.
        run: |
          cat > .env <<EOF
          TRAEFIK_IMAGE_TAG=traefik:3.2@sha256:e561a37f8710d9cf41c78bdf421d822b2c0b48267ec0552e644565fb55466ea9
          TRAEFIK_LOG_LEVEL=WARN
          TRAEFIK_ACME_EMAIL=ci@example.com
          TRAEFIK_HOSTNAME=${APP_TRAEFIK_HOSTNAME}
          TRAEFIK_BASIC_AUTH=traefikadmin:\$\$2y\$\$10\$\$sMzJfirKC75x/hVpiINeZOiSm.Jkity9cn4KwNkRvO7hSQVFc5FLO
          KEYCLOAK_POSTGRES_IMAGE_TAG=postgres:16@sha256:71e27bf60b70bded003791b5573f8b808365613f341df20ffcf0c1ed7bc13ddf
          KEYCLOAK_IMAGE_TAG=quay.io/keycloak/keycloak:26.2.5@sha256:4883630ef9db14031cde3e60700c9a9a8eaf1b5c24db1589d6a2d43de38ba2a9
          KEYCLOAK_DB_NAME=keycloakdb
          KEYCLOAK_DB_USER=keycloakdbuser
          KEYCLOAK_DB_PASSWORD=$(openssl rand -base64 24 | tr -d '/+=' | head -c 32)
          KEYCLOAK_ADMIN_USERNAME=keycloakadmin
          KEYCLOAK_ADMIN_PASSWORD=$(openssl rand -base64 24 | tr -d '/+=' | head -c 32)
          KEYCLOAK_HOSTNAME=${APP_HOSTNAME}
          KEYCLOAK_BACKUP_INIT_SLEEP=30m
          KEYCLOAK_BACKUP_INTERVAL=24h
          KEYCLOAK_POSTGRES_BACKUP_PRUNE_DAYS=7
          KEYCLOAK_POSTGRES_BACKUPS_PATH=/srv/keycloak-postgres/backups
          KEYCLOAK_POSTGRES_BACKUP_NAME=keycloak-postgres-backup
          EOF
          echo "Generated ephemeral .env for CI run"

      - name: Start up services using Docker Compose
        run: docker compose -f "$DOCKER_COMPOSE_FILE" -p "$COMPOSE_PROJECT_NAME" up -d

      - name: Modify /etc/hosts for internal routing
        # Point both hostnames at the loopback address so the curl probes
        # below reach the local stack.
        run: |
          echo "127.0.0.1 $APP_HOSTNAME" | sudo tee -a /etc/hosts
          echo "127.0.0.1 $APP_TRAEFIK_HOSTNAME" | sudo tee -a /etc/hosts

      - name: Print Docker Compose services status
        run: docker ps

      - name: Wait for the application to be ready via Traefik
        # Polls every 10 s for up to 5 minutes. The response body is
        # discarded so a successful probe does not dump the page into the log.
        run: |
          echo "Checking the routing and availability of the application via Traefik..."
          # $APP_HOSTNAME is intentionally expanded by the inner bash -c
          # (which inherits the job-level env:), not by the outer shell.
          # shellcheck disable=SC2016
          timeout 5m bash -c 'while ! curl -fsSLk --output /dev/null "https://$APP_HOSTNAME"; do
            echo "Waiting for the application to be ready..."
            sleep 10
          done'

      - name: Wait for the Traefik dashboard to be ready
        # Ready means the final HTTP status is exactly 200 or 401. The
        # pipeline's status is grep's, so curl's own exit code is not what
        # ends the loop.
        run: |
          echo "Checking the routing and availability of the Traefik dashboard..."
          # Same deferred-expansion pattern as above.
          # shellcheck disable=SC2016
          timeout 5m bash -c 'while ! curl -fsSLk --write-out "%{http_code}" --output /dev/null "https://$APP_TRAEFIK_HOSTNAME" | grep -qE "^(200|401)$"; do
            echo "Waiting for the Traefik dashboard to be ready..."
            sleep 10
          done'

      - name: Inspect Network Configuration
        run: |
          docker network inspect "$NETWORK_ONE"
          docker network inspect "$NETWORK_TWO"

      - name: Show container logs on failure
        if: failure()
        run: docker compose -f "$DOCKER_COMPOSE_FILE" -p "$COMPOSE_PROJECT_NAME" logs

      - name: Shutdown Docker Compose services
        if: always()
        run: docker compose -f "$DOCKER_COMPOSE_FILE" -p "$COMPOSE_PROJECT_NAME" down

  backup-restore-e2e:
    name: Backup + restore end-to-end smoke
    runs-on: ubuntu-latest
    # Parallel to deploy-and-test: backup/restore is orthogonal to HTTPS
    # routing, so one job failing doesn't mask the other. Both fan out from
    # `lint` so we don't burn the compose-up slot on a workflow with
    # shellcheck/actionlint errors.
    needs: lint
    timeout-minutes: 15
    permissions:
      contents: read
    env:
      NETWORK_ONE: keycloak-network
      NETWORK_TWO: traefik-network
      DOCKER_COMPOSE_FILE: keycloak-traefik-letsencrypt-docker-compose.yml
      COMPOSE_PROJECT_NAME: keycloak
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
        with:
          # Nothing after checkout runs git against the remote.
          persist-credentials: false

      - name: Create necessary Docker networks
        # Create each network only when it is missing, so a genuine
        # `docker network create` failure still fails the step instead of
        # being swallowed.
        run: |
          for network in "$NETWORK_ONE" "$NETWORK_TWO"; do
            docker network inspect "$network" > /dev/null 2>&1 \
              || docker network create "$network"
          done

      - name: Generate test .env with short backup intervals
        # CI tunes the backup loop timings from 30m/24h down to 10s/30s so
        # the backup cycle tests can complete in <5 min wall-clock. The
        # hostnames here are placeholders: this job never exercises Traefik
        # or Keycloak HTTPS routing (that's deploy-and-test's responsibility).
        run: |
          cat > .env <<EOF
          TRAEFIK_IMAGE_TAG=traefik:3.2@sha256:e561a37f8710d9cf41c78bdf421d822b2c0b48267ec0552e644565fb55466ea9
          TRAEFIK_LOG_LEVEL=WARN
          TRAEFIK_ACME_EMAIL=ci@example.com
          TRAEFIK_HOSTNAME=traefik.keycloak.ci.example
          TRAEFIK_BASIC_AUTH=traefikadmin:\$\$2y\$\$10\$\$sMzJfirKC75x/hVpiINeZOiSm.Jkity9cn4KwNkRvO7hSQVFc5FLO
          KEYCLOAK_POSTGRES_IMAGE_TAG=postgres:16@sha256:71e27bf60b70bded003791b5573f8b808365613f341df20ffcf0c1ed7bc13ddf
          KEYCLOAK_IMAGE_TAG=quay.io/keycloak/keycloak:26.2.5@sha256:4883630ef9db14031cde3e60700c9a9a8eaf1b5c24db1589d6a2d43de38ba2a9
          KEYCLOAK_DB_NAME=keycloakdb
          KEYCLOAK_DB_USER=keycloakdbuser
          KEYCLOAK_DB_PASSWORD=$(openssl rand -base64 24 | tr -d '/+=' | head -c 32)
          KEYCLOAK_ADMIN_USERNAME=keycloakadmin
          KEYCLOAK_ADMIN_PASSWORD=$(openssl rand -base64 24 | tr -d '/+=' | head -c 32)
          KEYCLOAK_HOSTNAME=keycloak.ci.example
          KEYCLOAK_BACKUP_INIT_SLEEP=10s
          KEYCLOAK_BACKUP_INTERVAL=30s
          KEYCLOAK_POSTGRES_BACKUP_PRUNE_DAYS=7
          KEYCLOAK_POSTGRES_BACKUPS_PATH=/srv/keycloak-postgres/backups
          KEYCLOAK_POSTGRES_BACKUP_NAME=keycloak-postgres-backup
          EOF

      - name: Start up services using Docker Compose
        run: docker compose -f "$DOCKER_COMPOSE_FILE" -p "$COMPOSE_PROJECT_NAME" up -d

      - name: Wait for postgres to become healthy
        # docker compose --wait would be nicer but isn't universally
        # available on the runner's compose version. Polling pg_isready
        # is equivalent and works everywhere.
        # Up to 60 attempts, 2 s apart. The container is looked up by name
        # on every attempt, so a not-yet-created container simply counts as
        # "not ready". The user and database match the generated .env above.
        run: |
          for i in $(seq 1 60); do
            if docker exec "$(docker ps -aqf "name=${COMPOSE_PROJECT_NAME}-postgres" | head -1)" \
              pg_isready -q -U keycloakdbuser -d keycloakdb > /dev/null 2>&1; then
              echo "postgres ready after ${i} attempts"
              exit 0
            fi
            sleep 2
          done
          echo "postgres did not become ready within 120s" >&2
          exit 1

      - name: Print Docker Compose services status
        run: docker ps

      - name: Run backup/restore E2E tests
        run: ./tests/e2e-backup-restore.sh

      - name: Show container logs on failure
        if: failure()
        run: docker compose -f "$DOCKER_COMPOSE_FILE" -p "$COMPOSE_PROJECT_NAME" logs

      - name: Shutdown Docker Compose services
        if: always()
        run: docker compose -f "$DOCKER_COMPOSE_FILE" -p "$COMPOSE_PROJECT_NAME" down