From 7017b385d201d0a624e32b599e13453a3ec15d12 Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Mon, 27 Apr 2026 12:42:54 -0700
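Subject: [PATCH 01/18] secure run_simulators workflow with OIDC and SSM

Stop storing long-lived credentials for the simulator workflow:

- Replace the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY secrets with
  short-lived credentials obtained through GitHub OIDC, using
  aws-actions/configure-aws-credentials pinned to a commit SHA. Each job
  that talks to AWS is granted only `id-token: write`.
- Replace the SSH steps (SSH_PRIVATE_KEY, SSH_HOST, SSH_USERNAME) with
  SSM Run Command (AWS-RunShellScript). Each job sends its script with
  `aws ssm send-command`, polls `get-command-invocation` until the
  command reaches a final status, and prints the command output.
- Check out the exact commit under test (`github.sha`, detached) on the
  instance instead of cloning a branch with GH_ACCESS_TOKEN.

Related cleanups:

- Move the `sim` concurrency group from the start job to the workflow
  level, with `cancel-in-progress: false`.
- Define INSTANCE_ID and AWS_DEFAULT_REGION once at workflow level, read
  from repository variables (`vars.*`) instead of secrets.
- Add `set -euo pipefail` to every script and quote variable expansions.
- Stop Xvfb through an EXIT trap instead of a trailing `kill %1`.
- Re-indent the jobs with two spaces.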
---
 .github/workflows/run-simulators.yml | 656 ++++++++++++++++++---------
 1 file changed, 445 insertions(+), 211 deletions(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 99f9fd43a..13c891929 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -1,222 +1,456 @@
 name: run_simulators
+
 on:
   # IMPORTANT: this workflow should only be triggered manually via the Actions
   # portal of the repo!!! Do not modify this workflow's trigger!
   workflow_dispatch:
 
+# Deny GitHub token permissions by default; grant only what individual jobs need.
 permissions: {}
 
+# Only one simulator run should use the shared EC2 instance/volume at a time.
+concurrency:
+  group: sim
+  cancel-in-progress: false
+
+# Shared AWS config used by the EC2 and SSM commands.
+env:
+  INSTANCE_ID: ${{ vars.AWS_EC2_INSTANCE_ID }}
+  AWS_DEFAULT_REGION: ${{ vars.AWS_REGION }}
+
 jobs:
-    start_ec2_instance:
-      name: start_ec2_instance
-      runs-on: ubuntu-latest
-      concurrency:
-        group: sim
-      outputs:
-        volume_id: ${{ steps.create_volume_step.outputs.volume_id }}
-      env:
-        INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }}
-        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-        AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
-      steps:
-        - name: Create Volume from Latest Snapshot and Attach to Instance
-          id: create_volume_step
-          run: |
-            #  Retrieve the latest snapshot ID
-            LATEST_SNAPSHOT_ID=$(aws ec2 describe-snapshots --owner-ids self --query 'Snapshots | sort_by(@, &StartTime) | [-1].SnapshotId' --output text)
-            echo "Checking availability for snapshot: $LATEST_SNAPSHOT_ID"
-
-            # Wait for the snapshot to complete
-            aws ec2 wait snapshot-completed --snapshot-ids $LATEST_SNAPSHOT_ID
-            echo "Snapshot is ready."
-
-            # Create a new volume from the latest snapshot
-            volume_id=$(aws ec2 create-volume --snapshot-id $LATEST_SNAPSHOT_ID --availability-zone us-west-1b --volume-type gp3 --size 400 --throughput 250 --query "VolumeId" --output text)
-            echo "Created volume with ID: $volume_id"
-
-            # Set volume_id as output
-            echo "volume_id=$volume_id" >> $GITHUB_OUTPUT
-            cat $GITHUB_OUTPUT
-
-            # Wait until the volume is available
-            aws ec2 wait volume-available --volume-ids $volume_id
-            echo "Volume is now available"
-
-            # Attach the volume to the instance
-            aws ec2 attach-volume --volume-id $volume_id --instance-id $INSTANCE_ID --device /dev/sda1
-            echo "Volume $volume_id attached to instance $INSTANCE_ID as /dev/sda1"
-
-        - name: Start EC2 Instance
-          run: |
-            # Get the instance state
-            instance_state=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID | jq -r '.Reservations[].Instances[].State.Name')
-
-            # If the machine is stopping wait for it to fully stop
-            while [ "$instance_state" == "stopping" ]; do
-                echo "Instance is stopping, waiting for it to fully stop..."
-                sleep 10
-                instance_state=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID | jq -r '.Reservations[].Instances[].State.Name')
-            done
-
-            # Check if instance state is "stopped"
-            if [[ "$instance_state" == "stopped" ]]; then
-                echo "Instance is stopped, starting it..."
-                aws ec2 start-instances --instance-ids $INSTANCE_ID
-            elif [[ "$instance_state" == "pending" ]]; then
-                echo "Instance startup is pending, continuing..."
-            elif [[ "$instance_state" == "running" ]]; then
-                echo "Instance is already running..."
-                exit 0
-            else
-                echo "Unknown instance state: $instance_state"
+  start_ec2_instance:
+    name: start_ec2_instance
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write # Required for GitHub OIDC -> AWS role assumption.
+    outputs:
+      volume_id: ${{ steps.create_volume_step.outputs.volume_id }}
+    steps:
+      # Use OIDC to get short-lived AWS credentials instead of storing long-lived AWS keys.
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
+        with:
+          role-to-assume: ${{ vars.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ vars.AWS_REGION }}
+          allowed-account-ids: ${{ vars.AWS_ACCOUNT_ID }}
+
+      - name: Create Volume from Latest Snapshot and Attach to Instance
+        id: create_volume_step
+        run: |
+          set -euo pipefail
+
+          # Retrieve the latest snapshot ID owned by this AWS account.
+          LATEST_SNAPSHOT_ID=$(aws ec2 describe-snapshots --owner-ids self --query 'Snapshots | sort_by(@, &StartTime) | [-1].SnapshotId' --output text)
+          echo "Checking availability for snapshot: $LATEST_SNAPSHOT_ID"
+
+          # Wait for the snapshot to complete before creating the temporary test volume.
+          aws ec2 wait snapshot-completed --snapshot-ids "$LATEST_SNAPSHOT_ID"
+          echo "Snapshot is ready."
+
+          # Create a temporary volume from the latest simulator snapshot.
+          volume_id=$(aws ec2 create-volume --snapshot-id "$LATEST_SNAPSHOT_ID" --availability-zone us-west-1b --volume-type gp3 --size 400 --throughput 250 --query "VolumeId" --output text)
+          echo "Created volume with ID: $volume_id"
+
+          # Save the volume ID so the cleanup job can detach/delete it later.
+          echo "volume_id=$volume_id" >> "$GITHUB_OUTPUT"
+
+          aws ec2 wait volume-available --volume-ids "$volume_id"
+          echo "Volume is now available"
+
+          aws ec2 attach-volume --volume-id "$volume_id" --instance-id "$INSTANCE_ID" --device /dev/sda1
+          echo "Volume $volume_id attached to instance $INSTANCE_ID as /dev/sda1"
+
+      - name: Start EC2 Instance
+        run: |
+          set -euo pipefail
+
+          # Start the simulator instance only if it is currently stopped.
+          instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')
+
+          while [ "$instance_state" = "stopping" ]; do
+            echo "Instance is stopping, waiting for it to fully stop..."
+            sleep 10
+            instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')
+          done
+
+          if [ "$instance_state" = "stopped" ]; then
+            echo "Instance is stopped, starting it..."
+            aws ec2 start-instances --instance-ids "$INSTANCE_ID"
+          elif [ "$instance_state" = "pending" ]; then
+            echo "Instance startup is pending, continuing..."
+          elif [ "$instance_state" = "running" ]; then
+            echo "Instance is already running..."
+            exit 0
+          else
+            echo "Unknown instance state: $instance_state"
+            exit 1
+          fi
+
+          echo "Waiting for instance status checks to pass..."
+          aws ec2 wait instance-status-ok --instance-ids "$INSTANCE_ID"
+          echo "Instance is now ready for use."
+
+  check_simulator_version_updates:
+    name: check_simulator_version_updates
+    runs-on: ubuntu-latest
+    needs: start_ec2_instance
+    permissions:
+      id-token: write # Required because this job sends commands through AWS SSM.
+    steps:
+      # Use OIDC to get short-lived AWS credentials for SSM.
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
+        with:
+          role-to-assume: ${{ vars.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ vars.AWS_REGION }}
+          allowed-account-ids: ${{ vars.AWS_ACCOUNT_ID }}
+
+      - name: Check for Simulator Version Updates
+        env:
+          GH_SHA: ${{ github.sha }}
+        run: |
+          set -euo pipefail
+
+          # Build the remote shell script as a JSON array for AWS-RunShellScript.
+          commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<EOF
+          set -euo pipefail
+          cd /home/ubuntu/actions/
+          rm -rf Scenic
+          git clone --no-checkout https://github.com/BerkeleyLearnVerify/Scenic.git Scenic
+          cd Scenic
+          git checkout --detach "$GH_SHA"
+          python3 -m venv venv
+          source venv/bin/activate
+          python3 -m pip install -e .[test-full]
+          python3 .github/check_latest_simulators.py
+          EOF
+          )
+
+          # Run the script on the EC2 instance through SSM instead of SSH.
+          command_id=$(aws ssm send-command \
+            --instance-ids "$INSTANCE_ID" \
+            --document-name AWS-RunShellScript \
+            --parameters "commands=$commands" \
+            --query 'Command.CommandId' \
+            --output text)
+
+          # Poll SSM until the remote command finishes, then print its output.
+          while true; do
+            status=$(aws ssm get-command-invocation \
+              --command-id "$command_id" \
+              --instance-id "$INSTANCE_ID" \
+              --query 'Status' \
+              --output text 2>/dev/null || true)
+
+            case "$status" in
+              Success)
+                break
+                ;;
+              Failed|Cancelled|TimedOut|Cancelling)
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
                 exit 1
-            fi
-
-            # Wait for instance status checks to pass
-            echo "Waiting for instance status checks to pass..."
-            aws ec2 wait instance-status-ok --instance-ids $INSTANCE_ID
-            echo "Instance is now ready for use."
-
-
-    check_simulator_version_updates:
-      name: check_simulator_version_updates
-      runs-on: ubuntu-latest
-      needs: start_ec2_instance
-      steps:
-        - name: Check for Simulator Version Updates
-          env:
-            PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
-            HOSTNAME: ${{ secrets.SSH_HOST }}
-            USER_NAME: ${{ secrets.SSH_USERNAME }}
-            GH_ACCESS_TOKEN: ${{ secrets.GH_ACCESS_TOKEN }}
-            GH_REF: ${{ github.ref }}
-          run: |
-            echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
-            ssh -o StrictHostKeyChecking=no -o SendEnv=GH_REF -i private_key ${USER_NAME}@${HOSTNAME} '
-              cd /home/ubuntu/actions/ &&
-              rm -rf Scenic &&
-              git clone --branch $(basename "$GH_REF") --single-branch https://$GH_ACCESS_TOKEN@github.com/BerkeleyLearnVerify/Scenic.git &&
-              cd Scenic &&
-              python3 -m venv venv &&
-              source venv/bin/activate &&
-              python3 -m pip install -e .[test-full] &&
-              python3 .github/check_latest_simulators.py
-            '
-
-    check_nvidia_smi:
-      name: check_nvidia_smi
-      runs-on: ubuntu-latest
-      needs: start_ec2_instance
-      continue-on-error: true
-      steps:
-        - name: Check NVIDIA SMI
-          env:
-            PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
-            HOSTNAME: ${{ secrets.SSH_HOST}}
-            USER_NAME: ${{ secrets.SSH_USERNAME}}
-          run: |
-            echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
-            ssh -o StrictHostKeyChecking=no -i private_key ${USER_NAME}@${HOSTNAME} '
-              output=$(nvidia-smi)
-              echo "$output"
-              if [ -z "$output" ]; then
-                echo "NVIDIA Driver is not set"
+                ;;
+              Pending|InProgress|Delayed|"")
+                sleep 5
+                ;;
+              *)
+                echo "Unexpected SSM status: $status"
+                sleep 5
+                ;;
+            esac
+          done
+
+          aws ssm get-command-invocation \
+            --command-id "$command_id" \
+            --instance-id "$INSTANCE_ID" \
+            --query 'StandardOutputContent' \
+            --output text
+
+  check_nvidia_smi:
+    name: check_nvidia_smi
+    runs-on: ubuntu-latest
+    needs: start_ec2_instance
+    continue-on-error: true
+    permissions:
+      id-token: write # Required because this job sends commands through AWS SSM.
+    steps:
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
+        with:
+          role-to-assume: ${{ vars.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ vars.AWS_REGION }}
+          allowed-account-ids: ${{ vars.AWS_ACCOUNT_ID }}
+
+      - name: Check NVIDIA SMI
+        run: |
+          set -euo pipefail
+
+          # Run the GPU driver check remotely through SSM.
+          commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
+          set -euo pipefail
+          output=$(nvidia-smi)
+          echo "$output"
+
+          if [ -z "$output" ]; then
+            echo "NVIDIA Driver is not set"
+            exit 1
+          fi
+          EOF
+          )
+
+          command_id=$(aws ssm send-command \
+            --instance-ids "$INSTANCE_ID" \
+            --document-name AWS-RunShellScript \
+            --parameters "commands=$commands" \
+            --query 'Command.CommandId' \
+            --output text)
+
+          while true; do
+            status=$(aws ssm get-command-invocation \
+              --command-id "$command_id" \
+              --instance-id "$INSTANCE_ID" \
+              --query 'Status' \
+              --output text 2>/dev/null || true)
+
+            case "$status" in
+              Success)
+                break
+                ;;
+              Failed|Cancelled|TimedOut|Cancelling)
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
                 exit 1
-              fi
-            '
-        - name: NVIDIA Driver is not set
-          if: ${{ failure() }}
-          run: |
-            echo "NVIDIA SMI is not working, please run the steps here on the instance:"
-            echo "https://scenic-lang.atlassian.net/wiki/spaces/KAN/pages/2785287/Setting+Up+AWS+VM?parentProduct=JSW&initialAllowedFeatures=byline-contributors.byline-extensions.page-comments.delete.page-reactions.inline-comments.non-licensed-share&themeState=dark%253Adark%2520light%253Alight%2520spacing%253Aspacing%2520colorMode%253Alight&locale=en-US#Install-NVIDIA-Drivers"
-
-    run_carla_simulators:
-      name: run_carla_simulators
-      runs-on: ubuntu-latest
-      needs: [check_simulator_version_updates, check_nvidia_smi]
-      steps:
-        - name: Run CARLA Tests
-          env:
-            PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
-            HOSTNAME: ${{secrets.SSH_HOST}}
-            USER_NAME: ${{secrets.SSH_USERNAME}}
-          run: |
-            echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
-            ssh -o StrictHostKeyChecking=no -o ServerAliveInterval=60 -o ServerAliveCountMax=3 -i private_key ${USER_NAME}@${HOSTNAME} '
-              cd /home/ubuntu/actions/Scenic &&
-              source venv/bin/activate &&
-              carla_versions=($(find /software -maxdepth 1 -type d -name 'carla*')) &&
-              for version in "${carla_versions[@]}"; do
-                echo "============================= CARLA $version ============================="
-                export CARLA_ROOT="$version"
-                pytest tests/simulators/carla
-              done
-            '
-
-    run_webots_simulators:
-      name: run_webots_simulators
-      runs-on: ubuntu-latest
-      needs: [check_simulator_version_updates, check_nvidia_smi]
-      steps:
-        - name: Run Webots Tests
-          env:
-            PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
-            HOSTNAME: ${{secrets.SSH_HOST}}
-            USER_NAME: ${{secrets.SSH_USERNAME}}
-          run: |
-            echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
-            ssh -o StrictHostKeyChecking=no -i private_key ${USER_NAME}@${HOSTNAME} '
-              Xvfb :99 -screen 0 1024x768x16 &
-              cd /home/ubuntu/actions/Scenic &&
-              source venv/bin/activate &&
-              webots_versions=($(find /software -maxdepth 1 -type d -name 'webots*')) &&
-              export DISPLAY=:99 &&
-              for version in "${webots_versions[@]}"; do
-                echo "============================= Webots $version ============================="
-                export WEBOTS_ROOT="$version"
-                pytest tests/simulators/webots
-              done
-              kill %1
-            '
-
-    stop_ec2_instance:
-      name: stop_ec2_instance
-      runs-on: ubuntu-latest
-      needs: [start_ec2_instance, check_simulator_version_updates, check_nvidia_smi, run_carla_simulators, run_webots_simulators]
-      if: always()
-      env:
-        VOLUME_ID: ${{ needs.start_ec2_instance.outputs.volume_id }}
-        INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }}
-        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-        AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
-      steps:
-        - name: Stop EC2 Instance
-          run: |
-            # Get the instance state and stop it if running
-            instance_state=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID | jq -r '.Reservations[].Instances[].State.Name')
-            if [[ "$instance_state" == "running" ]]; then
-              echo "Instance is running, stopping it..."
-              aws ec2 stop-instances --instance-ids $INSTANCE_ID
-              aws ec2 wait instance-stopped --instance-ids $INSTANCE_ID
-              echo "Instance has stopped."
-            elif [[ "$instance_state" == "stopped" ]]; then
-              echo "Instance is already stopped."
-            else
-              echo "Unexpected instance state: $instance_state"
-              exit 1
-            fi
-
-        - name: Detach Volume
-          run: |
-            # Detach the volume
-            aws ec2 detach-volume --volume-id $VOLUME_ID
-            aws ec2 wait volume-available --volume-ids $VOLUME_ID
-            echo "Volume $VOLUME_ID detached."
-
-        - name: Delete Volume
-          run: |
-            # Delete the volume after snapshot is complete
-            aws ec2 delete-volume --volume-id $VOLUME_ID
-            echo "Volume $VOLUME_ID deleted."
+                ;;
+              Pending|InProgress|Delayed|"")
+                sleep 5
+                ;;
+              *)
+                echo "Unexpected SSM status: $status"
+                sleep 5
+                ;;
+            esac
+          done
+
+          aws ssm get-command-invocation \
+            --command-id "$command_id" \
+            --instance-id "$INSTANCE_ID" \
+            --query 'StandardOutputContent' \
+            --output text
+
+      - name: NVIDIA Driver is not set
+        if: ${{ failure() }}
+        run: |
+          echo "NVIDIA SMI is not working, please run the steps here on the instance:"
+          echo "https://scenic-lang.atlassian.net/wiki/spaces/KAN/pages/2785287/Setting+Up+AWS+VM?parentProduct=JSW&initialAllowedFeatures=byline-contributors.byline-extensions.page-comments.delete.page-reactions.inline-comments.non-licensed-share&themeState=dark%253Adark%2520light%253Alight%2520spacing%253Aspacing%2520colorMode%253Alight&locale=en-US#Install-NVIDIA-Drivers"
+
+  run_carla_simulators:
+    name: run_carla_simulators
+    runs-on: ubuntu-latest
+    needs: [check_simulator_version_updates, check_nvidia_smi]
+    permissions:
+      id-token: write # Required because this job sends commands through AWS SSM.
+    steps:
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
+        with:
+          role-to-assume: ${{ vars.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ vars.AWS_REGION }}
+          allowed-account-ids: ${{ vars.AWS_ACCOUNT_ID }}
+
+      - name: Run CARLA Tests
+        run: |
+          set -euo pipefail
+
+          # Run CARLA tests once for each installed CARLA version on the instance.
+          commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
+          set -euo pipefail
+          cd /home/ubuntu/actions/Scenic
+          source venv/bin/activate
+
+          carla_versions=($(find /software -maxdepth 1 -type d -name 'carla*'))
+          for version in "${carla_versions[@]}"; do
+            echo "============================= CARLA $version ============================="
+            export CARLA_ROOT="$version"
+            pytest tests/simulators/carla
+          done
+          EOF
+          )
+
+          command_id=$(aws ssm send-command \
+            --instance-ids "$INSTANCE_ID" \
+            --document-name AWS-RunShellScript \
+            --parameters "commands=$commands" \
+            --query 'Command.CommandId' \
+            --output text)
+
+          while true; do
+            status=$(aws ssm get-command-invocation \
+              --command-id "$command_id" \
+              --instance-id "$INSTANCE_ID" \
+              --query 'Status' \
+              --output text 2>/dev/null || true)
+
+            case "$status" in
+              Success)
+                break
+                ;;
+              Failed|Cancelled|TimedOut|Cancelling)
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
+                exit 1
+                ;;
+              Pending|InProgress|Delayed|"")
+                sleep 10
+                ;;
+              *)
+                echo "Unexpected SSM status: $status"
+                sleep 10
+                ;;
+            esac
+          done
+
+          aws ssm get-command-invocation \
+            --command-id "$command_id" \
+            --instance-id "$INSTANCE_ID" \
+            --query 'StandardOutputContent' \
+            --output text
+
+  run_webots_simulators:
+    name: run_webots_simulators
+    runs-on: ubuntu-latest
+    needs: [check_simulator_version_updates, check_nvidia_smi]
+    permissions:
+      id-token: write # Required because this job sends commands through AWS SSM.
+    steps:
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
+        with:
+          role-to-assume: ${{ vars.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ vars.AWS_REGION }}
+          allowed-account-ids: ${{ vars.AWS_ACCOUNT_ID }}
+
+      - name: Run Webots Tests
+        run: |
+          set -euo pipefail
+
+          # Run Webots tests on a virtual display because Webots needs graphical support.
+          commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
+          set -euo pipefail
+
+          Xvfb :99 -screen 0 1024x768x16 &
+          xvfb_pid=$!
+          trap "kill $xvfb_pid" EXIT
+
+          cd /home/ubuntu/actions/Scenic
+          source venv/bin/activate
+
+          webots_versions=($(find /software -maxdepth 1 -type d -name 'webots*'))
+          export DISPLAY=:99
+
+          for version in "${webots_versions[@]}"; do
+            echo "============================= Webots $version ============================="
+            export WEBOTS_ROOT="$version"
+            pytest tests/simulators/webots
+          done
+          EOF
+          )
+
+          command_id=$(aws ssm send-command \
+            --instance-ids "$INSTANCE_ID" \
+            --document-name AWS-RunShellScript \
+            --parameters "commands=$commands" \
+            --query 'Command.CommandId' \
+            --output text)
+
+          while true; do
+            status=$(aws ssm get-command-invocation \
+              --command-id "$command_id" \
+              --instance-id "$INSTANCE_ID" \
+              --query 'Status' \
+              --output text 2>/dev/null || true)
+
+            case "$status" in
+              Success)
+                break
+                ;;
+              Failed|Cancelled|TimedOut|Cancelling)
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
+                exit 1
+                ;;
+              Pending|InProgress|Delayed|"")
+                sleep 10
+                ;;
+              *)
+                echo "Unexpected SSM status: $status"
+                sleep 10
+                ;;
+            esac
+          done
+
+          aws ssm get-command-invocation \
+            --command-id "$command_id" \
+            --instance-id "$INSTANCE_ID" \
+            --query 'StandardOutputContent' \
+            --output text
+
+  stop_ec2_instance:
+    name: stop_ec2_instance
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write # Required for GitHub OIDC -> AWS role assumption.
+    needs:
+      [
+        start_ec2_instance,
+        check_simulator_version_updates,
+        check_nvidia_smi,
+        run_carla_simulators,
+        run_webots_simulators,
+      ]
+    if: always() # Run cleanup even if earlier jobs failed.
+    env:
+      VOLUME_ID: ${{ needs.start_ec2_instance.outputs.volume_id }}
+    steps:
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
+        with:
+          role-to-assume: ${{ vars.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ vars.AWS_REGION }}
+          allowed-account-ids: ${{ vars.AWS_ACCOUNT_ID }}
+
+      - name: Stop EC2 Instance
+        run: |
+          set -euo pipefail
+
+          instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')
+          if [ "$instance_state" = "running" ]; then
+            echo "Instance is running, stopping it..."
+            aws ec2 stop-instances --instance-ids "$INSTANCE_ID"
+            aws ec2 wait instance-stopped --instance-ids "$INSTANCE_ID"
+            echo "Instance has stopped."
+          elif [ "$instance_state" = "stopped" ]; then
+            echo "Instance is already stopped."
+          else
+            echo "Unexpected instance state: $instance_state"
+            exit 1
+          fi
+
+      - name: Detach Volume
+        run: |
+          set -euo pipefail
+
+          # Detach the temporary volume created from the latest snapshot.
+          aws ec2 detach-volume --volume-id "$VOLUME_ID"
+          aws ec2 wait volume-available --volume-ids "$VOLUME_ID"
+          echo "Volume $VOLUME_ID detached."
+
+      - name: Delete Volume
+        run: |
+          set -euo pipefail
+
+          # Delete the temporary volume so repeated workflow runs do not leave extra storage behind.
+          aws ec2 delete-volume --volume-id "$VOLUME_ID"
+          echo "Volume $VOLUME_ID deleted."
+

From 2a94e89ca161bf6aeb47a195fd5c0e2756bd3d98 Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Mon, 27 Apr 2026 13:46:39 -0700
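Subject: [PATCH 02/18] run SSM commands with Bash

Add `#!/bin/bash` as the first line of each remote script sent through
AWS-RunShellScript (simulator version check, nvidia-smi check, CARLA
tests, Webots tests) so that the script is run with Bash. The scripts
use `set -o pipefail`, `source` and arrays.

Also drop the trailing blank line at the end of the workflow file.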
---
 .github/workflows/run-simulators.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 13c891929..f07220591 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -113,7 +113,9 @@ jobs:
           set -euo pipefail
 
           # Build the remote shell script as a JSON array for AWS-RunShellScript.
+          # AWS documents this pattern for running Bash scripts through Run Command.
           commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<EOF
+          #!/bin/bash
           set -euo pipefail
           cd /home/ubuntu/actions/
           rm -rf Scenic
@@ -189,6 +191,7 @@ jobs:
 
           # Run the GPU driver check remotely through SSM.
           commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
+          #!/bin/bash
           set -euo pipefail
           output=$(nvidia-smi)
           echo "$output"
@@ -265,6 +268,7 @@ jobs:
 
           # Run CARLA tests once for each installed CARLA version on the instance.
           commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
+          #!/bin/bash
           set -euo pipefail
           cd /home/ubuntu/actions/Scenic
           source venv/bin/activate
@@ -337,6 +341,7 @@ jobs:
 
           # Run Webots tests on a virtual display because Webots needs graphical support.
           commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
+          #!/bin/bash
           set -euo pipefail
 
           Xvfb :99 -screen 0 1024x768x16 &
@@ -453,4 +458,3 @@ jobs:
           # Delete the temporary volume so repeated workflow runs do not leave extra storage behind.
           aws ec2 delete-volume --volume-id "$VOLUME_ID"
           echo "Volume $VOLUME_ID deleted."
-

From cb9d997d5e29aaa8abba0d40b7804e60f696a8d5 Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Mon, 27 Apr 2026 14:24:21 -0700
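Subject: [PATCH 03/18] set CARLA SSM runtime environment

At the top of the remote CARLA test script, export HOME, USER and
LOGNAME for the ubuntu user and point XDG_RUNTIME_DIR at
/tmp/runtime-ubuntu, which is created with mode 700.

Print `whoami` and the exported values to the command output before
the tests start.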
---
 .github/workflows/run-simulators.yml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index f07220591..3f55bebec 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -270,6 +270,20 @@ jobs:
           commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
           #!/bin/bash
           set -euo pipefail
+
+          export HOME=/home/ubuntu
+          export USER=ubuntu
+          export LOGNAME=ubuntu
+          export XDG_RUNTIME_DIR=/tmp/runtime-ubuntu
+          mkdir -p "$XDG_RUNTIME_DIR"
+          chmod 700 "$XDG_RUNTIME_DIR"
+
+          whoami
+          echo "HOME=$HOME"
+          echo "USER=$USER"
+          echo "LOGNAME=$LOGNAME"
+          echo "XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR"
+
           cd /home/ubuntu/actions/Scenic
           source venv/bin/activate
 

From ca6d79238d614dd18ed3ad58970ac8b37fbb9bda Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Mon, 27 Apr 2026 15:13:00 -0700
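Subject: [PATCH 04/18] run CARLA tests as ubuntu under SSM

CARLA refuses to start as root, and SSM Run Command runs as root by
default. Run the CARLA test block through
`sudo -u ubuntu -H bash -lc '...'` so that it runs as the ubuntu user,
matching the old SSH behavior.

This replaces the HOME/USER/LOGNAME/XDG_RUNTIME_DIR exports and the
diagnostic output added in the previous commit.

Also reword the comment that explains the `#!/bin/bash` line in the
simulator version check step.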
---
 .github/workflows/run-simulators.yml | 40 +++++++++++-----------------
 1 file changed, 16 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 3f55bebec..41f90c698 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -113,7 +113,7 @@ jobs:
           set -euo pipefail
 
           # Build the remote shell script as a JSON array for AWS-RunShellScript.
-          # AWS documents this pattern for running Bash scripts through Run Command.
+          # The first remote command is #!/bin/bash so SSM runs the script with Bash.
           commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<EOF
           #!/bin/bash
           set -euo pipefail
@@ -266,33 +266,25 @@ jobs:
         run: |
           set -euo pipefail
 
-          # Run CARLA tests once for each installed CARLA version on the instance.
+          # CARLA refuses to start as root. SSM Run Command runs as root by default,
+          # so run only the CARLA test block as the ubuntu user to match the old SSH behavior.
           commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
           #!/bin/bash
           set -euo pipefail
 
-          export HOME=/home/ubuntu
-          export USER=ubuntu
-          export LOGNAME=ubuntu
-          export XDG_RUNTIME_DIR=/tmp/runtime-ubuntu
-          mkdir -p "$XDG_RUNTIME_DIR"
-          chmod 700 "$XDG_RUNTIME_DIR"
-
-          whoami
-          echo "HOME=$HOME"
-          echo "USER=$USER"
-          echo "LOGNAME=$LOGNAME"
-          echo "XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR"
-
-          cd /home/ubuntu/actions/Scenic
-          source venv/bin/activate
-
-          carla_versions=($(find /software -maxdepth 1 -type d -name 'carla*'))
-          for version in "${carla_versions[@]}"; do
-            echo "============================= CARLA $version ============================="
-            export CARLA_ROOT="$version"
-            pytest tests/simulators/carla
-          done
+          sudo -u ubuntu -H bash -lc '
+            set -euo pipefail
+            cd /home/ubuntu/actions/Scenic
+            source venv/bin/activate
+
+            # Run CARLA tests once for each installed CARLA version on the instance.
+            carla_versions=($(find /software -maxdepth 1 -type d -name "carla*"))
+            for version in "${carla_versions[@]}"; do
+              echo "============================= CARLA $version ============================="
+              export CARLA_ROOT="$version"
+              pytest tests/simulators/carla
+            done
+          '
           EOF
           )
 

From 6661bbc95b3e22db5d1a5b14062814d4e9b4624d Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Mon, 27 Apr 2026 15:46:24 -0700
Subject: [PATCH 05/18] fix repo ownership for SSM simulator tests

---
 .github/workflows/run-simulators.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 41f90c698..47f50747a 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -126,6 +126,9 @@ jobs:
           source venv/bin/activate
           python3 -m pip install -e .[test-full]
           python3 .github/check_latest_simulators.py
+
+          # Later simulator tests run as ubuntu, so make sure the checked-out repo is writable by ubuntu.
+          chown -R ubuntu:ubuntu /home/ubuntu/actions/Scenic
           EOF
           )
 

From 324e759f792594d566944bf324bcf641b99ff6d6 Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Mon, 27 Apr 2026 16:09:12 -0700
Subject: [PATCH 06/18] fix SSM checkout ownership before tests

---
 .github/workflows/run-simulators.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 47f50747a..b9d1a398b 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -122,13 +122,16 @@ jobs:
           git clone --no-checkout https://github.com/BerkeleyLearnVerify/Scenic.git Scenic
           cd Scenic
           git checkout --detach "$GH_SHA"
+
+          # Later simulator tests run as ubuntu, so make sure the checked-out repo is writable by ubuntu
+          # before any Scenic-generated files are created.
+          chown -R ubuntu:ubuntu /home/ubuntu/actions/Scenic
+          chmod -R u+rwX /home/ubuntu/actions/Scenic
+
           python3 -m venv venv
           source venv/bin/activate
           python3 -m pip install -e .[test-full]
           python3 .github/check_latest_simulators.py
-
-          # Later simulator tests run as ubuntu, so make sure the checked-out repo is writable by ubuntu.
-          chown -R ubuntu:ubuntu /home/ubuntu/actions/Scenic
           EOF
           )
 

From b034417fa2a93231b9081ab62779b8afed6b97f9 Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Tue, 28 Apr 2026 09:29:08 -0700
Subject: [PATCH 07/18] run simulator setup as ubuntu under SSM

---
 .github/workflows/run-simulators.yml | 36 ++++++++++++++++------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index b9d1a398b..1858e3f39 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -117,21 +117,27 @@ jobs:
           commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<EOF
           #!/bin/bash
           set -euo pipefail
-          cd /home/ubuntu/actions/
-          rm -rf Scenic
-          git clone --no-checkout https://github.com/BerkeleyLearnVerify/Scenic.git Scenic
-          cd Scenic
-          git checkout --detach "$GH_SHA"
-
-          # Later simulator tests run as ubuntu, so make sure the checked-out repo is writable by ubuntu
-          # before any Scenic-generated files are created.
-          chown -R ubuntu:ubuntu /home/ubuntu/actions/Scenic
-          chmod -R u+rwX /home/ubuntu/actions/Scenic
-
-          python3 -m venv venv
-          source venv/bin/activate
-          python3 -m pip install -e .[test-full]
-          python3 .github/check_latest_simulators.py
+
+          # SSM runs as root by default. Prepare the workspace as root, then run
+          # the Scenic checkout/install as ubuntu so Scenic-generated files like
+          # src/scenic/syntax/parser.py are writable by the same user running CARLA tests.
+          mkdir -p /home/ubuntu/actions
+          chown -R ubuntu:ubuntu /home/ubuntu/actions
+
+          sudo -u ubuntu -H bash -lc '
+            set -euo pipefail
+
+            cd /home/ubuntu/actions/
+            rm -rf Scenic
+            git clone --no-checkout https://github.com/BerkeleyLearnVerify/Scenic.git Scenic
+            cd Scenic
+            git checkout --detach "$GH_SHA"
+
+            python3 -m venv venv
+            source venv/bin/activate
+            python3 -m pip install -e ".[test-full]"
+            python3 .github/check_latest_simulators.py
+          '
           EOF
           )
 

From cc3de854b103f8f6993dc702d7a62c637196df64 Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Tue, 28 Apr 2026 16:02:38 -0700
Subject: [PATCH 08/18] use && chains in SSM scripts and run Webots tests as ubuntu

---
 .github/workflows/run-simulators.yml | 128 +++++++++++----------------
 1 file changed, 50 insertions(+), 78 deletions(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 1858e3f39..2dccad178 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -38,42 +38,42 @@ jobs:
       - name: Create Volume from Latest Snapshot and Attach to Instance
         id: create_volume_step
         run: |
-          set -euo pipefail
-
-          # Retrieve the latest snapshot ID owned by this AWS account.
+          # Retrieve the latest snapshot ID
           LATEST_SNAPSHOT_ID=$(aws ec2 describe-snapshots --owner-ids self --query 'Snapshots | sort_by(@, &StartTime) | [-1].SnapshotId' --output text)
           echo "Checking availability for snapshot: $LATEST_SNAPSHOT_ID"
 
-          # Wait for the snapshot to complete before creating the temporary test volume.
+          # Wait for the snapshot to complete
           aws ec2 wait snapshot-completed --snapshot-ids "$LATEST_SNAPSHOT_ID"
           echo "Snapshot is ready."
 
-          # Create a temporary volume from the latest simulator snapshot.
+          # Create a new volume from the latest snapshot
           volume_id=$(aws ec2 create-volume --snapshot-id "$LATEST_SNAPSHOT_ID" --availability-zone us-west-1b --volume-type gp3 --size 400 --throughput 250 --query "VolumeId" --output text)
           echo "Created volume with ID: $volume_id"
 
-          # Save the volume ID so the cleanup job can detach/delete it later.
+          # Set volume_id as output
           echo "volume_id=$volume_id" >> "$GITHUB_OUTPUT"
 
+          # Wait until the volume is available
           aws ec2 wait volume-available --volume-ids "$volume_id"
           echo "Volume is now available"
 
+          # Attach the volume to the instance
           aws ec2 attach-volume --volume-id "$volume_id" --instance-id "$INSTANCE_ID" --device /dev/sda1
           echo "Volume $volume_id attached to instance $INSTANCE_ID as /dev/sda1"
 
       - name: Start EC2 Instance
         run: |
-          set -euo pipefail
-
-          # Start the simulator instance only if it is currently stopped.
+          # Get the instance state
           instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')
 
+          # If the machine is stopping wait for it to fully stop
           while [ "$instance_state" = "stopping" ]; do
             echo "Instance is stopping, waiting for it to fully stop..."
             sleep 10
             instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')
           done
 
+          # Check if instance state is "stopped"
           if [ "$instance_state" = "stopped" ]; then
             echo "Instance is stopped, starting it..."
             aws ec2 start-instances --instance-ids "$INSTANCE_ID"
@@ -87,6 +87,7 @@ jobs:
             exit 1
           fi
 
+          # Wait for instance status checks to pass
           echo "Waiting for instance status checks to pass..."
           aws ec2 wait instance-status-ok --instance-ids "$INSTANCE_ID"
           echo "Instance is now ready for use."
@@ -110,32 +111,24 @@ jobs:
         env:
           GH_SHA: ${{ github.sha }}
         run: |
-          set -euo pipefail
-
           # Build the remote shell script as a JSON array for AWS-RunShellScript.
           # The first remote command is #!/bin/bash so SSM runs the script with Bash.
           commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<EOF
           #!/bin/bash
-          set -euo pipefail
-
-          # SSM runs as root by default. Prepare the workspace as root, then run
-          # the Scenic checkout/install as ubuntu so Scenic-generated files like
-          # src/scenic/syntax/parser.py are writable by the same user running CARLA tests.
-          mkdir -p /home/ubuntu/actions
-          chown -R ubuntu:ubuntu /home/ubuntu/actions
 
+          # SSM runs as root, but CARLA cannot. Use ubuntu for checkout/install too
+          # so generated files like parser.py are writable during simulator tests.
+          mkdir -p /home/ubuntu/actions &&
+          chown -R ubuntu:ubuntu /home/ubuntu/actions &&
           sudo -u ubuntu -H bash -lc '
-            set -euo pipefail
-
-            cd /home/ubuntu/actions/
-            rm -rf Scenic
-            git clone --no-checkout https://github.com/BerkeleyLearnVerify/Scenic.git Scenic
-            cd Scenic
-            git checkout --detach "$GH_SHA"
-
-            python3 -m venv venv
-            source venv/bin/activate
-            python3 -m pip install -e ".[test-full]"
+            cd /home/ubuntu/actions/ &&
+            rm -rf Scenic &&
+            git clone --no-checkout https://github.com/BerkeleyLearnVerify/Scenic.git Scenic &&
+            cd Scenic &&
+            git checkout --detach "$GH_SHA" &&
+            python3 -m venv venv &&
+            source venv/bin/activate &&
+            python3 -m pip install -e ".[test-full]" &&
             python3 .github/check_latest_simulators.py
           '
           EOF
@@ -161,7 +154,7 @@ jobs:
               Success)
                 break
                 ;;
-              Failed|Cancelled|TimedOut|Cancelling)
+              Failed|Cancelled|TimedOut|Cancelling|DeliveryTimedOut|ExecutionTimedOut|Undeliverable|Terminated)
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
                 exit 1
@@ -199,13 +192,9 @@ jobs:
 
       - name: Check NVIDIA SMI
         run: |
-          set -euo pipefail
-
-          # Run the GPU driver check remotely through SSM.
           commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
           #!/bin/bash
-          set -euo pipefail
-          output=$(nvidia-smi)
+          output=$(nvidia-smi) &&
           echo "$output"
 
           if [ -z "$output" ]; then
@@ -233,7 +222,7 @@ jobs:
               Success)
                 break
                 ;;
-              Failed|Cancelled|TimedOut|Cancelling)
+              Failed|Cancelled|TimedOut|Cancelling|DeliveryTimedOut|ExecutionTimedOut|Undeliverable|Terminated)
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
                 exit 1
@@ -276,21 +265,14 @@ jobs:
 
       - name: Run CARLA Tests
         run: |
-          set -euo pipefail
-
           # CARLA refuses to start as root. SSM Run Command runs as root by default,
-          # so run only the CARLA test block as the ubuntu user to match the old SSH behavior.
+          # so run the CARLA test block as the ubuntu user.
           commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
           #!/bin/bash
-          set -euo pipefail
-
           sudo -u ubuntu -H bash -lc '
-            set -euo pipefail
-            cd /home/ubuntu/actions/Scenic
-            source venv/bin/activate
-
-            # Run CARLA tests once for each installed CARLA version on the instance.
-            carla_versions=($(find /software -maxdepth 1 -type d -name "carla*"))
+            cd /home/ubuntu/actions/Scenic &&
+            source venv/bin/activate &&
+            carla_versions=($(find /software -maxdepth 1 -type d -name "carla*")) &&
             for version in "${carla_versions[@]}"; do
               echo "============================= CARLA $version ============================="
               export CARLA_ROOT="$version"
@@ -318,7 +300,7 @@ jobs:
               Success)
                 break
                 ;;
-              Failed|Cancelled|TimedOut|Cancelling)
+              Failed|Cancelled|TimedOut|Cancelling|DeliveryTimedOut|ExecutionTimedOut|Undeliverable|Terminated)
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
                 exit 1
@@ -355,28 +337,23 @@ jobs:
 
       - name: Run Webots Tests
         run: |
-          set -euo pipefail
-
-          # Run Webots tests on a virtual display because Webots needs graphical support.
           commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
           #!/bin/bash
-          set -euo pipefail
-
-          Xvfb :99 -screen 0 1024x768x16 &
-          xvfb_pid=$!
-          trap "kill $xvfb_pid" EXIT
-
-          cd /home/ubuntu/actions/Scenic
-          source venv/bin/activate
-
-          webots_versions=($(find /software -maxdepth 1 -type d -name 'webots*'))
-          export DISPLAY=:99
-
-          for version in "${webots_versions[@]}"; do
-            echo "============================= Webots $version ============================="
-            export WEBOTS_ROOT="$version"
-            pytest tests/simulators/webots
-          done
+          sudo -u ubuntu -H bash -lc '
+            Xvfb :99 -screen 0 1024x768x16 &
+            xvfb_pid=$!
+            trap "kill $xvfb_pid" EXIT
+
+            cd /home/ubuntu/actions/Scenic &&
+            source venv/bin/activate &&
+            webots_versions=($(find /software -maxdepth 1 -type d -name "webots*")) &&
+            export DISPLAY=:99 &&
+            for version in "${webots_versions[@]}"; do
+              echo "============================= Webots $version ============================="
+              export WEBOTS_ROOT="$version"
+              pytest tests/simulators/webots
+            done
+          '
           EOF
           )
 
@@ -398,7 +375,7 @@ jobs:
               Success)
                 break
                 ;;
-              Failed|Cancelled|TimedOut|Cancelling)
+              Failed|Cancelled|TimedOut|Cancelling|DeliveryTimedOut|ExecutionTimedOut|Undeliverable|Terminated)
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
                 exit 1
@@ -432,7 +409,7 @@ jobs:
         run_carla_simulators,
         run_webots_simulators,
       ]
-    if: always() # Run cleanup even if earlier jobs failed.
+    if: always()
     env:
       VOLUME_ID: ${{ needs.start_ec2_instance.outputs.volume_id }}
     steps:
@@ -445,8 +422,7 @@ jobs:
 
       - name: Stop EC2 Instance
         run: |
-          set -euo pipefail
-
+          # Get the instance state and stop it if running
           instance_state=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" | jq -r '.Reservations[].Instances[].State.Name')
           if [ "$instance_state" = "running" ]; then
             echo "Instance is running, stopping it..."
@@ -462,17 +438,13 @@ jobs:
 
       - name: Detach Volume
         run: |
-          set -euo pipefail
-
-          # Detach the temporary volume created from the latest snapshot.
+          # Detach the volume
           aws ec2 detach-volume --volume-id "$VOLUME_ID"
           aws ec2 wait volume-available --volume-ids "$VOLUME_ID"
           echo "Volume $VOLUME_ID detached."
 
       - name: Delete Volume
         run: |
-          set -euo pipefail
-
-          # Delete the temporary volume so repeated workflow runs do not leave extra storage behind.
+          # Delete the volume after snapshot is complete
           aws ec2 delete-volume --volume-id "$VOLUME_ID"
           echo "Volume $VOLUME_ID deleted."

From 7f567476873645f807a0eb4d057a397f7edd4004 Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Tue, 28 Apr 2026 16:06:21 -0700
Subject: [PATCH 09/18] poll SSM StatusDetails and skip volume cleanup without volume_id

---
 .github/workflows/run-simulators.yml | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 2dccad178..16f91089c 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -147,19 +147,19 @@ jobs:
             status=$(aws ssm get-command-invocation \
               --command-id "$command_id" \
               --instance-id "$INSTANCE_ID" \
-              --query 'Status' \
+              --query 'StatusDetails' \
               --output text 2>/dev/null || true)
 
             case "$status" in
               Success)
                 break
                 ;;
-              Failed|Cancelled|TimedOut|Cancelling|DeliveryTimedOut|ExecutionTimedOut|Undeliverable|Terminated)
+              Failed|Cancelled|TimedOut|Cancelling|"Delivery Timed Out"|"Execution Timed Out"|Undeliverable|Terminated|Incomplete|"Rate Exceeded")
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
                 exit 1
                 ;;
-              Pending|InProgress|Delayed|"")
+              Pending|"In Progress"|InProgress|Delayed|"")
                 sleep 5
                 ;;
               *)
@@ -215,19 +215,19 @@ jobs:
             status=$(aws ssm get-command-invocation \
               --command-id "$command_id" \
               --instance-id "$INSTANCE_ID" \
-              --query 'Status' \
+              --query 'StatusDetails' \
               --output text 2>/dev/null || true)
 
             case "$status" in
               Success)
                 break
                 ;;
-              Failed|Cancelled|TimedOut|Cancelling|DeliveryTimedOut|ExecutionTimedOut|Undeliverable|Terminated)
+              Failed|Cancelled|TimedOut|Cancelling|"Delivery Timed Out"|"Execution Timed Out"|Undeliverable|Terminated|Incomplete|"Rate Exceeded")
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
                 exit 1
                 ;;
-              Pending|InProgress|Delayed|"")
+              Pending|"In Progress"|InProgress|Delayed|"")
                 sleep 5
                 ;;
               *)
@@ -293,19 +293,19 @@ jobs:
             status=$(aws ssm get-command-invocation \
               --command-id "$command_id" \
               --instance-id "$INSTANCE_ID" \
-              --query 'Status' \
+              --query 'StatusDetails' \
               --output text 2>/dev/null || true)
 
             case "$status" in
               Success)
                 break
                 ;;
-              Failed|Cancelled|TimedOut|Cancelling|DeliveryTimedOut|ExecutionTimedOut|Undeliverable|Terminated)
+              Failed|Cancelled|TimedOut|Cancelling|"Delivery Timed Out"|"Execution Timed Out"|Undeliverable|Terminated|Incomplete|"Rate Exceeded")
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
                 exit 1
                 ;;
-              Pending|InProgress|Delayed|"")
+              Pending|"In Progress"|InProgress|Delayed|"")
                 sleep 10
                 ;;
               *)
@@ -368,19 +368,19 @@ jobs:
             status=$(aws ssm get-command-invocation \
               --command-id "$command_id" \
               --instance-id "$INSTANCE_ID" \
-              --query 'Status' \
+              --query 'StatusDetails' \
               --output text 2>/dev/null || true)
 
             case "$status" in
               Success)
                 break
                 ;;
-              Failed|Cancelled|TimedOut|Cancelling|DeliveryTimedOut|ExecutionTimedOut|Undeliverable|Terminated)
+              Failed|Cancelled|TimedOut|Cancelling|"Delivery Timed Out"|"Execution Timed Out"|Undeliverable|Terminated|Incomplete|"Rate Exceeded")
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
                 aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
                 exit 1
                 ;;
-              Pending|InProgress|Delayed|"")
+              Pending|"In Progress"|InProgress|Delayed|"")
                 sleep 10
                 ;;
               *)
@@ -437,6 +437,7 @@ jobs:
           fi
 
       - name: Detach Volume
+        if: ${{ needs.start_ec2_instance.outputs.volume_id != '' }}
         run: |
           # Detach the volume
           aws ec2 detach-volume --volume-id "$VOLUME_ID"
@@ -444,6 +445,7 @@ jobs:
           echo "Volume $VOLUME_ID detached."
 
       - name: Delete Volume
+        if: ${{ needs.start_ec2_instance.outputs.volume_id != '' }}
         run: |
           # Delete the volume after snapshot is complete
           aws ec2 delete-volume --volume-id "$VOLUME_ID"

From 7675431632507a40d016735eb7a520610166ac99 Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Tue, 28 Apr 2026 16:53:20 -0700
Subject: [PATCH 10/18] remove inline comments on id-token permissions

---
 .github/workflows/run-simulators.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 16f91089c..91815390f 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -97,7 +97,7 @@ jobs:
     runs-on: ubuntu-latest
     needs: start_ec2_instance
     permissions:
-      id-token: write # Required because this job sends commands through AWS SSM.
+      id-token: write
     steps:
       # Use OIDC to get short-lived AWS credentials for SSM.
       - name: Configure AWS Credentials
@@ -181,7 +181,7 @@ jobs:
     needs: start_ec2_instance
     continue-on-error: true
     permissions:
-      id-token: write # Required because this job sends commands through AWS SSM.
+      id-token: write
     steps:
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
@@ -254,7 +254,7 @@ jobs:
     runs-on: ubuntu-latest
     needs: [check_simulator_version_updates, check_nvidia_smi]
     permissions:
-      id-token: write # Required because this job sends commands through AWS SSM.
+      id-token: write
     steps:
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
@@ -326,7 +326,7 @@ jobs:
     runs-on: ubuntu-latest
     needs: [check_simulator_version_updates, check_nvidia_smi]
     permissions:
-      id-token: write # Required because this job sends commands through AWS SSM.
+      id-token: write
     steps:
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
@@ -400,7 +400,7 @@ jobs:
     name: stop_ec2_instance
     runs-on: ubuntu-latest
     permissions:
-      id-token: write # Required for GitHub OIDC -> AWS role assumption.
+      id-token: write 
     needs:
       [
         start_ec2_instance,

From 0225fe9fe7773a34c8994ba1731d8a91a3a80fd8 Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Wed, 29 Apr 2026 07:28:23 -0700
Subject: [PATCH 11/18] read AWS role, region, and instance ID from secrets instead of vars

---
 .github/workflows/run-simulators.yml | 49 +++++++++++++---------------
 1 file changed, 23 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 91815390f..15321c42d 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -5,25 +5,22 @@ on:
   # portal of the repo!!! Do not modify this workflow's trigger!
   workflow_dispatch:
 
-# Deny GitHub token permissions by default; grant only what individual jobs need.
 permissions: {}
 
-# Only one simulator run should use the shared EC2 instance/volume at a time.
+# Ensure only one simulator workflow runs at a time
 concurrency:
   group: sim
   cancel-in-progress: false
 
-# Shared AWS config used by the EC2 and SSM commands.
 env:
-  INSTANCE_ID: ${{ vars.AWS_EC2_INSTANCE_ID }}
-  AWS_DEFAULT_REGION: ${{ vars.AWS_REGION }}
+  INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }}
 
 jobs:
   start_ec2_instance:
     name: start_ec2_instance
     runs-on: ubuntu-latest
     permissions:
-      id-token: write # Required for GitHub OIDC -> AWS role assumption.
+      id-token: write # This is required for OIDC to request the JWT
     outputs:
       volume_id: ${{ steps.create_volume_step.outputs.volume_id }}
     steps:
@@ -31,9 +28,9 @@ jobs:
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
         with:
-          role-to-assume: ${{ vars.AWS_ROLE_TO_ASSUME }}
-          aws-region: ${{ vars.AWS_REGION }}
-          allowed-account-ids: ${{ vars.AWS_ACCOUNT_ID }}
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ secrets.AWS_REGION }}
+          allowed-account-ids: ${{ secrets.AWS_ACCOUNT_ID }}
 
       - name: Create Volume from Latest Snapshot and Attach to Instance
         id: create_volume_step
@@ -99,13 +96,13 @@ jobs:
     permissions:
       id-token: write
     steps:
-      # Use OIDC to get short-lived AWS credentials for SSM.
+      # Use OIDC to get AWS credentials for SSM.
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
         with:
-          role-to-assume: ${{ vars.AWS_ROLE_TO_ASSUME }}
-          aws-region: ${{ vars.AWS_REGION }}
-          allowed-account-ids: ${{ vars.AWS_ACCOUNT_ID }}
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ secrets.AWS_REGION }}
+          allowed-account-ids: ${{ secrets.AWS_ACCOUNT_ID }}
 
       - name: Check for Simulator Version Updates
         env:
@@ -186,9 +183,9 @@ jobs:
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
         with:
-          role-to-assume: ${{ vars.AWS_ROLE_TO_ASSUME }}
-          aws-region: ${{ vars.AWS_REGION }}
-          allowed-account-ids: ${{ vars.AWS_ACCOUNT_ID }}
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ secrets.AWS_REGION }}
+          allowed-account-ids: ${{ secrets.AWS_ACCOUNT_ID }}
 
       - name: Check NVIDIA SMI
         run: |
@@ -259,9 +256,9 @@ jobs:
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
         with:
-          role-to-assume: ${{ vars.AWS_ROLE_TO_ASSUME }}
-          aws-region: ${{ vars.AWS_REGION }}
-          allowed-account-ids: ${{ vars.AWS_ACCOUNT_ID }}
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ secrets.AWS_REGION }}
+          allowed-account-ids: ${{ secrets.AWS_ACCOUNT_ID }}
 
       - name: Run CARLA Tests
         run: |
@@ -331,9 +328,9 @@ jobs:
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
         with:
-          role-to-assume: ${{ vars.AWS_ROLE_TO_ASSUME }}
-          aws-region: ${{ vars.AWS_REGION }}
-          allowed-account-ids: ${{ vars.AWS_ACCOUNT_ID }}
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ secrets.AWS_REGION }}
+          allowed-account-ids: ${{ secrets.AWS_ACCOUNT_ID }}
 
       - name: Run Webots Tests
         run: |
@@ -400,7 +397,7 @@ jobs:
     name: stop_ec2_instance
     runs-on: ubuntu-latest
     permissions:
-      id-token: write 
+      id-token: write
     needs:
       [
         start_ec2_instance,
@@ -416,9 +413,9 @@ jobs:
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
         with:
-          role-to-assume: ${{ vars.AWS_ROLE_TO_ASSUME }}
-          aws-region: ${{ vars.AWS_REGION }}
-          allowed-account-ids: ${{ vars.AWS_ACCOUNT_ID }}
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ secrets.AWS_REGION }}
+          allowed-account-ids: ${{ secrets.AWS_ACCOUNT_ID }}
 
       - name: Stop EC2 Instance
         run: |

From 3560233e25ce31a7468d3b389df05b2b06bbc3b9 Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Mon, 4 May 2026 10:27:36 -0700
Subject: [PATCH 12/18] clean up simulator workflow

---
 .github/workflows/run-simulators.yml | 66 ++++++++++------------------
 1 file changed, 24 insertions(+), 42 deletions(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 15321c42d..9920b9a74 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -115,8 +115,6 @@ jobs:
 
           # SSM runs as root, but CARLA cannot. Use ubuntu for checkout/install too
           # so generated files like parser.py are writable during simulator tests.
-          mkdir -p /home/ubuntu/actions &&
-          chown -R ubuntu:ubuntu /home/ubuntu/actions &&
           sudo -u ubuntu -H bash -lc '
             cd /home/ubuntu/actions/ &&
             rm -rf Scenic &&
@@ -144,23 +142,19 @@ jobs:
             status=$(aws ssm get-command-invocation \
               --command-id "$command_id" \
               --instance-id "$INSTANCE_ID" \
-              --query 'StatusDetails' \
-              --output text 2>/dev/null || true)
+              --query 'Status' \
+              --output text)
 
             case "$status" in
               Success)
                 break
                 ;;
-              Failed|Cancelled|TimedOut|Cancelling|"Delivery Timed Out"|"Execution Timed Out"|Undeliverable|Terminated|Incomplete|"Rate Exceeded")
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
+              Failed|Cancelled|TimedOut|Cancelling)
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text
                 exit 1
                 ;;
-              Pending|"In Progress"|InProgress|Delayed|"")
-                sleep 5
-                ;;
-              *)
-                echo "Unexpected SSM status: $status"
+              Pending|InProgress|Delayed)
                 sleep 5
                 ;;
             esac
@@ -212,23 +206,19 @@ jobs:
             status=$(aws ssm get-command-invocation \
               --command-id "$command_id" \
               --instance-id "$INSTANCE_ID" \
-              --query 'StatusDetails' \
-              --output text 2>/dev/null || true)
+              --query 'Status' \
+              --output text)
 
             case "$status" in
               Success)
                 break
                 ;;
-              Failed|Cancelled|TimedOut|Cancelling|"Delivery Timed Out"|"Execution Timed Out"|Undeliverable|Terminated|Incomplete|"Rate Exceeded")
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
+              Failed|Cancelled|TimedOut|Cancelling)
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text
                 exit 1
                 ;;
-              Pending|"In Progress"|InProgress|Delayed|"")
-                sleep 5
-                ;;
-              *)
-                echo "Unexpected SSM status: $status"
+              Pending|InProgress|Delayed)
                 sleep 5
                 ;;
             esac
@@ -290,23 +280,19 @@ jobs:
             status=$(aws ssm get-command-invocation \
               --command-id "$command_id" \
               --instance-id "$INSTANCE_ID" \
-              --query 'StatusDetails' \
-              --output text 2>/dev/null || true)
+              --query 'Status' \
+              --output text)
 
             case "$status" in
               Success)
                 break
                 ;;
-              Failed|Cancelled|TimedOut|Cancelling|"Delivery Timed Out"|"Execution Timed Out"|Undeliverable|Terminated|Incomplete|"Rate Exceeded")
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
+              Failed|Cancelled|TimedOut|Cancelling)
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text
                 exit 1
                 ;;
-              Pending|"In Progress"|InProgress|Delayed|"")
-                sleep 10
-                ;;
-              *)
-                echo "Unexpected SSM status: $status"
+              Pending|InProgress|Delayed)
                 sleep 10
                 ;;
             esac
@@ -365,23 +351,19 @@ jobs:
             status=$(aws ssm get-command-invocation \
               --command-id "$command_id" \
               --instance-id "$INSTANCE_ID" \
-              --query 'StatusDetails' \
-              --output text 2>/dev/null || true)
+              --query 'Status' \
+              --output text)
 
             case "$status" in
               Success)
                 break
                 ;;
-              Failed|Cancelled|TimedOut|Cancelling|"Delivery Timed Out"|"Execution Timed Out"|Undeliverable|Terminated|Incomplete|"Rate Exceeded")
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text || true
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text || true
+              Failed|Cancelled|TimedOut|Cancelling)
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text
+                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text
                 exit 1
                 ;;
-              Pending|"In Progress"|InProgress|Delayed|"")
-                sleep 10
-                ;;
-              *)
-                echo "Unexpected SSM status: $status"
+              Pending|InProgress|Delayed)
                 sleep 10
                 ;;
             esac

From e5b60ec97ab263cd3aa0b473abad0c4f3bf901fa Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Tue, 5 May 2026 09:40:35 -0700
Subject: [PATCH 13/18] extend SSM execution timeout for CARLA tests

---
 .github/workflows/run-simulators.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 9920b9a74..5324931f0 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -269,10 +269,12 @@ jobs:
           EOF
           )
 
+          # AWS-RunShellScript's executionTimeout defaults to 3600 seconds (1 hour).
+          # CARLA tests can take longer, so allow this SSM command to run for 3 hours.
           command_id=$(aws ssm send-command \
             --instance-ids "$INSTANCE_ID" \
             --document-name AWS-RunShellScript \
-            --parameters "commands=$commands" \
+            --parameters "commands=$commands,executionTimeout=10800" \
             --query 'Command.CommandId' \
             --output text)
 

From 97d09d4b2dc7d5d135c0d017551ac80a8543ae72 Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Tue, 12 May 2026 12:25:31 -0700
Subject: [PATCH 14/18] Install matching CARLA Python API during simulator
 tests

---
 .github/workflows/run-simulators.yml | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 5324931f0..9defd7e65 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -257,12 +257,17 @@ jobs:
           commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
           #!/bin/bash
           sudo -u ubuntu -H bash -lc '
+            set -e
             cd /home/ubuntu/actions/Scenic &&
             source venv/bin/activate &&
-            carla_versions=($(find /software -maxdepth 1 -type d -name "carla*")) &&
-            for version in "${carla_versions[@]}"; do
-              echo "============================= CARLA $version ============================="
-              export CARLA_ROOT="$version"
+            carla_roots=($(find /software -maxdepth 1 -type d -name "carla*" | sort -V)) &&
+            for carla_root in "${carla_roots[@]}"; do
+              carla_version="${carla_root#/software/carla}"
+              echo "============================= CARLA $carla_version ============================="
+              export CARLA_ROOT="$carla_root"
+              echo "Using CARLA_ROOT=$CARLA_ROOT"
+              echo "Installing Python CARLA API carla==$carla_version"
+              python3 -m pip install --force-reinstall "carla==$carla_version"
               pytest tests/simulators/carla
             done
           '

From 831fd474b607e38aa32aefd7e63dc9f981a08446 Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Tue, 26 May 2026 16:48:03 -0700
Subject: [PATCH 15/18] Install CARLA APIs from local wheels in CI

---
 .github/workflows/run-simulators.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 9defd7e65..48b0c6f3f 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -265,9 +265,7 @@ jobs:
               carla_version="${carla_root#/software/carla}"
               echo "============================= CARLA $carla_version ============================="
               export CARLA_ROOT="$carla_root"
-              echo "Using CARLA_ROOT=$CARLA_ROOT"
-              echo "Installing Python CARLA API carla==$carla_version"
-              python3 -m pip install --force-reinstall "carla==$carla_version"
+              python3 -m pip install --force-reinstall "$CARLA_ROOT"/PythonAPI/carla/dist/carla-"$carla_version"-cp310-cp310-*.whl
               pytest tests/simulators/carla
             done
           '

From 7269c187af330fffe3b112277ba0dae1e3c93a17 Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Wed, 27 May 2026 12:11:32 -0700
Subject: [PATCH 16/18] increase timeout for carla CI tests

---
 .github/workflows/run-simulators.yml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 48b0c6f3f..cc38cedd2 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -249,6 +249,9 @@ jobs:
           role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
           aws-region: ${{ secrets.AWS_REGION }}
           allowed-account-ids: ${{ secrets.AWS_ACCOUNT_ID }}
+          # CARLA tests can exceed the default 1-hour role session.
+          # Keep this longer than the SSM command timeout so we can still fetch logs if SSM times out.
+          role-duration-seconds: 9000  # 2.5 hours
 
       - name: Run CARLA Tests
         run: |
@@ -273,11 +276,11 @@ jobs:
           )
 
           # AWS-RunShellScript's executionTimeout defaults to 3600 seconds (1 hour).
-          # CARLA tests can take longer, so allow this SSM command to run for 3 hours.
+          # Limit the CARLA SSM command to 2 hours so it ends before the AWS role session expires.
           command_id=$(aws ssm send-command \
             --instance-ids "$INSTANCE_ID" \
             --document-name AWS-RunShellScript \
-            --parameters "commands=$commands,executionTimeout=10800" \
+            --parameters "commands=$commands,executionTimeout=7200" \
             --query 'Command.CommandId' \
             --output text)
 

From 86fb7548220921461945a2a1caeda200c1b6c46f Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Fri, 29 May 2026 09:23:50 -0700
Subject: [PATCH 17/18] use helper script for ssm commands

---
 .github/scripts/run-ssm-command.sh   |  44 +++++++
 .github/workflows/run-simulators.yml | 174 +++------------------------
 2 files changed, 64 insertions(+), 154 deletions(-)
 create mode 100755 .github/scripts/run-ssm-command.sh

diff --git a/.github/scripts/run-ssm-command.sh b/.github/scripts/run-ssm-command.sh
new file mode 100755
index 000000000..b9a72723e
--- /dev/null
+++ b/.github/scripts/run-ssm-command.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Usage: run-ssm-command.sh INSTANCE_ID [EXECUTION_TIMEOUT_SECONDS] < script
+# Runs the script read from stdin on the instance via SSM and prints its output.
+set -euo pipefail  # abort if jq or send-command fails instead of polling forever
+
+INSTANCE_ID="${1:?Missing instance ID}"
+EXECUTION_TIMEOUT="${2:-3600}"
+
+# Build the remote shell script as a JSON array for AWS-RunShellScript.
+commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end')
+
+# Run the script on the EC2 instance through SSM.
+command_id=$(aws ssm send-command \
+  --instance-ids "$INSTANCE_ID" \
+  --document-name AWS-RunShellScript \
+  --parameters "commands=$commands,executionTimeout=$EXECUTION_TIMEOUT" \
+  --query 'Command.CommandId' \
+  --output text)
+
+# Print one field of the command invocation (Status, StandardOutputContent, ...).
+invocation_field() {
+  aws ssm get-command-invocation --command-id "$command_id" \
+    --instance-id "$INSTANCE_ID" --query "$1" --output text
+}
+
+# Poll SSM until the remote command finishes, then print its output.
+while true; do
+  # The invocation may not be registered yet right after send-command, so a
+  # failed lookup is treated like a pending status and retried.
+  status=$(invocation_field Status) || status=""
+
+  case "$status" in
+    Success)
+      break ;;
+    Failed|Cancelled|TimedOut|Cancelling)
+      invocation_field StandardOutputContent || true
+      invocation_field StandardErrorContent || true
+      exit 1 ;;
+    *)  # Pending, InProgress, Delayed, or unknown: wait rather than spin.
+      sleep 10 ;;
+  esac
+done
+
+invocation_field StandardOutputContent
diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index cc38cedd2..37f9122d0 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -95,7 +95,11 @@ jobs:
     needs: start_ec2_instance
     permissions:
       id-token: write
+      contents: read
     steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
       # Use OIDC to get AWS credentials for SSM.
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
@@ -108,9 +112,7 @@ jobs:
         env:
           GH_SHA: ${{ github.sha }}
         run: |
-          # Build the remote shell script as a JSON array for AWS-RunShellScript.
-          # The first remote command is #!/bin/bash so SSM runs the script with Bash.
-          commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<EOF
+          .github/scripts/run-ssm-command.sh "$INSTANCE_ID" <<EOF
           #!/bin/bash
 
           # SSM runs as root, but CARLA cannot. Use ubuntu for checkout/install too
@@ -127,44 +129,6 @@ jobs:
             python3 .github/check_latest_simulators.py
           '
           EOF
-          )
-
-          # Run the script on the EC2 instance through SSM instead of SSH.
-          command_id=$(aws ssm send-command \
-            --instance-ids "$INSTANCE_ID" \
-            --document-name AWS-RunShellScript \
-            --parameters "commands=$commands" \
-            --query 'Command.CommandId' \
-            --output text)
-
-          # Poll SSM until the remote command finishes, then print its output.
-          while true; do
-            status=$(aws ssm get-command-invocation \
-              --command-id "$command_id" \
-              --instance-id "$INSTANCE_ID" \
-              --query 'Status' \
-              --output text)
-
-            case "$status" in
-              Success)
-                break
-                ;;
-              Failed|Cancelled|TimedOut|Cancelling)
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text
-                exit 1
-                ;;
-              Pending|InProgress|Delayed)
-                sleep 5
-                ;;
-            esac
-          done
-
-          aws ssm get-command-invocation \
-            --command-id "$command_id" \
-            --instance-id "$INSTANCE_ID" \
-            --query 'StandardOutputContent' \
-            --output text
 
   check_nvidia_smi:
     name: check_nvidia_smi
@@ -173,7 +137,11 @@ jobs:
     continue-on-error: true
     permissions:
       id-token: write
+      contents: read
     steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
         with:
@@ -183,7 +151,7 @@ jobs:
 
       - name: Check NVIDIA SMI
         run: |
-          commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
+          .github/scripts/run-ssm-command.sh "$INSTANCE_ID" <<'EOF'
           #!/bin/bash
           output=$(nvidia-smi) &&
           echo "$output"
@@ -193,42 +161,6 @@ jobs:
             exit 1
           fi
           EOF
-          )
-
-          command_id=$(aws ssm send-command \
-            --instance-ids "$INSTANCE_ID" \
-            --document-name AWS-RunShellScript \
-            --parameters "commands=$commands" \
-            --query 'Command.CommandId' \
-            --output text)
-
-          while true; do
-            status=$(aws ssm get-command-invocation \
-              --command-id "$command_id" \
-              --instance-id "$INSTANCE_ID" \
-              --query 'Status' \
-              --output text)
-
-            case "$status" in
-              Success)
-                break
-                ;;
-              Failed|Cancelled|TimedOut|Cancelling)
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text
-                exit 1
-                ;;
-              Pending|InProgress|Delayed)
-                sleep 5
-                ;;
-            esac
-          done
-
-          aws ssm get-command-invocation \
-            --command-id "$command_id" \
-            --instance-id "$INSTANCE_ID" \
-            --query 'StandardOutputContent' \
-            --output text
 
       - name: NVIDIA Driver is not set
         if: ${{ failure() }}
@@ -242,7 +174,11 @@ jobs:
     needs: [check_simulator_version_updates, check_nvidia_smi]
     permissions:
       id-token: write
+      contents: read
     steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
         with:
@@ -257,7 +193,7 @@ jobs:
         run: |
           # CARLA refuses to start as root. SSM Run Command runs as root by default,
           # so run the CARLA test block as the ubuntu user.
-          commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
+          .github/scripts/run-ssm-command.sh "$INSTANCE_ID" 7200 <<'EOF'
           #!/bin/bash
           sudo -u ubuntu -H bash -lc '
             set -e
@@ -273,44 +209,6 @@ jobs:
             done
           '
           EOF
-          )
-
-          # AWS-RunShellScript's executionTimeout defaults to 3600 seconds (1 hour).
-          # Limit the CARLA SSM command to 2 hours so it ends before the AWS role session expires.
-          command_id=$(aws ssm send-command \
-            --instance-ids "$INSTANCE_ID" \
-            --document-name AWS-RunShellScript \
-            --parameters "commands=$commands,executionTimeout=7200" \
-            --query 'Command.CommandId' \
-            --output text)
-
-          while true; do
-            status=$(aws ssm get-command-invocation \
-              --command-id "$command_id" \
-              --instance-id "$INSTANCE_ID" \
-              --query 'Status' \
-              --output text)
-
-            case "$status" in
-              Success)
-                break
-                ;;
-              Failed|Cancelled|TimedOut|Cancelling)
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text
-                exit 1
-                ;;
-              Pending|InProgress|Delayed)
-                sleep 10
-                ;;
-            esac
-          done
-
-          aws ssm get-command-invocation \
-            --command-id "$command_id" \
-            --instance-id "$INSTANCE_ID" \
-            --query 'StandardOutputContent' \
-            --output text
 
   run_webots_simulators:
     name: run_webots_simulators
@@ -318,7 +216,11 @@ jobs:
     needs: [check_simulator_version_updates, check_nvidia_smi]
     permissions:
       id-token: write
+      contents: read
     steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
         with:
@@ -328,7 +230,7 @@ jobs:
 
       - name: Run Webots Tests
         run: |
-          commands=$(jq -Rs -c 'split("\n") | if .[-1] == "" then .[:-1] else . end' <<'EOF'
+          .github/scripts/run-ssm-command.sh "$INSTANCE_ID" <<'EOF'
           #!/bin/bash
           sudo -u ubuntu -H bash -lc '
             Xvfb :99 -screen 0 1024x768x16 &
@@ -346,42 +248,6 @@ jobs:
             done
           '
           EOF
-          )
-
-          command_id=$(aws ssm send-command \
-            --instance-ids "$INSTANCE_ID" \
-            --document-name AWS-RunShellScript \
-            --parameters "commands=$commands" \
-            --query 'Command.CommandId' \
-            --output text)
-
-          while true; do
-            status=$(aws ssm get-command-invocation \
-              --command-id "$command_id" \
-              --instance-id "$INSTANCE_ID" \
-              --query 'Status' \
-              --output text)
-
-            case "$status" in
-              Success)
-                break
-                ;;
-              Failed|Cancelled|TimedOut|Cancelling)
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardOutputContent' --output text
-                aws ssm get-command-invocation --command-id "$command_id" --instance-id "$INSTANCE_ID" --query 'StandardErrorContent' --output text
-                exit 1
-                ;;
-              Pending|InProgress|Delayed)
-                sleep 10
-                ;;
-            esac
-          done
-
-          aws ssm get-command-invocation \
-            --command-id "$command_id" \
-            --instance-id "$INSTANCE_ID" \
-            --query 'StandardOutputContent' \
-            --output text
 
   stop_ec2_instance:
     name: stop_ec2_instance

From 0b3c947c54e2b4e568b8b34d22e3ba574f7c605a Mon Sep 17 00:00:00 2001
From: lola <keepthechange2021@gmail.com>
Date: Fri, 29 May 2026 10:19:21 -0700
Subject: [PATCH 18/18] pin checkout action

---
 .github/workflows/run-simulators.yml | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 37f9122d0..9b952aec3 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -98,7 +98,9 @@ jobs:
       contents: read
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871
+        with:
+          persist-credentials: false
 
       # Use OIDC to get AWS credentials for SSM.
       - name: Configure AWS Credentials
@@ -140,7 +142,9 @@ jobs:
       contents: read
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871
+        with:
+          persist-credentials: false
 
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
@@ -177,7 +181,9 @@ jobs:
       contents: read
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871
+        with:
+          persist-credentials: false
 
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37
@@ -219,7 +225,9 @@ jobs:
       contents: read
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871
+        with:
+          persist-credentials: false
 
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37