Skip to content

Commit d6a97eb

Browse files
committed
Fixes #353 - Add health check command
Add a new foremanctl health command that verifies the state of all Foreman services after installation or during troubleshooting.

Checks performed:
- Core services running (foreman, httpd, redis, postgresql)
- Dynflow workers running (orchestrator, worker, worker-hosts-queue)
- Pulp services running (pulp-api, pulp-content)
- Candlepin service running
- Foreman API responding (GET /api/v2/ping)
- Foreman tasks status (via Katello ping response)

Reports a summary of all failures and exits non-zero if any check fails, making it suitable for scripting and CI use.
1 parent 5efa47a commit d6a97eb

2 files changed

Lines changed: 124 additions & 0 deletions

File tree

src/playbooks/health/health.yaml

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
---
# Foreman health-check playbook.
#
# Asserts that the expected systemd services are running and that the Foreman
# API answers on /api/v2/ping. Each individual check uses ignore_errors so that
# every problem is collected first; the final task then fails the play when any
# check did not pass, which gives the caller a non-zero exit status.
- name: Run Foreman health checks
  hosts: quadlet
  become: true
  gather_facts: true
  tags:
    - foremanctl_suppress_default_output
  vars_files:
    - "../../vars/defaults.yml"
    - "../../vars/base.yaml"
  vars:
    # True when the ping response reported a Foreman tasks status other than
    # 'ok'. A missing status (ping failed, or no Katello section in the
    # response) is treated as 'unknown' and does not count as a failure.
    # NOTE(review): assumes Katello reports a healthy service as 'ok' - confirm.
    foreman_tasks_unhealthy: >-
      {{ (foreman_tasks_status | default('unknown') | lower) not in ['ok', 'unknown'] }}
  tasks:
    # Fills ansible_facts.services, which every service assertion below reads.
    - name: Gather service facts
      ansible.builtin.service_facts:

    - name: Check core services are running
      ansible.builtin.assert:
        that:
          - "ansible_facts.services[item + '.service'] is defined"
          - "ansible_facts.services[item + '.service']['state'] == 'running'"
        fail_msg: "Service {{ item }} is not running"
        success_msg: "Service {{ item }} is running"
      loop:
        - foreman
        - httpd
        - redis
        - postgresql
      ignore_errors: true
      register: service_checks

    - name: Check dynflow services are running
      ansible.builtin.assert:
        that:
          - "ansible_facts.services[item + '.service'] is defined"
          - "ansible_facts.services[item + '.service']['state'] == 'running'"
        fail_msg: "Service {{ item }} is not running"
        success_msg: "Service {{ item }} is running"
      loop:
        - dynflow-sidekiq@orchestrator
        - dynflow-sidekiq@worker
        - dynflow-sidekiq@worker-hosts-queue
      ignore_errors: true
      register: dynflow_checks

    - name: Check Pulp services are running
      ansible.builtin.assert:
        that:
          - "ansible_facts.services[item + '.service'] is defined"
          - "ansible_facts.services[item + '.service']['state'] == 'running'"
        fail_msg: "Service {{ item }} is not running"
        success_msg: "Service {{ item }} is running"
      loop:
        - pulp-api
        - pulp-content
      ignore_errors: true
      register: pulp_checks

    - name: Check Candlepin service is running
      ansible.builtin.assert:
        that:
          - "ansible_facts.services['candlepin.service'] is defined"
          - "ansible_facts.services['candlepin.service']['state'] == 'running'"
        fail_msg: "Service candlepin is not running"
        success_msg: "Service candlepin is running"
      ignore_errors: true
      register: candlepin_check

    # Single ping request; its result feeds both the API check and the
    # Foreman tasks check below.
    - name: Check Foreman API responds
      ansible.builtin.uri:
        url: "{{ foreman_url }}/api/v2/ping"
        validate_certs: false
        status_code: 200
        timeout: 10
      register: foreman_ping
      ignore_errors: true

    - name: Report Foreman API status
      ansible.builtin.debug:
        msg: >-
          Foreman API: {{ 'OK' if foreman_ping is success else 'FAILED - ' + (foreman_ping.msg | default('unreachable')) }}

    # Reuses the ping response registered above instead of issuing a second,
    # identical request. Skipped when the ping failed or the response has no
    # Katello section, in which case foreman_tasks_status stays undefined.
    - name: Check Foreman tasks status
      ansible.builtin.set_fact:
        foreman_tasks_status: >-
          {{ foreman_ping.json.results.katello.services.foreman_tasks.status | default('unknown') }}
      when:
        - foreman_ping is success
        - foreman_ping.json.results.katello is defined

    - name: Report Foreman tasks status
      ansible.builtin.debug:
        msg: "Foreman tasks: {{ foreman_tasks_status }}"
      when: foreman_tasks_status is defined

    # Flattens the per-item results of the looped assertions into one list of
    # service names whose assertion failed; candlepin is appended separately
    # because its check is not looped.
    - name: Collect failed checks
      ansible.builtin.set_fact:
        failed_services: >-
          {{
            (service_checks.results | default([]) | selectattr('failed', 'equalto', true) | map(attribute='item') | list) +
            (dynflow_checks.results | default([]) | selectattr('failed', 'equalto', true) | map(attribute='item') | list) +
            (pulp_checks.results | default([]) | selectattr('failed', 'equalto', true) | map(attribute='item') | list) +
            (['candlepin'] if candlepin_check is failed else [])
          }}

    - name: Health check summary
      ansible.builtin.debug:
        msg: >-
          {% if failed_services | length == 0 and foreman_ping is success and not (foreman_tasks_unhealthy | bool) %}
          All health checks passed.
          {% else %}
          Health check issues found:
          {% if failed_services | length > 0 %}Failed services: {{ failed_services | join(', ') }}{% endif %}
          {% if foreman_ping is failed %}Foreman API is not responding.{% endif %}
          {% if foreman_tasks_unhealthy | bool %}Foreman tasks status is {{ foreman_tasks_status }}.{% endif %}
          {% endif %}

    # Turns the collected results into the play's failure (and therefore the
    # non-zero exit status). The Foreman tasks status now counts as a check too.
    - name: Fail if any checks failed
      ansible.builtin.fail:
        msg: >-
          Health check failed: {{ failed_services | length }} service(s) down,
          Foreman API {{ 'OK' if foreman_ping is success else 'unreachable' }},
          Foreman tasks {{ foreman_tasks_status | default('unknown') }}
      when: >-
        failed_services | length > 0
        or foreman_ping is failed
        or foreman_tasks_unhealthy | bool
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
---
# Description of the health check command.
# NOTE(review): presumably displayed by foremanctl as the command's help
# text - confirm against the consumer of this file.
help: |
  Run post-install health checks on Foreman services

0 commit comments

Comments
 (0)