-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathazure-cluster-bootstrap.yaml
More file actions
358 lines (321 loc) · 10.6 KB
/
azure-cluster-bootstrap.yaml
File metadata and controls
358 lines (321 loc) · 10.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
---
# This collection of tasks bootstraps the cluster following guidance from
# https://docs.microsoft.com/en-us/azure/virtual-machines/workloads/sap/high-availability-guide-suse-pacemaker
# Install the packages the cluster setup below relies on.
# The whole list is handed to the zypper module in one call, so the packages
# are resolved and installed in a single transaction instead of one module
# invocation per package. The task is retried up to 3 times, 60s apart.
# Caution, no version control here (yet)
- name: Ensure cluster dependencies are installed
  community.general.zypper:
    name:
      - socat
      - resource-agents
    state: present
  register: result
  until: result is succeeded
  retries: 3
  delay: 60
# Capture the output of `SUSEConnect -s` in sc_status for the next task.
# The task is never reported as a change.
- name: Get the status of all extensions
  ansible.builtin.command:
    cmd: "SUSEConnect -s"
  changed_when: false
  register: sc_status
# Extract the `status` of the sle-module-public-cloud entry from the JSON
# printed by `SUSEConnect -s`.
# - The identifier is written as a JMESPath raw string ('...') rather than a
#   backtick literal, which is only valid for JSON values.
# - If no entry matches, the fact falls back to 'Unknown' instead of aborting
#   the play with an index error; the activation tasks then treat the module
#   as not registered.
- name: Set public cloud extension fact
  vars:
    jmesquery: "[?identifier=='sle-module-public-cloud'].status"
  ansible.builtin.set_fact:
    public_cloud_module: >-
      {{ sc_status.stdout | from_json | json_query(jmesquery) | first | default('Unknown') }}
# Print the registration status computed for the public cloud module.
- name: Debug
  ansible.builtin.debug:
    var: public_cloud_module
# Register the public cloud module on SLES 12 hosts, unless the status fact
# already says 'Registered'.
- name: Activate public cloud module [SLES 12]
  when:
    - ansible_distribution_major_version == '12'
    - public_cloud_module != 'Registered'
  ansible.builtin.command:
    # Only works on x86_64 for now!
    cmd: "SUSEConnect -p sle-module-public-cloud/12/x86_64"
# Register the public cloud module on SLES 15 hosts, unless the status fact
# already says 'Registered'. The product string carries the full
# distribution version of the host.
- name: Activate public cloud module [SLES 15]
  when:
    - ansible_distribution_major_version == '15'
    - public_cloud_module != 'Registered'
  ansible.builtin.command:
    # Only works on x86_64 for now!
    cmd: SUSEConnect -p sle-module-public-cloud/{{ ansible_distribution_version }}/x86_64
# For SLES 12 we need to force a downgrade of python-azure-core, see https://www.suse.com/support/kb/doc/?id=000020716
# Install the Python 2 flavoured Azure packages on 12 SP5 only.
# The packages are installed one at a time, in the listed order, with the
# version-pinned python-azure-core last.
# NOTE(review): presumably the order matters so that the pinned core replaces
# whatever the other two pulled in - confirm before merging these into a
# single zypper transaction.
- name: Ensure Azure Python SDK and Azure Identity python modules are installed [12sp5]
  when: ansible_distribution_version is version('12.5', '==')
  community.general.zypper:
    state: present
    name: "{{ item }}"
  loop:
    - python-azure-mgmt-compute
    - python-azure-identity
    - python-azure-core==1.9.0-2.3.4
  register: result
  until: result is succeeded
  retries: 3
  delay: 60
# Install the python3-* Azure packages on every release older than 15.4,
# except 12.5 which has its own task.
# Both packages go to zypper in one call (a single transaction) instead of
# looping the module per package. Retried up to 3 times, 60s apart.
- name: Ensure Azure Python SDK and Azure Identity python modules are installed [15 sp<4]
  community.general.zypper:
    name:
      - python3-azure-mgmt-compute
      - python3-azure-identity
    state: present
  when:
    - ansible_distribution_version is version('15.4', '<')
    - ansible_distribution_version is version('12.5', '!=')
  register: result
  until: result is succeeded
  retries: 3
  delay: 60
# https://www.suse.com/c/incompatible-changes-ahead-for-public-cloud-sdks/
# Install the python311-* Azure packages on 15.4 and newer.
# Both packages go to zypper in one call (a single transaction) instead of
# looping the module per package. Retried up to 3 times, 60s apart.
- name: Ensure Azure Python SDK and Azure Identity python modules are installed [15 sp>=4]
  community.general.zypper:
    name:
      - python311-azure-mgmt-compute
      - python311-azure-identity
    state: present
  when:
    - ansible_distribution_version is version('15.4', '>=')
  register: result
  until: result is succeeded
  retries: 3
  delay: 60
# Decide whether DefaultTasksMax has to be raised to 4096.
# The current value may be a larger or smaller integer, or infinity; only an
# integer below 4096 is rewritten, anything else is left untouched.
- name: Get DefaultTasksMax value  # noqa command-instead-of-module
  ansible.builtin.command:
    cmd: systemctl --no-pager show
  register: systemd_result
  changed_when: false

# max_tasks is only ever set (to 'infinity') when systemd reports no limit;
# otherwise it stays undefined.
- name: Check max tasks infinity
  ansible.builtin.set_fact:
    max_tasks: 'infinity'
  loop: "{{ systemd_result.stdout_lines }}"
  when: item is search("DefaultTasksMax=infinity")

# max_tasks_int is only set when the reported value is not infinity.
- name: Check max tasks integer
  ansible.builtin.set_fact:
    max_tasks_int: "{{ item | split('=') | last }}"
  loop: "{{ systemd_result.stdout_lines }}"
  when: item is search("DefaultTasksMax=") and item is not search("DefaultTasksMax=infinity")

# max_tasks and max_tasks_int are mutually exclusive (a given output line
# sets one or the other), so the condition must not require both: demanding
# `max_tasks is defined` here would mean the file is never updated.
# NOTE(review): no handler is notified by this task; confirm that systemd
# picks up the new value (re-exec or reboot) elsewhere in the deployment.
- name: Set DefaultTasksMax
  ansible.builtin.lineinfile:
    path: /etc/systemd/system.conf
    regexp: '^DefaultTasksMax='
    line: 'DefaultTasksMax=4096'
    state: present
    backup: true
  when:
    - max_tasks is not defined
    - max_tasks_int is defined
    - max_tasks_int | int < 4096
# Run any handlers that have been notified so far before continuing.
- name: Flush handlers
  ansible.builtin.meta: flush_handlers
# Make sure ifcfg-eth0 carries CLOUD_NETCONFIG_MANAGE="no": an existing
# CLOUD_NETCONFIG_MANAGE= line is replaced, otherwise the line is added.
# A backup copy of the file is kept when it is modified.
- name: Ensure 'CLOUD_NETCONFIG_MANAGE' is disabled for eth0
  ansible.builtin.lineinfile:
    backup: true
    path: /etc/sysconfig/network/ifcfg-eth0
    line: 'CLOUD_NETCONFIG_MANAGE="no"'
    regexp: '^CLOUD_NETCONFIG_MANAGE='
# Read /etc/sysconfig/sbd from the host (base64 encoded) into sbd_slurp.
# Only done when SBD fencing is requested.
- name: Slurp SBD config
  when: use_sbd | bool
  ansible.builtin.slurp:
    src: /etc/sysconfig/sbd
  register: sbd_slurp
# Decode the slurped file and keep whatever follows the last `SBD_DEVICE=`
# occurrence as sbd_paths.
- name: Set SBD set_fact
  when: use_sbd | bool
  ansible.builtin.set_fact:
    sbd_paths: "{{ sbd_slurp['content'] | b64decode | regex_findall('SBD_DEVICE=(.+)') | last }}"
# Initialise the cluster on the primary node with the SBD device(s) found in
# sbd_paths and the softdog watchdog. Skipped once corosync.conf exists.
- name: Create the cluster on primary [sbd]
  when:
    - is_primary
    - use_sbd | bool
  ansible.builtin.command:
    creates: /etc/corosync/corosync.conf
    cmd: >-
      crm cluster init -y
      -n qe-cluster
      -s {{ sbd_paths }}
      -w softdog
      -i eth0
      -u
# Initialise the cluster on the primary node without SBD options.
# Skipped once corosync.conf exists.
- name: Create the cluster on primary [azure fencing]
  when:
    - is_primary
    - not use_sbd | bool
  ansible.builtin.command:
    creates: /etc/corosync/corosync.conf
    cmd: >-
      crm cluster init -y
      -n qe-cluster
      -i eth0
      -u
# check crm status
# Run `crm status` on the primary; the task fails if the command exits
# non-zero and is never reported as a change.
- name: Check the cluster on primary
  when: is_primary
  ansible.builtin.command:
    cmd: crm status
  changed_when: false
# On every non-primary node, join the cluster via host vmhana01 over eth0.
# Skipped once corosync.conf exists on the node.
- name: Join the cluster
  when: not is_primary
  ansible.builtin.command:
    cmd: >-
      crm cluster join -y
      -c vmhana01
      -i eth0
    creates: /etc/corosync/corosync.conf
# Run `crm status` on the non-primary nodes; the task fails if the command
# exits non-zero and is never reported as a change.
- name: Check the cluster on secondary
  when: not is_primary
  ansible.builtin.command:
    cmd: crm status
  changed_when: false
# Rewrite the value of selected keys in corosync.conf in place.
# Each regexp captures the leading text (group 1) and the key (group 2); with
# backrefs enabled the line is rebuilt as "<prefix><key> <new value>", and a
# key that is not present in the file is NOT added.
# A backup copy of the file is kept whenever it is modified.
# Uses `loop` with block mappings, matching the other loops in this file.
- name: Ensure correct corosync parameters are set
  ansible.builtin.lineinfile:
    path: /etc/corosync/corosync.conf
    regexp: "{{ item.regexp }}"
    line: "{{ item.line }}"
    backrefs: true
    backup: true
  loop:
    - regexp: '(.*)(token:).*'
      line: '\g<1>\g<2> 30000'
    - regexp: '(.*)(token_retransmits_before_loss_const:).*'
      line: '\g<1>\g<2> 10'
    - regexp: '(.*)(join:).*'
      line: '\g<1>\g<2> 60'
    - regexp: '(.*)(consensus:).*'
      line: '\g<1>\g<2> 36000'
    - regexp: '(.*)(max_messages:).*'
      line: '\g<1>\g<2> 20'
    - regexp: '(.*)(expected_votes:).*'
      line: '\g<1>\g<2> 2'
    - regexp: '(.*)(two_node:).*'
      line: '\g<1>\g<2> 1'
# The default stonith device must be deleted, first it must be asserted that it exists
# Probe /var/lib/qedep/sbd without touching it: the module runs in check mode
# and the task is forced to neither fail nor report a change. The outcome is
# kept in sbd_file_check for the following task.
# NOTE(review): the path looks like a marker left by an earlier run - confirm
# where it is created.
- name: Check for pervious ansible creation of stonith devices
  when:
    - is_primary
  ansible.builtin.file:
    state: file
    path: /var/lib/qedep/sbd
  check_mode: true
  failed_when: false
  changed_when: false
  register: sbd_file_check
# Keep the `state` reported by the marker-file check as the fact sbd_tracer.
# NOTE(review): earlier comments on this task described a regexp stripping
# spaces and tabs from crm output (tested with
# crmsh-4.3.1+20220321.bd33abac-150200.5.77.1.noarch); no such regexp is
# present in this task.
- name: Set stonith state facts
  when:
    - is_primary
  ansible.builtin.set_fact:
    sbd_tracer: "{{ sbd_file_check.state }}"
# Split rebuild to its own task file
# Pull in the rebuild tasks on the primary node when SBD is in use and the
# marker check reported the file as absent.
- name: Rebuild stonith device
  when:
    - is_primary
    - sbd_tracer == 'absent'
    - use_sbd | bool
  ansible.builtin.include_tasks: ./rebuild-stonith.yaml
# Capture the current cluster configuration text in crm_conf_show.
# Primary only; never reported as a change.
- name: Get cluster status
  when: is_primary
  ansible.builtin.command:
    cmd: "crm configure show"
  changed_when: false
  register: crm_conf_show
# Derive facts from the `crm configure show` output captured in crm_conf_show:
# - crm_maintenance_mode: value of maintenance-mode=..., or 'unknown' when the
#   property is not present in the output.
# - rsc_azure_events / cln_azure_events / rsc_st_azure: the matched text when
#   the resource is already configured; later tasks test these with
#   `| length == 0` to decide whether the resource must be created.
# The fencing probe accepts both `rsc_st_azure` and `rsc_stonith_azure`: the
# fencing tasks in this file create the primitive under the id
# rsc_stonith_azure, so searching for rsc_st_azure alone never finds it and a
# re-run would try to create the primitive again.
- name: Set crm maintenance facts
  ansible.builtin.set_fact:
    crm_maintenance_mode: "{{ (crm_conf_show.stdout | regex_search('maintenance-mode=([a-z]*)', '\\1'))[0] | default('unknown') }}"
    rsc_azure_events: "{{ crm_conf_show.stdout | regex_search('primitive rsc_azure-events-az') }}"
    cln_azure_events: "{{ crm_conf_show.stdout | regex_search('clone cln_azure-events-az') }}"
    rsc_st_azure: "{{ crm_conf_show.stdout | regex_search('rsc_st(?:onith)?_azure') }}"
  when: is_primary
# Switch the cluster into maintenance mode from the primary node when the
# recorded mode is false or could not be determined.
- name: Ensure maintenance mode is active
  when:
    - is_primary
    - crm_maintenance_mode is false or crm_maintenance_mode == 'unknown'
  ansible.builtin.command:
    cmd: "crm configure property maintenance-mode=true"
# Create the fence_azure_arm stonith primitive using a managed identity
# (msi=true). Runs on the primary only, when SBD is not used, the identity
# management is 'msi', and the fencing probe fact is empty.
- name: Configure azure fencing [MSI (Managed identity)]
  when:
    - is_primary
    - rsc_st_azure | length == 0
    - not use_sbd | bool and azure_identity_management == 'msi'
  ansible.builtin.command:
    cmd: >-
      crm configure primitive rsc_stonith_azure stonith:fence_azure_arm
      params
      msi=true
      subscriptionId="{{ subscription_id }}"
      resourceGroup="{{ resource_group_name }}"
      tenantId="{{ tenant_id }}"
      pcmk_monitor_retries=4
      pcmk_action_limit=3
      power_timeout=240
      pcmk_reboot_timeout=900
      pcmk_delay_max=15
      op monitor interval=3600 timeout=120
# Create the fence_azure_arm stonith primitive using service principal
# credentials. Runs on the primary only, when SBD is not used, the identity
# management is 'spn', and the fencing probe fact is empty.
# The command line carries the service principal password, so the task is
# marked no_log to keep it out of Ansible's output and logs.
# NOTE(review): unlike the managed-identity variant, no pcmk_delay_max is set
# here - confirm that is intentional.
- name: Configure azure fencing [SPN (Service principal)]
  ansible.builtin.command:
    cmd: >-
      crm configure primitive
      rsc_stonith_azure
      stonith:fence_azure_arm
      params
      subscriptionId="{{ subscription_id }}"
      resourceGroup="{{ resource_group_name }}"
      tenantId="{{ tenant_id }}"
      login="{{ spn_application_id }}"
      passwd="{{ spn_application_password }}"
      pcmk_monitor_retries=4
      pcmk_action_limit=3
      power_timeout=240
      pcmk_reboot_timeout=900
      op monitor interval=3600 timeout=120
  no_log: true
  when:
    - is_primary
    - rsc_st_azure | length == 0
    - not use_sbd | bool and azure_identity_management == 'spn'
# Create the azure-events-az primitive on the primary when the earlier probe
# did not find it in the cluster configuration.
- name: Add Azure scheduled events to cluster
  when:
    - is_primary
    - rsc_azure_events | length == 0
  ansible.builtin.command:
    cmd: >-
      crm configure primitive rsc_azure-events-az ocf:heartbeat:azure-events-az
      meta allow-unhealthy-nodes=true
      op monitor interval=10s
# Create the clone of the azure-events-az primitive on the primary when the
# earlier probe did not find it in the cluster configuration.
- name: Clone Azure scheduled events
  when:
    - is_primary
    - cln_azure_events | length == 0
  ansible.builtin.command:
    cmd: crm configure clone cln_azure-events-az rsc_azure-events-az
# Re-read the cluster configuration into crm_conf_show after the resources
# above may have been added. Primary only; never reported as a change.
- name: Refresh cluster status
  when: is_primary
  ansible.builtin.command:
    cmd: "crm configure show"
  changed_when: false
  register: crm_conf_show
# Recompute the maintenance-mode and azure-events facts from the refreshed
# `crm configure show` output; 'unknown' is used when no maintenance-mode
# property is present.
- name: Reset crm maintenance facts
  when: is_primary
  ansible.builtin.set_fact:
    crm_maintenance_mode: "{{ (crm_conf_show.stdout | regex_search('maintenance-mode=([a-z]*)', '\\1'))[0] | default('unknown') }}"
    rsc_azure_events: "{{ crm_conf_show.stdout | regex_search('primitive rsc_azure-events-az') }}"
    cln_azure_events: "{{ crm_conf_show.stdout | regex_search('clone cln_azure-events-az') }}"
# Take the cluster out of maintenance mode from the primary node when the
# recorded mode is true or could not be determined.
- name: Ensure maintenance mode is deactivated
  when:
    - is_primary
    - crm_maintenance_mode is true or crm_maintenance_mode == 'unknown'
  ansible.builtin.command:
    cmd: "crm configure property maintenance-mode=false"