From ca945a9c00a67db886bb1fb73df9e57088b662c4 Mon Sep 17 00:00:00 2001 From: Jingwen Wu Date: Mon, 18 May 2026 17:49:56 -0700 Subject: [PATCH 01/10] fix(localdns): reduce startup polling interval --- parts/linux/cloud-init/artifacts/localdns.sh | 14 +++++---- .../cloud-init/artifacts/localdns_spec.sh | 29 +++++++++++++++++++ 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/parts/linux/cloud-init/artifacts/localdns.sh b/parts/linux/cloud-init/artifacts/localdns.sh index 8d54c4f1430..04595ca172a 100644 --- a/parts/linux/cloud-init/artifacts/localdns.sh +++ b/parts/linux/cloud-init/artifacts/localdns.sh @@ -61,6 +61,7 @@ CURL_COMMAND="curl -s http://${LOCALDNS_NODE_LISTENER_IP}:8181/ready" NETWORKCTL_RELOAD_CMD="networkctl reload" START_LOCALDNS_TIMEOUT=10 +LOCALDNS_POLL_INTERVAL_SECONDS=0.1 # DNS health check timeout. DNS_HEALTH_CHECK_TIMEOUT=2 @@ -419,11 +420,12 @@ start_localdns() { ${COREDNS_COMMAND} & # Wait until the PID file is created. - local elapsed=0 + local elapsed_tenths=0 + local max_elapsed_tenths=$((START_LOCALDNS_TIMEOUT * 10)) while [ ! -f "${LOCALDNS_PID_FILE}" ]; do - sleep 1 - elapsed=$((elapsed + 1)) - if [ "$elapsed" -ge "$START_LOCALDNS_TIMEOUT" ]; then + sleep "${LOCALDNS_POLL_INTERVAL_SECONDS}" + elapsed_tenths=$((elapsed_tenths + 1)) + if [ "$elapsed_tenths" -ge "$max_elapsed_tenths" ]; then echo "Timed out waiting for CoreDNS to create PID file at ${LOCALDNS_PID_FILE}." return 1 fi @@ -454,7 +456,7 @@ wait_for_localdns_ready() { echo "Localdns failed to come online after $timeout_duration seconds (timeout)." return 1 fi - sleep 1 + sleep "${LOCALDNS_POLL_INTERVAL_SECONDS}" ((attempts++)) done echo "Localdns is online and ready to serve traffic." @@ -1059,7 +1061,7 @@ fi start_localdns || exit $ERR_LOCALDNS_FAIL # Wait to direct traffic to localdns until it's ready. -wait_for_localdns_ready 60 60 || exit $ERR_LOCALDNS_FAIL +wait_for_localdns_ready 600 60 || exit $ERR_LOCALDNS_FAIL # Disable DNS from DHCP and point the system at localdns. # -------------------------------------------------------------------------------------------------------------------- diff --git a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh index d61cf348413..8a729eff3bc 100644 --- a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh @@ -707,6 +707,23 @@ EOF The status should be failure The output should include "Timed out waiting for CoreDNS to create PID file" End + + It 'should poll for the PID file every 0.1 seconds' + MOCK_SCRIPT="./mock-coredns.sh" + cat > "$MOCK_SCRIPT" < Date: Wed, 20 May 2026 16:39:08 -0700 Subject: [PATCH 02/10] Make localdns timeout duration-based --- parts/linux/cloud-init/artifacts/localdns.sh | 61 +++++++++++++------ .../cloud-init/artifacts/localdns_spec.sh | 49 ++++++++------- 2 files changed, 70 insertions(+), 40 deletions(-) diff --git a/parts/linux/cloud-init/artifacts/localdns.sh b/parts/linux/cloud-init/artifacts/localdns.sh index 04595ca172a..0d9a9ccb66e 100644 --- a/parts/linux/cloud-init/artifacts/localdns.sh +++ b/parts/linux/cloud-init/artifacts/localdns.sh @@ -61,12 +61,35 @@ CURL_COMMAND="curl -s http://${LOCALDNS_NODE_LISTENER_IP}:8181/ready" NETWORKCTL_RELOAD_CMD="networkctl reload" START_LOCALDNS_TIMEOUT=10 -LOCALDNS_POLL_INTERVAL_SECONDS=0.1 +LOCALDNS_PID_POLL_INTERVAL_SECONDS=0.1 +LOCALDNS_READY_POLL_INTERVAL_SECONDS=0.1 +LOCALDNS_READY_TIMEOUT_SECONDS=60 # DNS health check timeout. DNS_HEALTH_CHECK_TIMEOUT=2 DNS_HEALTH_CHECK_TRIES=2 +# Convert a wall-clock timeout budget into a poll count for the configured interval. +calculate_max_poll_attempts() { + local timeout_duration=$1 + local poll_interval_seconds=$2 + + awk -v timeout="${timeout_duration}" -v interval="${poll_interval_seconds}" ' + BEGIN { + if (timeout < 0 || interval <= 0) { + exit 1 + } + + if (timeout == 0) { + print 0 + exit 0 + } + + printf "%d\n", int((timeout / interval) + 0.999999) + } + ' +} + # Function definitions used in this file. # functions defined until "${__SOURCED__:+return}" are sourced and tested in - # spec/parts/linux/cloud-init/artifacts/localdns_spec.sh. @@ -420,15 +443,21 @@ start_localdns() { ${COREDNS_COMMAND} & # Wait until the PID file is created. - local elapsed_tenths=0 - local max_elapsed_tenths=$((START_LOCALDNS_TIMEOUT * 10)) + local attempts=0 + local max_attempts + max_attempts=$(calculate_max_poll_attempts "${START_LOCALDNS_TIMEOUT}" "${LOCALDNS_PID_POLL_INTERVAL_SECONDS}") || { + echo "Invalid localdns PID poll interval configuration." + return 1 + } + while [ ! -f "${LOCALDNS_PID_FILE}" ]; do - sleep "${LOCALDNS_POLL_INTERVAL_SECONDS}" - elapsed_tenths=$((elapsed_tenths + 1)) - if [ "$elapsed_tenths" -ge "$max_elapsed_tenths" ]; then + if [ "$attempts" -ge "$max_attempts" ]; then echo "Timed out waiting for CoreDNS to create PID file at ${LOCALDNS_PID_FILE}." return 1 fi + + sleep "${LOCALDNS_PID_POLL_INTERVAL_SECONDS}" + attempts=$((attempts + 1)) done COREDNS_PID="$(cat ${LOCALDNS_PID_FILE})" @@ -438,17 +467,15 @@ start_localdns() { # Wait for localdns to be ready to serve traffic. wait_for_localdns_ready() { - local maxattempts=$1 - local timeout_duration=$2 - declare -i attempts=0 - local starttime=$(date +%s) + local timeout_duration=$1 + local starttime + local currenttime + local elapsedtime + + starttime=$(date +%s) echo "Waiting for localdns to start and be able to serve traffic." until [ "$($CURL_COMMAND)" = "OK" ]; do - if [ $attempts -ge $maxattempts ]; then - echo "Localdns failed to come online after $maxattempts attempts." - return 1 - fi # Check for timeout based on elapsed time. currenttime=$(date +%s) elapsedtime=$((currenttime - starttime)) @@ -456,8 +483,8 @@ wait_for_localdns_ready() { echo "Localdns failed to come online after $timeout_duration seconds (timeout)." return 1 fi - sleep "${LOCALDNS_POLL_INTERVAL_SECONDS}" - ((attempts++)) + + sleep "${LOCALDNS_READY_POLL_INTERVAL_SECONDS}" done echo "Localdns is online and ready to serve traffic." return 0 @@ -1061,7 +1088,7 @@ fi start_localdns || exit $ERR_LOCALDNS_FAIL # Wait to direct traffic to localdns until it's ready. -wait_for_localdns_ready 600 60 || exit $ERR_LOCALDNS_FAIL +wait_for_localdns_ready "${LOCALDNS_READY_TIMEOUT_SECONDS}" || exit $ERR_LOCALDNS_FAIL # Disable DNS from DHCP and point the system at localdns. # -------------------------------------------------------------------------------------------------------------------- diff --git a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh index 8a729eff3bc..48d3c3d1bef 100644 --- a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh @@ -733,47 +733,50 @@ EOF Describe 'wait_for_localdns_ready' setup() { Include "./parts/linux/cloud-init/artifacts/localdns.sh" + DATE_SEQUENCE_FILE="/tmp/localdns-date-sequence-$$" + SLEEP_LOG_FILE="/tmp/localdns-sleep-log-$$" + } + cleanup() { + rm -f "$DATE_SEQUENCE_FILE" "${DATE_SEQUENCE_FILE}.next" "$SLEEP_LOG_FILE" } BeforeEach 'setup' + AfterEach 'cleanup' #------------------------- wait_for_localdns_ready ----------------------------------------------------------- It 'should return success if localdns is ready' CURL_COMMAND="echo OK" - MAX_ATTEMPTS=100 TIMEOUT=5 - When call wait_for_localdns_ready $MAX_ATTEMPTS $TIMEOUT + When call wait_for_localdns_ready $TIMEOUT The status should be success The output should include "Waiting for localdns to start and be able to serve traffic." The output should include "Localdns is online and ready to serve traffic." End - It 'should return failure if localdns is not ready, after timeout' + It 'should return failure if localdns is not ready after the wall-clock timeout' CURL_COMMAND="echo NOTOK" - MAX_ATTEMPTS=1000 TIMEOUT=2 - When call wait_for_localdns_ready $MAX_ATTEMPTS $TIMEOUT - The status should be failure - The output should include "Localdns failed to come online after ${TIMEOUT} seconds (timeout)." - End + EXPECTED_SLEEP_LOG=$(printf '0.1\n0.1\n') + cat > "$DATE_SEQUENCE_FILE" < "${DATE_SEQUENCE_FILE}.next" + mv "${DATE_SEQUENCE_FILE}.next" "$DATE_SEQUENCE_FILE" - It 'should poll readiness every 0.1 seconds' - CURL_COMMAND="echo NOTOK" - MAX_ATTEMPTS=1 - TIMEOUT=50 + echo "$current_time" + } sleep() { - echo "sleep called with: $1" + echo "$1" >> "$SLEEP_LOG_FILE" } - When call wait_for_localdns_ready $MAX_ATTEMPTS $TIMEOUT + When call wait_for_localdns_ready $TIMEOUT The status should be failure - The output should include "sleep called with: 0.1" + The output should include "Localdns failed to come online after ${TIMEOUT} seconds (timeout)." + The contents of file "$SLEEP_LOG_FILE" should eq "$EXPECTED_SLEEP_LOG" End End From 00d1242eac0c1422d34b01a6a538545df87f5294 Mon Sep 17 00:00:00 2001 From: Jingwen Wu Date: Wed, 20 May 2026 16:53:36 -0700 Subject: [PATCH 03/10] Bound localdns readiness polling --- parts/linux/cloud-init/artifacts/localdns.sh | 15 +++++++- .../cloud-init/artifacts/localdns_spec.sh | 35 +++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/parts/linux/cloud-init/artifacts/localdns.sh b/parts/linux/cloud-init/artifacts/localdns.sh index 0d9a9ccb66e..7a24b7fb39b 100644 --- a/parts/linux/cloud-init/artifacts/localdns.sh +++ b/parts/linux/cloud-init/artifacts/localdns.sh @@ -471,20 +471,33 @@ wait_for_localdns_ready() { local starttime local currenttime local elapsedtime + local attempts=0 + local max_attempts + + max_attempts=$(calculate_max_poll_attempts "${timeout_duration}" "${LOCALDNS_READY_POLL_INTERVAL_SECONDS}") || { + echo "Invalid localdns readiness poll interval configuration." + return 1 + } starttime=$(date +%s) echo "Waiting for localdns to start and be able to serve traffic." until [ "$($CURL_COMMAND)" = "OK" ]; do + if [ "$attempts" -ge "$max_attempts" ]; then + echo "Localdns failed to come online after ${timeout_duration} seconds (timeout)." + return 1 + fi + # Check for timeout based on elapsed time. currenttime=$(date +%s) elapsedtime=$((currenttime - starttime)) - if [ $elapsedtime -ge $timeout_duration ]; then + if [ "$elapsedtime" -ge "$timeout_duration" ]; then echo "Localdns failed to come online after $timeout_duration seconds (timeout)." return 1 fi sleep "${LOCALDNS_READY_POLL_INTERVAL_SECONDS}" + attempts=$((attempts + 1)) done echo "Localdns is online and ready to serve traffic." return 0 diff --git a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh index 48d3c3d1bef..97ada25b719 100644 --- a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh @@ -778,6 +778,41 @@ EOF The output should include "Localdns failed to come online after ${TIMEOUT} seconds (timeout)." The contents of file "$SLEEP_LOG_FILE" should eq "$EXPECTED_SLEEP_LOG" End + + It 'should return failure after derived max attempts when the clock does not advance' + CURL_COMMAND="echo NOTOK" + TIMEOUT=2 + LOCALDNS_READY_POLL_INTERVAL_SECONDS=0.5 + EXPECTED_SLEEP_LOG=$(printf '0.5\n0.5\n0.5\n0.5\n') + cat > "$DATE_SEQUENCE_FILE" < "${DATE_SEQUENCE_FILE}.next" + mv "${DATE_SEQUENCE_FILE}.next" "$DATE_SEQUENCE_FILE" + + echo "$current_time" + } + sleep() { + echo "$1" >> "$SLEEP_LOG_FILE" + } + When call wait_for_localdns_ready $TIMEOUT + The status should be failure + The output should include "Localdns failed to come online after ${TIMEOUT} seconds (timeout)." + The contents of file "$SLEEP_LOG_FILE" should eq "$EXPECTED_SLEEP_LOG" + End End From bc674a332049af9c82c9be9285deab1b63c83edb Mon Sep 17 00:00:00 2001 From: Jingwen Wu Date: Wed, 20 May 2026 19:08:49 -0700 Subject: [PATCH 04/10] Test localdns PID polling success path --- .../linux/cloud-init/artifacts/localdns_spec.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh index 97ada25b719..253d9caa87a 100644 --- a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh @@ -724,6 +724,22 @@ EOF The status should be failure The output should include "sleep called with: 0.1" End + + It 'should succeed after polling for the PID file every 0.1 seconds' + mock_coredns() { + return 0 + } + COREDNS_COMMAND="mock_coredns" + sleep() { + echo "sleep called with: $1" + echo "12345" > "${LOCALDNS_PID_FILE}" + } + When call start_localdns + The status should be success + The output should include "sleep called with: 0.1" + The output should include "Localdns PID is 12345." + The file "${LOCALDNS_PID_FILE}" should be exist + End End From 61a52cfbbde6ffa8ca0b40f290a3fce4ad5b4a2c Mon Sep 17 00:00:00 2001 From: Jingwen Wu Date: Wed, 20 May 2026 19:18:37 -0700 Subject: [PATCH 05/10] Clarify localdns timeout guard tests --- parts/linux/cloud-init/artifacts/localdns.sh | 2 +- .../cloud-init/artifacts/localdns_spec.sh | 61 ++++++++++++++++--- 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/parts/linux/cloud-init/artifacts/localdns.sh b/parts/linux/cloud-init/artifacts/localdns.sh index 7a24b7fb39b..7056362cebc 100644 --- a/parts/linux/cloud-init/artifacts/localdns.sh +++ b/parts/linux/cloud-init/artifacts/localdns.sh @@ -484,7 +484,7 @@ wait_for_localdns_ready() { echo "Waiting for localdns to start and be able to serve traffic." until [ "$($CURL_COMMAND)" = "OK" ]; do if [ "$attempts" -ge "$max_attempts" ]; then - echo "Localdns failed to come online after ${timeout_duration} seconds (timeout)." + echo "Localdns failed to come online after ${max_attempts} attempts (safety limit for ${timeout_duration} seconds timeout)." return 1 fi diff --git a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh index 253d9caa87a..c9587154368 100644 --- a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh @@ -662,6 +662,44 @@ EOF End +# This section tests - calculate_max_poll_attempts +# This function is defined in parts/linux/cloud-init/artifacts/localdns.sh file. +#------------------------------------------------------------------------------------------------------------------------------------ + Describe 'calculate_max_poll_attempts' + setup() { + Include "./parts/linux/cloud-init/artifacts/localdns.sh" + } + BeforeEach 'setup' + It 'should return the exact attempt count when timeout divides evenly by the interval' + When call calculate_max_poll_attempts 2 0.5 + The status should be success + The output should eq "4" + End + + It 'should round up when timeout does not divide evenly by the interval' + When call calculate_max_poll_attempts 1 0.3 + The status should be success + The output should eq "4" + End + + It 'should return zero attempts for a zero timeout' + When call calculate_max_poll_attempts 0 0.1 + The status should be success + The output should eq "0" + End + + It 'should fail for a negative timeout' + When call calculate_max_poll_attempts -1 0.1 + The status should be failure + End + + It 'should fail for a zero interval' + When call calculate_max_poll_attempts 1 0 + The status should be failure + End + End + + # This section tests - start_localdns # This function is defined in parts/linux/cloud-init/artifacts/localdns.sh file. #------------------------------------------------------------------------------------------------------------------------------------ @@ -709,13 +747,10 @@ EOF End It 'should poll for the PID file every 0.1 seconds' - MOCK_SCRIPT="./mock-coredns.sh" - cat > "$MOCK_SCRIPT" <&2 + return 1 + fi + current_time=$(head -n 1 "$DATE_SEQUENCE_FILE") tail -n +2 "$DATE_SEQUENCE_FILE" > "${DATE_SEQUENCE_FILE}.next" mv "${DATE_SEQUENCE_FILE}.next" "$DATE_SEQUENCE_FILE" @@ -815,6 +855,11 @@ EOF date() { local current_time + if [ "$#" -ne 1 ] || [ "$1" != "+%s" ]; then + echo "unexpected date args: $*" >&2 + return 1 + fi + current_time=$(head -n 1 "$DATE_SEQUENCE_FILE") tail -n +2 "$DATE_SEQUENCE_FILE" > "${DATE_SEQUENCE_FILE}.next" mv "${DATE_SEQUENCE_FILE}.next" "$DATE_SEQUENCE_FILE" @@ -826,7 +871,7 @@ EOF } When call wait_for_localdns_ready $TIMEOUT The status should be failure - The output should include "Localdns failed to come online after ${TIMEOUT} seconds (timeout)." + The output should include "Localdns failed to come online after 4 attempts (safety limit for ${TIMEOUT} seconds timeout)." The contents of file "$SLEEP_LOG_FILE" should eq "$EXPECTED_SLEEP_LOG" End End From b47ca92f0c7326032ccd6a7ab577d4d50c19eb00 Mon Sep 17 00:00:00 2001 From: Jingwen Wu Date: Thu, 21 May 2026 10:49:32 -0700 Subject: [PATCH 06/10] Validate localdns poll attempt inputs --- parts/linux/cloud-init/artifacts/localdns.sh | 10 +++++++--- .../cloud-init/artifacts/localdns_spec.sh | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/parts/linux/cloud-init/artifacts/localdns.sh b/parts/linux/cloud-init/artifacts/localdns.sh index 7056362cebc..f3979a49a25 100644 --- a/parts/linux/cloud-init/artifacts/localdns.sh +++ b/parts/linux/cloud-init/artifacts/localdns.sh @@ -76,7 +76,11 @@ calculate_max_poll_attempts() { awk -v timeout="${timeout_duration}" -v interval="${poll_interval_seconds}" ' BEGIN { - if (timeout < 0 || interval <= 0) { + if (timeout !~ /^[0-9]+([.][0-9]+)?$/ || interval !~ /^[0-9]+([.][0-9]+)?$/) { + exit 1 + } + + if (interval <= 0) { exit 1 } @@ -446,7 +450,7 @@ start_localdns() { local attempts=0 local max_attempts max_attempts=$(calculate_max_poll_attempts "${START_LOCALDNS_TIMEOUT}" "${LOCALDNS_PID_POLL_INTERVAL_SECONDS}") || { - echo "Invalid localdns PID poll interval configuration." + echo "Failed to calculate localdns PID poll attempts for timeout ${START_LOCALDNS_TIMEOUT} and interval ${LOCALDNS_PID_POLL_INTERVAL_SECONDS}." return 1 } @@ -475,7 +479,7 @@ wait_for_localdns_ready() { local max_attempts max_attempts=$(calculate_max_poll_attempts "${timeout_duration}" "${LOCALDNS_READY_POLL_INTERVAL_SECONDS}") || { - echo "Invalid localdns readiness poll interval configuration." + echo "Failed to calculate localdns readiness poll attempts for timeout ${timeout_duration} and interval ${LOCALDNS_READY_POLL_INTERVAL_SECONDS}." return 1 } diff --git a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh index c9587154368..1c69f8fb9e8 100644 --- a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh @@ -697,6 +697,16 @@ EOF When call calculate_max_poll_attempts 1 0 The status should be failure End + + It 'should fail for a non-numeric timeout' + When call calculate_max_poll_attempts abc 0.1 + The status should be failure + End + + It 'should fail for a non-numeric interval' + When call calculate_max_poll_attempts 1 abc + The status should be failure + End End @@ -835,6 +845,14 @@ EOF The contents of file "$SLEEP_LOG_FILE" should eq "$EXPECTED_SLEEP_LOG" End + It 'should fail if readiness polling attempts cannot be calculated' + CURL_COMMAND="echo NOTOK" + TIMEOUT=abc + When call wait_for_localdns_ready $TIMEOUT + The status should be failure + The output should include "Failed to calculate localdns readiness poll attempts for timeout ${TIMEOUT} and interval ${LOCALDNS_READY_POLL_INTERVAL_SECONDS}." + End + It 'should return failure after derived max attempts when the clock does not advance' CURL_COMMAND="echo NOTOK" TIMEOUT=2 From b232a46c96a343173199376d0490f045ca66fd39 Mon Sep 17 00:00:00 2001 From: Jingwen Wu Date: Thu, 21 May 2026 11:02:33 -0700 Subject: [PATCH 07/10] Restrict localdns timeout inputs to seconds --- parts/linux/cloud-init/artifacts/localdns.sh | 2 +- spec/parts/linux/cloud-init/artifacts/localdns_spec.sh | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/parts/linux/cloud-init/artifacts/localdns.sh b/parts/linux/cloud-init/artifacts/localdns.sh index f3979a49a25..ef233acfccc 100644 --- a/parts/linux/cloud-init/artifacts/localdns.sh +++ b/parts/linux/cloud-init/artifacts/localdns.sh @@ -76,7 +76,7 @@ calculate_max_poll_attempts() { awk -v timeout="${timeout_duration}" -v interval="${poll_interval_seconds}" ' BEGIN { - if (timeout !~ /^[0-9]+([.][0-9]+)?$/ || interval !~ /^[0-9]+([.][0-9]+)?$/) { + if (timeout !~ /^[0-9]+$/ || interval !~ /^[0-9]+([.][0-9]+)?$/) { exit 1 } diff --git a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh index 1c69f8fb9e8..0f8556471ae 100644 --- a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh @@ -703,6 +703,11 @@ EOF The status should be failure End + It 'should fail for a fractional timeout' + When call calculate_max_poll_attempts 0.5 0.1 + The status should be failure + End + It 'should fail for a non-numeric interval' When call calculate_max_poll_attempts 1 abc The status should be failure From c5001f5f11b25d1a0551c43687d97a8f702ea889 Mon Sep 17 00:00:00 2001 From: Jingwen Wu Date: Thu, 21 May 2026 16:55:20 -0700 Subject: [PATCH 08/10] Tighten localdns polling tests --- .../linux/cloud-init/artifacts/localdns_spec.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh index 0f8556471ae..02ae810b4cf 100644 --- a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh @@ -722,10 +722,12 @@ EOF setup() { Include "./parts/linux/cloud-init/artifacts/localdns.sh" LOCALDNS_PID_FILE="/tmp/localdns.pid" + SLEEP_LOG_FILE="/tmp/localdns-start-sleep-log-$$" } cleanup() { rm -f "${LOCALDNS_PID_FILE}" rm -f ./mock-coredns.sh + rm -f "${SLEEP_LOG_FILE}" } BeforeEach 'setup' AfterEach 'cleanup' @@ -780,15 +782,16 @@ EOF return 0 } COREDNS_COMMAND="mock_coredns" + EXPECTED_SLEEP_LOG=$(printf '0.1\n') sleep() { - echo "sleep called with: $1" + echo "$1" >> "$SLEEP_LOG_FILE" echo "12345" > "${LOCALDNS_PID_FILE}" } When call start_localdns The status should be success - The output should include "sleep called with: 0.1" The output should include "Localdns PID is 12345." The file "${LOCALDNS_PID_FILE}" should be exist + The contents of file "$SLEEP_LOG_FILE" should eq "$EXPECTED_SLEEP_LOG" End End @@ -821,6 +824,11 @@ EOF CURL_COMMAND="echo NOTOK" TIMEOUT=2 EXPECTED_SLEEP_LOG=$(printf '0.1\n0.1\n') + # Expected date consumption order: + # 1. starttime initialization -> 100 + # 2. first loop timeout check -> 100 + # 3. second loop timeout check -> 101 + # 4. third loop timeout check -> 102 (triggers timeout after two sleeps) cat > "$DATE_SEQUENCE_FILE" < Date: Fri, 22 May 2026 11:52:00 -0700 Subject: [PATCH 09/10] Prioritize localdns timeout reporting --- parts/linux/cloud-init/artifacts/localdns.sh | 10 ++--- .../cloud-init/artifacts/localdns_spec.sh | 44 +++++++++++++++++++ 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/parts/linux/cloud-init/artifacts/localdns.sh b/parts/linux/cloud-init/artifacts/localdns.sh index ef233acfccc..6736c4a5313 100644 --- a/parts/linux/cloud-init/artifacts/localdns.sh +++ b/parts/linux/cloud-init/artifacts/localdns.sh @@ -487,11 +487,6 @@ wait_for_localdns_ready() { echo "Waiting for localdns to start and be able to serve traffic." until [ "$($CURL_COMMAND)" = "OK" ]; do - if [ "$attempts" -ge "$max_attempts" ]; then - echo "Localdns failed to come online after ${max_attempts} attempts (safety limit for ${timeout_duration} seconds timeout)." - return 1 - fi - # Check for timeout based on elapsed time. currenttime=$(date +%s) elapsedtime=$((currenttime - starttime)) @@ -500,6 +495,11 @@ wait_for_localdns_ready() { return 1 fi + if [ "$attempts" -ge "$max_attempts" ]; then + echo "Localdns failed to come online after ${max_attempts} attempts (safety limit for ${timeout_duration} seconds timeout)." + return 1 + fi + sleep "${LOCALDNS_READY_POLL_INTERVAL_SECONDS}" attempts=$((attempts + 1)) done diff --git a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh index 02ae810b4cf..2d539be9ddb 100644 --- a/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/localdns_spec.sh @@ -858,6 +858,50 @@ EOF The contents of file "$SLEEP_LOG_FILE" should eq "$EXPECTED_SLEEP_LOG" End + It 'should prefer the wall-clock timeout message when timeout and attempt cap are both reached' + CURL_COMMAND="echo NOTOK" + TIMEOUT=2 + LOCALDNS_READY_POLL_INTERVAL_SECONDS=0.5 + EXPECTED_SLEEP_LOG=$(printf '0.5\n0.5\n0.5\n0.5\n') + # Expected date consumption order: + # 1. starttime initialization -> 100 + # 2. first loop timeout check -> 100 + # 3. second loop timeout check -> 100 + # 4. third loop timeout check -> 101 + # 5. fourth loop timeout check -> 101 + # 6. fifth loop timeout check -> 102 (wall-clock timeout and attempt cap both true) + cat > "$DATE_SEQUENCE_FILE" <&2 + return 1 + fi + + current_time=$(head -n 1 "$DATE_SEQUENCE_FILE") + tail -n +2 "$DATE_SEQUENCE_FILE" > "${DATE_SEQUENCE_FILE}.next" + mv "${DATE_SEQUENCE_FILE}.next" "$DATE_SEQUENCE_FILE" + + echo "$current_time" + } + sleep() { + echo "$1" >> "$SLEEP_LOG_FILE" + } + When call wait_for_localdns_ready $TIMEOUT + The status should be failure + The output should include "Localdns failed to come online after ${TIMEOUT} seconds (timeout)." + The output should not include "safety limit" + The contents of file "$SLEEP_LOG_FILE" should eq "$EXPECTED_SLEEP_LOG" + End + It 'should fail if readiness polling attempts cannot be calculated' CURL_COMMAND="echo NOTOK" TIMEOUT=abc From 40bacba2e5cb2603ce22936903371b31184e243c Mon Sep 17 00:00:00 2001 From: Jingwen Wu Date: Tue, 26 May 2026 17:31:55 -0700 Subject: [PATCH 10/10] Document localdns timeout guards --- parts/linux/cloud-init/artifacts/localdns.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/parts/linux/cloud-init/artifacts/localdns.sh b/parts/linux/cloud-init/artifacts/localdns.sh index 6736c4a5313..97e7598a998 100644 --- a/parts/linux/cloud-init/artifacts/localdns.sh +++ b/parts/linux/cloud-init/artifacts/localdns.sh @@ -487,7 +487,8 @@ wait_for_localdns_ready() { echo "Waiting for localdns to start and be able to serve traffic." until [ "$($CURL_COMMAND)" = "OK" ]; do - # Check for timeout based on elapsed time. + # Keep both guards: elapsed time is the real wall-clock timeout, while max_attempts + # guarantees termination if date +%s stalls or does not advance as expected. currenttime=$(date +%s) elapsedtime=$((currenttime - starttime)) if [ "$elapsedtime" -ge "$timeout_duration" ]; then