Skip to content
74 changes: 60 additions & 14 deletions parts/linux/cloud-init/artifacts/localdns.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,39 @@ CURL_COMMAND="curl -s http://${LOCALDNS_NODE_LISTENER_IP}:8181/ready"
NETWORKCTL_RELOAD_CMD="networkctl reload"

# Timeout budget, in seconds, for start_localdns to wait for the CoreDNS PID file.
START_LOCALDNS_TIMEOUT=10
# Sleep interval, in seconds, between checks for the PID file in start_localdns.
LOCALDNS_PID_POLL_INTERVAL_SECONDS=0.1
# Sleep interval, in seconds, between readiness probes in wait_for_localdns_ready.
LOCALDNS_READY_POLL_INTERVAL_SECONDS=0.1
# Timeout, in seconds, passed to wait_for_localdns_ready by the main script flow.
LOCALDNS_READY_TIMEOUT_SECONDS=60

# DNS health check timeout and number of tries.
DNS_HEALTH_CHECK_TIMEOUT=2
DNS_HEALTH_CHECK_TRIES=2

# Convert a wall-clock timeout budget into a poll count for the configured interval.
# Arguments:
#   $1 - timeout budget in whole seconds (non-negative integer, e.g. 10)
#   $2 - poll interval in seconds (positive integer or decimal, e.g. 0.1)
# Outputs:
#   Writes the number of polls (timeout / interval, rounded up) to stdout.
#   A zero timeout yields 0.
# Returns:
#   0 on success; 1 if either argument is malformed or the interval is zero.
calculate_max_poll_attempts() {
    local timeout_duration=$1
    local poll_interval_seconds=$2

    # Shell arithmetic is integer-only, so the fractional interval is handled in awk.
    awk -v timeout="${timeout_duration}" -v interval="${poll_interval_seconds}" '
        BEGIN {
            # Reject anything that is not a plain non-negative integer timeout
            # or a plain non-negative integer/decimal interval.
            if (timeout !~ /^[0-9]+$/ || interval !~ /^[0-9]+([.][0-9]+)?$/) {
                exit 1
            }

            # A zero interval passes the format check above but would divide by zero.
            if (interval <= 0) {
                exit 1
            }

            if (timeout == 0) {
                print 0
                exit 0
            }

            # Round up: adding just under 1 before truncating gives a ceiling
            # that tolerates tiny floating-point error in the division.
            printf "%d\n", int((timeout / interval) + 0.999999)
        }
    '
}

# Function definitions used in this file.
# functions defined until "${__SOURCED__:+return}" are sourced and tested in -
# spec/parts/linux/cloud-init/artifacts/localdns_spec.sh.
Expand Down Expand Up @@ -419,14 +447,21 @@ start_localdns() {
${COREDNS_COMMAND} &

# Wait until the PID file is created.
local elapsed=0
local attempts=0
local max_attempts
max_attempts=$(calculate_max_poll_attempts "${START_LOCALDNS_TIMEOUT}" "${LOCALDNS_PID_POLL_INTERVAL_SECONDS}") || {
echo "Failed to calculate localdns PID poll attempts for timeout ${START_LOCALDNS_TIMEOUT} and interval ${LOCALDNS_PID_POLL_INTERVAL_SECONDS}."
return 1
}

while [ ! -f "${LOCALDNS_PID_FILE}" ]; do
sleep 1
elapsed=$((elapsed + 1))
if [ "$elapsed" -ge "$START_LOCALDNS_TIMEOUT" ]; then
if [ "$attempts" -ge "$max_attempts" ]; then
echo "Timed out waiting for CoreDNS to create PID file at ${LOCALDNS_PID_FILE}."
return 1
fi

sleep "${LOCALDNS_PID_POLL_INTERVAL_SECONDS}"
attempts=$((attempts + 1))
done

COREDNS_PID="$(cat ${LOCALDNS_PID_FILE})"
Expand All @@ -436,26 +471,37 @@ start_localdns() {

# Wait for localdns to be ready to serve traffic.
wait_for_localdns_ready() {
local maxattempts=$1
local timeout_duration=$2
declare -i attempts=0
local starttime=$(date +%s)
local timeout_duration=$1
local starttime
local currenttime
local elapsedtime
local attempts=0
local max_attempts

max_attempts=$(calculate_max_poll_attempts "${timeout_duration}" "${LOCALDNS_READY_POLL_INTERVAL_SECONDS}") || {
echo "Failed to calculate localdns readiness poll attempts for timeout ${timeout_duration} and interval ${LOCALDNS_READY_POLL_INTERVAL_SECONDS}."
return 1
}

starttime=$(date +%s)

echo "Waiting for localdns to start and be able to serve traffic."
until [ "$($CURL_COMMAND)" = "OK" ]; do
if [ $attempts -ge $maxattempts ]; then
echo "Localdns failed to come online after $maxattempts attempts."
if [ "$attempts" -ge "$max_attempts" ]; then
echo "Localdns failed to come online after ${max_attempts} attempts (safety limit for ${timeout_duration} seconds timeout)."
return 1
fi

# Check for timeout based on elapsed time.
currenttime=$(date +%s)
elapsedtime=$((currenttime - starttime))
if [ $elapsedtime -ge $timeout_duration ]; then
if [ "$elapsedtime" -ge "$timeout_duration" ]; then
echo "Localdns failed to come online after $timeout_duration seconds (timeout)."
return 1
fi
sleep 1
((attempts++))

sleep "${LOCALDNS_READY_POLL_INTERVAL_SECONDS}"
attempts=$((attempts + 1))
done
Comment thread
jingwenw15 marked this conversation as resolved.
echo "Localdns is online and ready to serve traffic."
return 0
Expand Down Expand Up @@ -1059,7 +1105,7 @@ fi
start_localdns || exit $ERR_LOCALDNS_FAIL

# Wait to direct traffic to localdns until it's ready.
# The wait is invoked exactly once, with a single wall-clock timeout argument.
wait_for_localdns_ready "${LOCALDNS_READY_TIMEOUT_SECONDS}" || exit $ERR_LOCALDNS_FAIL

# Disable DNS from DHCP and point the system at localdns.
# --------------------------------------------------------------------------------------------------------------------
Expand Down
179 changes: 169 additions & 10 deletions spec/parts/linux/cloud-init/artifacts/localdns_spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -662,17 +662,72 @@ EOF
End


# This section tests - calculate_max_poll_attempts
# This function is defined in parts/linux/cloud-init/artifacts/localdns.sh file.
#------------------------------------------------------------------------------------------------------------------------------------
# Examples for calculate_max_poll_attempts <timeout> <interval>: the happy paths
# pin the printed poll count, the failure paths pin a non-zero exit status.
Describe 'calculate_max_poll_attempts'
# Re-source the script before every example so each one sees a fresh definition.
setup() {
Include "./parts/linux/cloud-init/artifacts/localdns.sh"
}
BeforeEach 'setup'
# 2 / 0.5 is exactly 4, so no rounding is involved.
It 'should return the exact attempt count when timeout divides evenly by the interval'
When call calculate_max_poll_attempts 2 0.5
The status should be success
The output should eq "4"
End

# 1 / 0.3 is about 3.33 and must be rounded up to 4.
It 'should round up when timeout does not divide evenly by the interval'
When call calculate_max_poll_attempts 1 0.3
The status should be success
The output should eq "4"
End

# A zero timeout is valid input and prints 0.
It 'should return zero attempts for a zero timeout'
When call calculate_max_poll_attempts 0 0.1
The status should be success
The output should eq "0"
End

# The leading minus sign does not match the digits-only timeout pattern.
It 'should fail for a negative timeout'
When call calculate_max_poll_attempts -1 0.1
The status should be failure
End

# "0" is well-formed but rejected by the interval <= 0 check.
It 'should fail for a zero interval'
When call calculate_max_poll_attempts 1 0
The status should be failure
End

It 'should fail for a non-numeric timeout'
When call calculate_max_poll_attempts abc 0.1
The status should be failure
End

# Only whole-number timeouts are accepted; the interval may be fractional.
It 'should fail for a fractional timeout'
When call calculate_max_poll_attempts 0.5 0.1
The status should be failure
End

It 'should fail for a non-numeric interval'
When call calculate_max_poll_attempts 1 abc
The status should be failure
End
End


# This section tests - start_localdns
# This function is defined in parts/linux/cloud-init/artifacts/localdns.sh file.
#------------------------------------------------------------------------------------------------------------------------------------
Describe 'start_localdns'
setup() {
Include "./parts/linux/cloud-init/artifacts/localdns.sh"
LOCALDNS_PID_FILE="/tmp/localdns.pid"
SLEEP_LOG_FILE="/tmp/localdns-start-sleep-log-$$"
}
cleanup() {
rm -f "${LOCALDNS_PID_FILE}"
rm -f ./mock-coredns.sh
rm -f "${SLEEP_LOG_FILE}"
}
BeforeEach 'setup'
AfterEach 'cleanup'
Expand Down Expand Up @@ -707,6 +762,37 @@ EOF
The status should be failure
The output should include "Timed out waiting for CoreDNS to create PID file"
End

It 'should poll for the PID file every 0.1 seconds'
mock_coredns() {
return 0
}
COREDNS_COMMAND="mock_coredns"
START_LOCALDNS_TIMEOUT=1
sleep() {
echo "sleep called with: $1"
}
When call start_localdns
The status should be failure
The output should include "sleep called with: 0.1"
End

It 'should succeed after polling for the PID file every 0.1 seconds'
mock_coredns() {
return 0
}
COREDNS_COMMAND="mock_coredns"
EXPECTED_SLEEP_LOG=$(printf '0.1\n')
sleep() {
echo "$1" >> "$SLEEP_LOG_FILE"
echo "12345" > "${LOCALDNS_PID_FILE}"
}
When call start_localdns
The status should be success
The output should include "Localdns PID is 12345."
The file "${LOCALDNS_PID_FILE}" should be exist
The contents of file "$SLEEP_LOG_FILE" should eq "$EXPECTED_SLEEP_LOG"
End
End


Expand All @@ -716,35 +802,108 @@ EOF
# Examples for wait_for_localdns_ready <timeout_seconds>.
# date and sleep are replaced with shell functions in the time-sensitive examples
# so the loop can be driven deterministically without real waiting.
Describe 'wait_for_localdns_ready'
    setup() {
        Include "./parts/linux/cloud-init/artifacts/localdns.sh"
        # Scratch files: scripted `date +%s` values and a log of sleep arguments.
        DATE_SEQUENCE_FILE="/tmp/localdns-date-sequence-$$"
        SLEEP_LOG_FILE="/tmp/localdns-sleep-log-$$"
    }
    cleanup() {
        rm -f "$DATE_SEQUENCE_FILE" "${DATE_SEQUENCE_FILE}.next" "$SLEEP_LOG_FILE"
    }
    BeforeEach 'setup'
    AfterEach 'cleanup'
    #------------------------- wait_for_localdns_ready -----------------------------------------------------------
    It 'should return success if localdns is ready'
        CURL_COMMAND="echo OK"
        TIMEOUT=5
        When call wait_for_localdns_ready $TIMEOUT
        The status should be success
        The output should include "Waiting for localdns to start and be able to serve traffic."
        The output should include "Localdns is online and ready to serve traffic."
    End

    It 'should return failure if localdns is not ready after the wall-clock timeout'
        CURL_COMMAND="echo NOTOK"
        TIMEOUT=2
        EXPECTED_SLEEP_LOG=$(printf '0.1\n0.1\n')
        # Expected date consumption order:
        # 1. starttime initialization -> 100
        # 2. first loop timeout check -> 100
        # 3. second loop timeout check -> 101
        # 4. third loop timeout check -> 102 (triggers timeout after two sleeps)
        cat > "$DATE_SEQUENCE_FILE" <<EOF
100
100
101
102
EOF
        # Mock date: pop and print the next scripted timestamp; reject any other usage.
        date() {
            local current_time

            if [ "$#" -ne 1 ] || [ "$1" != "+%s" ]; then
                echo "unexpected date args: $*" >&2
                return 1
            fi

            current_time=$(head -n 1 "$DATE_SEQUENCE_FILE")
            tail -n +2 "$DATE_SEQUENCE_FILE" > "${DATE_SEQUENCE_FILE}.next"
            mv "${DATE_SEQUENCE_FILE}.next" "$DATE_SEQUENCE_FILE"

            echo "$current_time"
        }
        # Mock sleep: record the requested interval instead of waiting.
        sleep() {
            echo "$1" >> "$SLEEP_LOG_FILE"
        }
        When call wait_for_localdns_ready $TIMEOUT
        The status should be failure
        The output should include "Localdns failed to come online after ${TIMEOUT} seconds (timeout)."
        The contents of file "$SLEEP_LOG_FILE" should eq "$EXPECTED_SLEEP_LOG"
    End

    It 'should fail if readiness polling attempts cannot be calculated'
        CURL_COMMAND="echo NOTOK"
        # A non-numeric timeout makes calculate_max_poll_attempts fail.
        TIMEOUT=abc
        When call wait_for_localdns_ready $TIMEOUT
        The status should be failure
        The output should include "Failed to calculate localdns readiness poll attempts for timeout ${TIMEOUT} and interval ${LOCALDNS_READY_POLL_INTERVAL_SECONDS}."
    End

    It 'should return failure after derived max attempts when the clock does not advance'
        CURL_COMMAND="echo NOTOK"
        TIMEOUT=2
        # 2 seconds at a 0.5 second interval derives a limit of 4 attempts.
        LOCALDNS_READY_POLL_INTERVAL_SECONDS=0.5
        EXPECTED_SLEEP_LOG=$(printf '0.5\n0.5\n0.5\n0.5\n')
        # The clock is frozen at 100, so only the attempt limit can end the loop.
        cat > "$DATE_SEQUENCE_FILE" <<EOF
100
100
100
100
100
100
100
100
100
100
EOF
        # Mock date: pop and print the next scripted timestamp; reject any other usage.
        date() {
            local current_time

            if [ "$#" -ne 1 ] || [ "$1" != "+%s" ]; then
                echo "unexpected date args: $*" >&2
                return 1
            fi

            current_time=$(head -n 1 "$DATE_SEQUENCE_FILE")
            tail -n +2 "$DATE_SEQUENCE_FILE" > "${DATE_SEQUENCE_FILE}.next"
            mv "${DATE_SEQUENCE_FILE}.next" "$DATE_SEQUENCE_FILE"

            echo "$current_time"
        }
        # Mock sleep: record the requested interval instead of waiting.
        sleep() {
            echo "$1" >> "$SLEEP_LOG_FILE"
        }
        When call wait_for_localdns_ready $TIMEOUT
        The status should be failure
        The output should include "Localdns failed to come online after 4 attempts (safety limit for ${TIMEOUT} seconds timeout)."
        The contents of file "$SLEEP_LOG_FILE" should eq "$EXPECTED_SLEEP_LOG"
    End
End

Expand Down
Loading