From d4c7021542a0509097717382373b31f6d36b1734 Mon Sep 17 00:00:00 2001 From: "John E. Malmberg" Date: Tue, 16 Jun 2026 10:59:32 -0500 Subject: [PATCH] SRE-3842 build: Not for landing! Modified to force an error path. Update post-provisioning error handling to provide better diagnostics. Add Jenkins workspace cleanup in the post section to reduce agent disk usage. Fix intermittent e-mail delivery by allowing a mail relay setting. Per-file changes: - Jenkinsfile: Add post-build workspace cleanup to reduce disk usage on Jenkins agents. Change the storage prep stage to use EL 9. - ci/gha_functions.sh, ci/stacktrace.sh: Fix executable permission bit in git repository. - ci/junit.sh: Improve error trap to make it more readable. Fix handling of missing report files. - ci/provisioning/post_provision_config.sh: Add missing source for stacktrace script, add DAOS_FAILURE_STEP markers for ci/junit.sh. - ci/provisioning/post_provision_config_common_functions.sh: Set STAGE_NAME default to post_provision_config to improve readability of error messages. Configure an optional mail relay host. - docs/dev/documentation.md: Add VS Code cSpell setup guidance and fix markdown linting issues. Change to Linux line endings. - utils/githooks/README.md: Fix markdown linting issues. Signed-off-by: John E.
Malmberg --- Jenkinsfile | 52 +- ci/gha_functions.sh | 6 + ci/junit.sh | 202 ++++- ci/provisioning/post_provision_config.sh | 68 +- .../post_provision_config_common_functions.sh | 69 +- .../post_provision_config_nodes.sh | 25 +- .../post_provision_config_nodes_EL.sh | 13 +- ci/stacktrace.sh | 6 + docs/dev/documentation.md | 755 ++++++++++-------- utils/githooks/README.md | 29 +- 10 files changed, 808 insertions(+), 417 deletions(-) mode change 100644 => 100755 ci/gha_functions.sh mode change 100644 => 100755 ci/junit.sh mode change 100644 => 100755 ci/stacktrace.sh diff --git a/Jenkinsfile b/Jenkinsfile index 2c06b52bdb7..94598f58cda 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,4 +1,5 @@ #!/usr/bin/groovy +/* groovylint-disable DuplicateListLiteral */ /* groovylint-disable-next-line LineLength */ /* groovylint-disable DuplicateMapLiteral, DuplicateNumberLiteral */ /* groovylint-disable DuplicateStringLiteral, NestedBlockDepth */ @@ -452,7 +453,8 @@ pipeline { stage('Python Bandit check') { when { beforeAgent true - expression { !skipStage() } + // TESTING: disable for forced-failure diagnostics - REMOVE AFTER TEST + expression { false } } agent { dockerfile { @@ -492,7 +494,8 @@ pipeline { stage('Build on EL 8') { when { beforeAgent true - expression { !skip_build_stage('el8') } + // TESTING: disable for forced-failure diagnostics - REMOVE AFTER TEST + expression { false } } agent { dockerfile { @@ -600,7 +603,8 @@ pipeline { stage('Build on Leap 15') { when { beforeAgent true - expression { !skip_build_stage('leap15') } + // TESTING: disable for forced-failure diagnostics - REMOVE AFTER TEST + expression { false } } agent { dockerfile { @@ -611,7 +615,7 @@ pipeline { deps_build: false) + ' --build-arg DAOS_PACKAGES_BUILD=no ' + ' --build-arg DAOS_KEEP_SRC=yes ' + - " -t ${sanitized_JOB_NAME()}-leap15-gcc" + + " -t ${sanitized_JOB_NAME()}-leap15-gcc" + " -t ${sanitized_JOB_NAME()}-leap15" + ' --build-arg POINT_RELEASE=.6' + " --build-arg 
PYTHON_VERSION=${env.PYTHON_VERSION}" @@ -656,7 +660,8 @@ pipeline { stage('Unit Tests') { when { beforeAgent true - expression { !skipStage() } + // DEBUG: Skip Unit Tests to reach post-provision mail diagnostics + expression { false } } parallel { stage('Unit Test') { @@ -825,7 +830,8 @@ pipeline { stage('Functional on EL 8.8 with Valgrind') { when { beforeAgent true - expression { !skipStage() } + // DEBUG: Skip VM-based branch for targeted opa-113 testing + expression { false } } agent { label vm9_label('EL8') @@ -848,7 +854,8 @@ pipeline { stage('Functional on EL 8') { when { beforeAgent true - expression { !skipStage() } + // DEBUG: Skip VM-based branch for targeted opa-113 testing + expression { false } } agent { label vm9_label('EL8') @@ -875,11 +882,15 @@ pipeline { expression { !skipStage() } } agent { - label vm9_label('EL9') + label 'ci_opa-113-test' //vm9_label('EL9') } steps { job_step_update( functionalTest( + // DEBUG: keep cluster-size requirements minimal for mail-path testing + node_count: 1, + test_tag: 'test_daos_management', + ftest_arg: '--yaml_extension single_host', inst_repos: daosRepos(), inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + ' mercury-libfabric', @@ -896,7 +907,8 @@ pipeline { stage('Functional on Leap 15') { when { beforeAgent true - expression { !skipStage() } + // DEBUG: Skip VM-based branch for targeted opa-113 testing + expression { false } } agent { label vm9_label('Leap15') @@ -920,7 +932,8 @@ pipeline { stage('Functional on SLES 15') { when { beforeAgent true - expression { !skipStage() } + // DEBUG: Skip VM-based branch for targeted opa-113 testing + expression { false } } agent { label vm9_label('Leap15') @@ -944,7 +957,8 @@ pipeline { stage('Functional on Ubuntu 20.04') { when { beforeAgent true - expression { !skipStage() } + // DEBUG: Skip VM-based branch for targeted opa-113 testing + expression { false } } agent { label vm9_label('Ubuntu') @@ -967,7 +981,8 @@ pipeline { stage('Fault injection 
testing') { when { beforeAgent true - expression { !skipStage() } + // DEBUG: Skip VM-based branch for targeted opa-113 testing + expression { false } } agent { label params.CI_FI_1_LABEL @@ -1008,7 +1023,8 @@ pipeline { stage('Test RPMs on EL 9.6') { when { beforeAgent true - expression { params.CI_TEST_EL_RPMs && !skipStage() } + // DEBUG: Skip VM-based branch for targeted opa-113 testing + expression { false } } agent { label params.CI_UNIT_VM1_LABEL @@ -1029,7 +1045,8 @@ pipeline { stage('Test RPMs on Leap 15.5') { when { beforeAgent true - expression { params.CI_TEST_LEAP_RPMs && !skipStage() } + // DEBUG: Skip VM-based branch for targeted opa-113 testing + expression { false } } agent { label params.CI_UNIT_VM1_LABEL @@ -1081,7 +1098,7 @@ pipeline { } // stage('Test RPMs on Leap 15.5') } // parallel } // stage('Test') - stage('Test Storage Prep on EL 8.8') { + stage('Test Storage Prep on EL 9') { when { beforeAgent true expression { params.CI_STORAGE_PREP_LABEL != '' } @@ -1232,5 +1249,10 @@ pipeline { unsuccessful { notifyBrokenBranch branches: target_branch } + cleanup { + // Need to clean the workspace to reduce disk space usage on + // Jenkins build agents + cleanWs() + } } // post } diff --git a/ci/gha_functions.sh b/ci/gha_functions.sh old mode 100644 new mode 100755 index 1b776a74595..edd1d65046f --- a/ci/gha_functions.sh +++ b/ci/gha_functions.sh @@ -1,5 +1,11 @@ #!/bin/bash +# +# Copyright 2026 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# + # TODO: this should produce a JUnit result error_exit() { echo "$1" diff --git a/ci/junit.sh b/ci/junit.sh old mode 100644 new mode 100755 index c7e3a2aee3b..798e8bd2594 --- a/ci/junit.sh +++ b/ci/junit.sh @@ -1,50 +1,228 @@ #!/bin/bash +# +# Copyright 2026 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# + set -eux +: "${STAGE_NAME:=junit_file_create}" + +junit_sanitized_stage() { + local stage="${STAGE_NAME}" + + 
stage="$(echo "$stage" | sed 's/[^a-zA-Z0-9_]/_/g' | sed 's/__*/_/g')" + echo "${stage:-unknown_stage}" +} + +junit_classname() { + # Keep package stable and class readable for Jenkins views. + echo "infrastructure.$(junit_sanitized_stage)" +} + junit_result() { local name="$1" local msg="$2" - local stacktrace="${3:-$(stacktrace "Called from" 1)}" + local stacktrace="${3:-}" + local classname + classname="$(junit_classname)" + if [ -z "$stacktrace" ] && declare -F stacktrace > /dev/null; then + stacktrace="$(stacktrace "Called from" 1 || true)" + fi echo -e "$msg" cat < results.xml - + EOF } +junit_pass_result() { + local name="$1" + local msg="${2:-success}" + local classname + classname="$(junit_classname)" + + echo -e "$msg" + + cat < results.xml + + + +EOF +} + +junit_on_error() { + local rc=$? + + # Prevent recursive ERR trap loops while collecting error diagnostics. + trap - ERR + set +e + + local step="${DAOS_FAILURE_STEP:-unknown}" + local cmd="${BASH_COMMAND:-unknown}" + local msg="Unhandled error in ${STAGE_NAME} step=${step} rc=${rc}" + local test_name="${JUNIT_TESTCASE_NAME:-UnhandledError}" + local trace + + trace="Failing command: ${cmd}" + if declare -F stacktrace > /dev/null; then + trace+=$'\n' + trace+="$(stacktrace "Called from" 1 || true)" + fi + + junit_result "$test_name" "$msg" "$trace" || true +} + +expand_junit_nodes() { + local nodes="$1" + local expanded="" + + if command -v nodeset > /dev/null 2>&1; then + expanded="$(nodeset -e "$nodes" 2>/dev/null || true)" + fi + + if [ -n "$expanded" ]; then + tr ' ' '\n' <<< "$expanded" | sed '/^$/d' + return + fi + + tr ',' '\n' <<< "$nodes" | sed '/^$/d' +} + +compute_node_position() { + local nodelist="$1" + local target_node="$2" + local pos=0 + + for node in ${nodelist//,/ }; do + ((pos++)) || true + if [ "$node" = "$target_node" ]; then + echo "$pos" + return 0 + fi + done + echo "0" + return 1 +} + +count_xml_tag() { + local tag="$1" + shift + + # grep exits 1 on no matches; force a zero 
count instead of + # triggering ERR trap. + (grep -Eho "<${tag}([[:space:]>])" "$@" || true) | wc -l +} + report_junit() { local name="$1" local results="$2" local nodes="$3" - - clush -o '-i ci_key' -l root -w "$nodes" --rcopy "$results" + local rcopy_rc=0 + local artifacts_rc=0 + local missing_results=0 + local tests=0 + local failures=0 + local errors=0 + local skipped=0 local results_files + local expected_nodes + local node + local short_node + local file + local result_node + local existing_node_results=0 + local class_name + local test_name_base + + class_name="$(junit_classname)" + test_name_base="${JUNIT_TESTCASE_BASE:-$name}" + + clush -o '-i ci_key' -l root -w "$nodes" --rcopy "$results" || rcopy_rc=$? + if [ "$rcopy_rc" -ne 0 ]; then # status taken via "||": $? after "if ! cmd" is always 0 + echo "ERROR: Failed to copy $results from nodes=$nodes rc=$rcopy_rc" + fi + readarray -t results_files < <(find . -maxdepth 1 -name "$results.*") + readarray -t expected_nodes < <(expand_junit_nodes "$nodes") + + declare -A node_has_result=() + for file in "${results_files[@]}"; do + result_node="$(basename "$file")" + result_node="${result_node#"$results."}" + node_has_result["$result_node"]=1 + node_has_result["${result_node%%.*}"]=1 + done + + local node_pos=0 + for node in "${expected_nodes[@]}"; do + ((node_pos++)) || true + short_node="${node%%.*}" + if [ -n "${node_has_result[$node]:-}" ] || [ -n "${node_has_result[$short_node]:-}" ]; then + ((existing_node_results++)) || true + continue + fi + + missing_results=1 + file="./${results}.${node}" + cat < "$file" + + + +EOF + results_files+=("$file") + done if [ ${#results_files[@]} -eq 0 ]; then - echo "No results found to report as JUnit results" - ls -l - return + missing_results=1 + file="./${results}.unknown" + cat < "$file" + + + +EOF + results_files+=("$file") + fi + + tests=$(count_xml_tag testcase "${results_files[@]}") + failures=$(count_xml_tag failure "${results_files[@]}") + errors=$(count_xml_tag error "${results_files[@]}") + skipped=$(count_xml_tag skipped
"${results_files[@]}") + + if [ "$tests" -eq 0 ]; then + tests=${#expected_nodes[@]} + if [ "$tests" -eq 0 ]; then + tests=${#results_files[@]} + fi fi mkdir -p "$STAGE_NAME"/framework/ cat < "$STAGE_NAME"/framework/framework_results.xml - + $(cat "${results_files[@]}") EOF - clush -o '-i ci_key' -l root -w "$nodes" --rcopy /var/tmp/artifacts \ - --dest "$STAGE_NAME"/framework/ + clush -o '-i ci_key' -l root -w "$nodes" --rcopy /var/tmp/artifacts \ + --dest "$STAGE_NAME"/framework/ || artifacts_rc=$? + if [ "$artifacts_rc" -ne 0 ]; then # best-effort copy: warn only, real status kept via "||" + echo "WARNING: Failed to copy /var/tmp/artifacts from nodes=$nodes rc=$artifacts_rc" + fi + + if [ "$rcopy_rc" -ne 0 ] || [ "$missing_results" -ne 0 ]; then + return 1 + fi + return 0 } # create this dir so that the remote copy doesn't fail if nothing actually populates it @@ -54,4 +232,4 @@ mkdir -p /var/tmp/artifacts set -E # set an error trap to create a junit result for any unhandled error -trap 'junit_result "UnhandledError" "Unhandled error in ${STAGE_NAME} Post Restore Script step"' ERR +trap 'junit_on_error' ERR diff --git a/ci/provisioning/post_provision_config.sh b/ci/provisioning/post_provision_config.sh index 6aab32f3ab1..23226b46f82 100755 --- a/ci/provisioning/post_provision_config.sh +++ b/ci/provisioning/post_provision_config.sh @@ -23,6 +23,8 @@ EOF # shellcheck disable=SC1091 source ci/provisioning/post_provision_config_common_functions.sh # shellcheck disable=SC1091 +source ci/stacktrace.sh +# shellcheck disable=SC1091 source ci/junit.sh # This script needs to be able to run outside of CI for testing. @@ -71,10 +73,15 @@ FAMILY="${DISTRO%%_*}" if [ -n "$ARTIFACTORY_URL" ] && [ -z "$REPO_FILE_URL" ]; then REPO_FILE_URL="$ARTIFACTORY_URL/repo-files/" fi +# This is an NFS share for looking up current test information. +: "${DAOS_CI_INFO_DIR:=}" # CI user can be any user that is not expected to be on the test systems.
: "${CI_USER:=jenkins}" +: "${DAOS_FAILURE_STEP:=startup}" + +DAOS_FAILURE_STEP="copy_ci_keys" retry_cmd 300 clush -B -S -l root -w "$NODESTRING" -c ci_key* --dest=/tmp/ function create_host_file() { @@ -100,6 +107,7 @@ function create_host_file() { } if [ "$NODESTRING" != "localhost" ]; then + DAOS_FAILURE_STEP="update_hosts_file" if create_host_file "$NODESTRING" "./hosts" "/etc/hosts"; then retry_cmd 300 clush -B -S -l root -w "$NODESTRING" \ -c ./hosts --dest=/etc/hosts @@ -112,8 +120,18 @@ fi # shellcheck disable=SC2001 sanitized_commit_message="$(echo "$COMMIT_MESSAGE" | sed -e 's/\(["\$]\)/\\\1/g')" -if ! retry_cmd 2400 clush -B -S -l root -w "$NODESTRING" \ - "export PS4='$PS4' +build_remote_post_provision_payload() { + local script_path + local remote_script_files=( + "ci/stacktrace.sh" + "ci/junit.sh" + "ci/provisioning/post_provision_config_common_functions.sh" + "ci/provisioning/post_provision_config_common.sh" + "ci/provisioning/post_provision_config_nodes_${FAMILY}.sh" + "ci/provisioning/post_provision_config_nodes.sh" + ) + + REMOTE_POST_PROVISION_PAYLOAD="export PS4='$PS4' MY_UID=$(id -u) CI_USER=\"${CI_USER}\" CONFIG_POWER_ONLY=${CONFIG_POWER_ONLY:-} @@ -129,6 +147,8 @@ if ! retry_cmd 2400 clush -B -S -l root -w "$NODESTRING" \ BUILD_URL=\"${BUILD_URL:-}\" STAGE_NAME=\"${STAGE_NAME:-}\" OPERATIONS_EMAIL=\"${OPERATIONS_EMAIL:-}\" + DAOS_SMTP_RELAY=\"${DAOS_SMTP_RELAY:-}\" + NODELIST=\"${NODESTRING:-}\" COMMIT_MESSAGE=\"$sanitized_commit_message\" REPO_FILE_URL=\"$REPO_FILE_URL\" ARTIFACTORY_URL=\"${ARTIFACTORY_URL}\" @@ -140,19 +160,47 @@ if ! retry_cmd 2400 clush -B -S -l root -w "$NODESTRING" \ REPO_PATH=\"${REPO_PATH:-}\" ARTIFACTS_URL=\"${ARTIFACTS_URL:-}\" COVFN_DISABLED=\"${COVFN_DISABLED:-true}\" - DAOS_CI_INFO_DIR=\"${DAOS_CI_INFO_DIR:?DAOS_CI_INFO_DIR is missing. 
Can not continue with node(s) provisioning process}\" + DAOS_CI_INFO_DIR=\"${DAOS_CI_INFO_DIR}\" CI_SCONS_ARGS=\"${CI_SCONS_ARGS:-}\" - PYTHON_VERSION=\"${PYTHON_VERSION}\" - $(cat ci/stacktrace.sh) - $(cat ci/junit.sh) - $(cat ci/provisioning/post_provision_config_common_functions.sh) - $(cat ci/provisioning/post_provision_config_common.sh) - $(cat ci/provisioning/post_provision_config_nodes_"$FAMILY".sh) - $(cat ci/provisioning/post_provision_config_nodes.sh)"; then + PYTHON_VERSION=\"${PYTHON_VERSION}\"" + + for script_path in "${remote_script_files[@]}"; do + if [ ! -r "$script_path" ]; then + echo "ERROR: Missing remote payload file: $script_path" + return 2 + fi + if ! bash -n "$script_path"; then + echo "ERROR: Syntax check failed for remote payload file: $script_path" + return 2 + fi + REMOTE_POST_PROVISION_PAYLOAD+=$'\n' + REMOTE_POST_PROVISION_PAYLOAD+="$(<"$script_path")" + done + + return 0 +} + +DAOS_FAILURE_STEP="remote_payload_build" +if ! build_remote_post_provision_payload; then + junit_result "remote_payload_build" "Failed to build remote post-provision payload" + report_junit post_provision_config.sh results.xml "$NODESTRING" || true + exit 1 +fi + +DAOS_FAILURE_STEP="remote_post_provision" +if ! retry_cmd 2400 clush -B -S -l root -w "$NODESTRING" \ + "$REMOTE_POST_PROVISION_PAYLOAD"; then report_junit post_provision_config.sh results.xml "$NODESTRING" exit 1 fi +DAOS_FAILURE_STEP="report_junit" +if ! 
report_junit post_provision_config.sh results.xml "$NODESTRING"; then + echo "ERROR: Failed to collect node JUnit results" + exit 1 +fi + +DAOS_FAILURE_STEP="publish_commit_metadata" git log --format=%B -n 1 HEAD | sed -ne '1s/^\([A-Z][A-Z]*-[0-9][0-9]*\) .*/\1/p' \ -e '/^Fixes:/{s/^Fixes: *//;s/ /\ /g;p}' | \ diff --git a/ci/provisioning/post_provision_config_common_functions.sh b/ci/provisioning/post_provision_config_common_functions.sh index 4950d34008e..103960597c5 100755 --- a/ci/provisioning/post_provision_config_common_functions.sh +++ b/ci/provisioning/post_provision_config_common_functions.sh @@ -11,14 +11,16 @@ set -eux : "${DAOS_STACK_RETRY_DELAY_SECONDS:=60}" : "${DAOS_STACK_RETRY_COUNT:=3}" : "${DAOS_STACK_MONITOR_SECONDS:=600}" +: "${DAOS_STACK_NON_RETRY_EXIT_CODES:=2 126 127}" : "${BUILD_URL:=Not_in_jenkins}" -: "${STAGE_NAME:=Unknown_Stage}" +: "${STAGE_NAME:=post_provision_config}" : "${OPERATIONS_EMAIL:=$USER@localhost}" : "${JENKINS_URL:=https://jenkins.example.com}" domain1="${JENKINS_URL#https://}" mail_domain="${domain1%%/*}" : "${EMAIL_DOMAIN:=$mail_domain}" : "${DAOS_DEVOPS_EMAIL:="$HOSTNAME"@"$EMAIL_DOMAIN"}" +: "${DAOS_SMTP_RELAY:=}" # functions common to more than one distro specific provisioning url_to_repo() { @@ -80,6 +82,58 @@ dump_repos() { cat "$file" done } + +configure_postfix_relay() { + # Enable and start postfix on all distros that have it. + # This is done unconditionally so mail delivery works on LEAP/SLES as + # well as EL without requiring distro-specific bootstrap hooks. + if command -v systemctl >/dev/null 2>&1 && command -v postfix >/dev/null 2>&1; then + systemctl enable postfix.service 2>/dev/null || true + postfix_start_exit=0 # "|| true" would mask the start status, so record it via "||" + systemctl start postfix.service 2>/dev/null || postfix_start_exit=$? + if [ $postfix_start_exit -ne 0 ]; then + echo "WARNING: Postfix not started: $postfix_start_exit" + systemctl status postfix.service || true + fi + fi + + # Apply optional site-specific relay override.
+ local relay="${DAOS_SMTP_RELAY:-}" + local normalized="" + local host="" + local port="" + + relay="${relay//[[:space:]]/}" + if [ -z "$relay" ]; then + return 0 + fi + + if ! command -v postconf >/dev/null 2>&1; then + echo "WARNING: DAOS_SMTP_RELAY is set, but postconf is not available" + return 0 + fi + + # Normalize relayhost format for Postfix direct host relay. + if [[ "$relay" == \[*\] ]] || [[ "$relay" == \[*\]:* ]]; then + normalized="$relay" + elif [[ "$relay" == *:* ]] && [[ "$relay" != *:*:* ]] && [[ "${relay#*:}" =~ ^[0-9]+$ ]]; then + host="${relay%%:*}" + port="${relay##*:}" + normalized="[$host]:$port" + else + normalized="[$relay]" + fi + + echo "INFO: Setting postfix relayhost from DAOS_SMTP_RELAY to '$normalized'" + postconf -e "relayhost = $normalized" + # Reload so the running daemon picks up the new relayhost. + if command -v systemctl >/dev/null 2>&1; then + systemctl reload postfix.service 2>/dev/null || \ + systemctl restart postfix.service 2>/dev/null || true + fi + postconf -nh relayhost || true +} + retry_dnf() { local monitor_threshold="$1" shift @@ -107,7 +161,6 @@ retry_dnf() { return 0 fi # Command failed, retry - rc=${PIPESTATUS[0]} (( attempt++ )) || true if [ "$attempt" -gt 0 ]; then # shellcheck disable=SC2154 @@ -177,6 +230,7 @@ retry_cmd() { local attempt=0 local rc=0 + local non_retry_codes=" ${DAOS_STACK_NON_RETRY_EXIT_CODES} " while [ $attempt -lt "${RETRY_COUNT:-$DAOS_STACK_RETRY_COUNT}" ]; do if monitor_cmd "$monitor_threshold" "$@"; then # Command succeeded, return with success @@ -187,7 +241,11 @@ retry_cmd() { return 0 fi # Command failed, retry rc=${PIPESTATUS[0]}
+ if [[ "$non_retry_codes" == *" $rc "* ]]; then + echo "Command retry aborted for non-retryable exit status: $rc" + break + fi (( attempt++ )) || true if [ "$attempt" -gt 0 ]; then sleep "${RETRY_DELAY_SECONDS:-$DAOS_STACK_RETRY_DELAY_SECONDS}" @@ -211,7 +269,9 @@ timeout_cmd() { local attempt=0 local rc=1 while [ $attempt -lt "${RETRY_COUNT:-$DAOS_STACK_RETRY_COUNT}" ]; do - if monitor_cmd "$DAOS_STACK_MONITOR_SECONDS" timeout "$timeout" "$@"; then + rc=0 # "||" below keeps set -e and the ERR trap from firing before rc is recorded + monitor_cmd "$DAOS_STACK_MONITOR_SECONDS" timeout "$timeout" "$@" || rc=$? + if [ "$rc" -eq 0 ]; then # Command succeeded, return with success if [ $attempt -gt 0 ]; then send_mail "Command timeout successful in $STAGE_NAME after $attempt attempts" \ @@ -219,7 +279,6 @@ timeout_cmd() { fi return 0 fi - rc=${PIPESTATUS[0]} if [ "$rc" = "124" ]; then # Command timed out, try again (( attempt++ )) || true diff --git a/ci/provisioning/post_provision_config_nodes.sh b/ci/provisioning/post_provision_config_nodes.sh index c62c7064cad..8a3d8ad2fed 100644 --- a/ci/provisioning/post_provision_config_nodes.sh +++ b/ci/provisioning/post_provision_config_nodes.sh @@ -9,6 +9,19 @@ set -eux env > /root/last_run-env.txt +# Compute node position in cluster for stable JUnit naming (matches functional tests) +myhost="${HOSTNAME%%.*}" +: "${NODELIST:=$myhost}" +mynodenum=0 +for node in ${NODELIST//,/ }; do + ((mynodenum++)) || true + if [ "$node" = "$myhost" ]; then break; fi +done +export mynodenum +JUNIT_TESTCASE_BASE="Post Provision Working" +JUNIT_TESTCASE_NAME="$JUNIT_TESTCASE_BASE Node $mynodenum" +export JUNIT_TESTCASE_BASE JUNIT_TESTCASE_NAME + # Need this fix earlier # For some reason sssd_common must be reinstalled # to fix up the restored image. @@ -16,6 +29,9 @@ if command -v dnf; then bootstrap_dnf fi +# Apply optional site-specific SMTP relay override across all distro paths. +configure_postfix_relay + # If in CI use made up user "Jenkins" with UID that the build agent is # currently using.
Not sure that the UID is actually important any more # and that parameter can probably be removed in the future. @@ -64,7 +80,9 @@ fi # defined in ci/functional/post_provision_config_nodes_.sh # and catted to the remote node along with this script -if ! post_provision_config_nodes; then - rc=${PIPESTATUS[0]} +# Capture the status with "||": inside "if ! cmd; then", $? is always 0. +rc=0 +post_provision_config_nodes || rc=$? +if [ "$rc" -ne 0 ]; then echo "post_provision_config_nodes failed with rc=$rc" exit "$rc" fi @@ -78,9 +96,8 @@ if lspci | grep -i nvme; then daos_server nvme reset && rmmod vfio_pci && modprobe vfio_pci fi - systemctl enable nfs-server.service systemctl start nfs-server.service -sync -sync -exit 0 +# TESTING: Force post-provision error path validation on this branch. +echo "TESTING: forcing post-provision failure to validate ERR trap + email path" +bash -c 'exit 2' diff --git a/ci/provisioning/post_provision_config_nodes_EL.sh b/ci/provisioning/post_provision_config_nodes_EL.sh index d57d267be4e..35aadbb4aa9 100644 --- a/ci/provisioning/post_provision_config_nodes_EL.sh +++ b/ci/provisioning/post_provision_config_nodes_EL.sh @@ -6,17 +6,8 @@ # SPDX-License-Identifier: BSD-2-Clause-Patent bootstrap_dnf() { -set +e - systemctl enable postfix.service - systemctl start postfix.service - postfix_start_exit=$? - if [ $postfix_start_exit -ne 0 ]; then - echo "WARNING: Postfix not started: $postfix_start_exit" - systemctl status postfix.service - journalctl -xe -u postfix.service - fi -set -e - # Seems to be needed to fix some issues. + # This must be the first DNF operation after image restore to avoid + # repeated noisy DNF logging in subsequent package operations.
dnf -y reinstall sssd-common } diff --git a/ci/stacktrace.sh b/ci/stacktrace.sh old mode 100644 new mode 100755 index c00ee6d3776..c2c37be387f --- a/ci/stacktrace.sh +++ b/ci/stacktrace.sh @@ -1,5 +1,11 @@ #!/bin/bash +# +# Copyright 2026 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# + stacktrace() { local msg=${1:-"Unchecked error condition at"} local i=${2:-0} diff --git a/docs/dev/documentation.md b/docs/dev/documentation.md index 6e9f290f8ee..dc487b1af23 100644 --- a/docs/dev/documentation.md +++ b/docs/dev/documentation.md @@ -1,354 +1,401 @@ -# DAOS Documentation - -Documentation on DAOS is available in the following places: - -* The [https://daos.io/](https://daos.io/) website contains general - information about the DAOS Foundation, a subproject of the - Linux Foundation. This website is maintained by the - DAOS Foundation and uses Wordpress for content management. - -* The DAOS Community Wiki [https://wiki.daos.io/](https://wiki.daos.io/) - is a redirection to - [https://daosio.atlassian.net/wiki/spaces/DC]( - https://daosio.atlassian.net/wiki/spaces/DC), - an Atlassian Wiki space. - -* Documentation for end users and administrators of DAOS is - available at [https://docs.daos.io/](https://docs.daos.io/). - This website provides versioned documentation for each DAOS release. - Its web pages are created from the contents in the - [daos-stack/daos/docs/](https://github.com/daos-stack/daos/tree/master/docs) - trees of the DAOS release branches in Github. - This website also cross-reference the "DAOS Internals" documentation - (in the "Developer Zone" section). - -* The `dmg` and `daos` commands have man-pages that are created at - DAOS build time through hidden `manpage` options of these commands. - The contents of these man-pages is embedded in the Go code for - the commands. The man-pages currently only output concatenated - lists of all subcommands and their options. 
- See - [src/control/cmd/dmg/README.md](https://github.com/daos-stack/daos/blob/master/src/control/cmd/dmg/README.md) - and - [src/control/cmd/daos/README.md](https://github.com/daos-stack/daos/blob/master/src/control/cmd/daos/README.md). - -* Most DAOS commands and subcommands have a `--help` option which - prints one-line descriptions of the (sub)command and its options. - The contents of these help texts is embedded in the Go code - (or C code) for the commands. - -* Developer-facing "DAOS Internals" documentation of the - DAOS software is maintained as Markdown files within the - [daos-stack/daos/src/](https://github.com/daos-stack/daos/tree/master/src) - source tree of the DAOS Github repository. - Make sure to access the release branch (`master`, `release/2.8`, etc.) - for the version of DAOS you are working with. - -* DAOS API documentation is generated from the DAOS source files using - `doxygen`. Running `doxygen` during the website deployment process for - [https://docs.daos.io/](https://docs.daos.io/) - creates HTML pages in `docs/doxygen/html/`, - which are then made available in the "Developer Zone" section of - [https://docs.daos.io/](https://docs.daos.io/). - -The following sections describe how the multi-versioned documentation -on [https://docs.daos.io/](https://docs.daos.io/) -is generated from the documentation Markdown files in the Github -`daos-stack/daos/docs/` tree (using the `mkdocs` and `mike` tools, -and the `doxygen` tool for the API documentation). - -## Configuration files in Github - -### Mkdocs configuration - -The [MkDocs](https://www.mkdocs.org/) tool is a static site generator -that creates web pages from Markdown files. -The `mkdocs` command is driven by a single configuration file, -[mkdocs.yml](https://github.com/daos-stack/daos/tree/master/mkdocs.yml) -in the toplevel directory of the DAOS project on github. 
-Among other project information, this file defines the -menu structure for the navigation bar of the website that is being created. -Each DAOS release branch uses its own version of `mkdocs.yml`, -as some information in that file is version specific or uses URLs -that are pointing to that specific DAOS version. - -!!! note - Absolute paths or hardcoded version numbers in the `mkdocs.yml` file - should **not** be changed to relative URLs/paths, - as that will break some of the processing by downstream tools. - These specifics need to be updated once when a new release branch is - created, and again when an existing release branch that was under - development reaches General Availability (GA). - Examples with detailed steps can be found below. - -The `mkdocs` tool creates a `sites` subdirectory in the toplevel -directory of the DAOS project on github, -in which the HTML documents for the static website are created. -To prevent this temporary subdirectory to be accidentally committed -into github, the -[.gitignore](https://github.com/daos-stack/daos/blob/master/.gitignore) -file contains a `site/` line. - -### Mike configuration - -The `mike` plugin is used with -[Materials for MkDocs](https://squidfunk.github.io/mkdocs-material/setup/setting-up-versioning/) -to generate versioned documentation for each of the DAOS release branches. -Its configuration information is stored in the `versions.json` file -that resides in a new `gh-pages` branch in a checked out version -of the DAOS repository. -Note that this file is **not** committed into the main DAOS-Stack -github repo. It only exists in the local `gh-pages` branch -on the system on which the webpages are generated. - -The `versions.json` file should **not** be edited directly. -The `mike` command should be used to define which versions of the -documentation are being created, which aliases should be set up, -and which version should be the default version to display. -See below for details. 
- -### Doxygen configuration - -The configuration file for the `doxygen` tool is -[Doxyfile](https://github.com/daos-stack/daos/blob/master/Doxyfile), -located in the toplevel directory of the DAOS project on github. - -## Installing the mkdocs Software - -On the machine where the DAOS documentation is to be built, -GIT needs to be set up so the `daos-stack/daos` project -can be checked out. -In addition, the MkDocs package and plugins need to be installed. -Running `pip install mike` should install the prerequisite packages. - -Depending on the operation system and Python environment, -prerequisite software and the mkdocs plugins may have to be explicitly -installed. For example, on a Windows laptop running Cygwin: - -``` -pip3 install alabaster sphinxcontrib-applehelp sphinxcontrib-devhelp sphinxcontrib-htmlhelp sphinxcontrib-jsmath sphinxcontrib-qthelp - -pip3 install --user mkdocs -pip3 install --user mkdocs-material -pip3 install --user mike -export PATH="$HOME/.local/bin:$PATH" -mike --version -``` - -## Checking out DAOS from github - -To create the DAOS documentation webpages, it is recommended to work -on a clone of the DAOS project on github that is **separate** from any -checked out version that may be used for code development work. - -``` -cd ~/dev -git clone git@github.com:daos-stack/daos.git daos-website -cd daos-website -git branch # should be on master branch -grep site .gitignore # should show "site/" -grep doxy .gitignore # should show "docs/doxygen/" -mike list # should show nothing on a new setup -``` - -## Creating the static website contents - -To create the static website contents, the following process is used. -For each DAOS release branch that should get included on the website: - -* the git release branch is checked out - -* running `doxygen` creates a temporary version of the doxygen documentation - for this release in the `docs/doxygen/html` subdirectory. - -!!! 
note - To prevent these files from being accidentally committed into github, - the [.gitignore](https://github.com/daos-stack/daos/blob/master/.gitignore) - file contains a `docs/doxygen/` line. - -* `mike deploy` is called with a title, a name for the release, - and optional alias name(s) for this release. - These will be used in the multi-versioned website's navigation system. - - - `mike` first uses `mkdocs` to create a temporary copy of the - static webpages for this release in the `site/` subdirectory. - - - `mike` will then copy that `site/` contents into the temporary `gh-pages` - branch, with a directory name that matches the release name - specified on the `mike deploy` invocation. - - - Any aliases for that release will be created as symlinks in the - `gh-pages` branch, pointing to the directory with the main release name. - -This process is repeated for each release. After all versions have been -processed this way, invoking `mike default` will set the version that is -displayed on the website by default. -That command creates a toplevel `index.html` in the `gh-pages` branch which -contains a redirect to the version that was selected as the default. 
- -A complete example for a website where DAOS 2.6 is the current "latest" -version, and the master branch is the "2.7" development branch for what -will eventually become DAOS 2.8: - -``` -mike delete --all - -git checkout master -rm -rf docs/doxygen/html 2>/dev/null -doxygen -mike deploy -t "v2.7 - master" master v2.7 v2.8 2>&1 | tee deploy-log.master.txt -rm -rf docs/doxygen/html - -git checkout release/2.6 -rm -rf docs/doxygen/html 2>/dev/null -doxygen -mike deploy -t "v2.6 - latest" v2.6 latest 2>&1 | tee deploy-log.v2.6.txt -rm -rf docs/doxygen/html - -git checkout release/2.4 -rm -rf docs/doxygen/html 2>/dev/null -doxygen -mike deploy -t "v2.4 - deprecated" v2.4 2>&1 | tee deploy-log.v2.4.txt -rm -rf docs/doxygen/html - -mike set-default latest -mike list -``` - -After `release/2.8` has been branched, but before it becomes generally -available (GA), the website version structure should be updated like this: - -``` -mike delete master # will also delete the v2.7 v2.8 aliases - -git checkout master -rm -rf docs/doxygen/html 2>/dev/null -doxygen -mike deploy -t "v2.9 - master" master v2.9 v3.0 2>&1 | tee deploy-log.master.txt -rm -rf docs/doxygen/html - -git checkout release/2.8 -rm -rf docs/doxygen/html 2>/dev/null -doxygen -mike deploy -t "v2.8 - rc" v2.8 rc 2>&1 | tee deploy-log.v2.8.txt -rm -rf docs/doxygen/html - -mike list -``` - -And finally, when DAOS version 2.8 is released this will be set -as the new `latest` release: - -``` -mike delete rc -mike delete latest - -git checkout release/2.8 -rm -rf docs/doxygen/html 2>/dev/null -doxygen -mike deploy -t "v2.8 - latest" v2.8 latest 2>&1 | tee deploy-log.v2.8.txt -rm -rf docs/doxygen/html -mike set-default latest - -mike list -``` - -It is useful to save the logs from the `mike deploy` invocation. -These logs will contain warnings about pages that exist but are not -linked anywhere in the navigation structure, and other issues. 
- -The new website can be tested locally by running `mike serve` on the machine -where the website is being created. This will start mike's built-in webserver. -The new contents can then be inspected by pointing a browser -(running on the same machine) to `http://localhost:8000/`. - -## Staging the new website contents to docs.daos.io - -Because the version aliases are symlinks, and because the website contents -consists of thousands of small files, copying it to the actual webserver -is best done by creating and transferring a tarfile, which can then be -un-tarred on the webserver and will keep the symlinks intact. - -As a best practice, a new website version is first deployed in a -staging area so it can be validated in the same webserver environment -in which the live website is running: - -``` -export WEBSERVER="docs.daos.io" -export TARFILE="docs-daos-io.tgz" - -git checkout gh-pages - -tar czvf $TARFILE versions.json index.html v?.? latest master - -ssh $WEBSERVER "rm -r docs.daos.io/staging && mkdir docs.daos.io/staging" -scp $TARFILE $WEBSERVER:docs.daos.io/staging -ssh $WEBSERVER "cd docs.daos.io/staging && tar xz $TARFILE && rm $TARFILE" - -rm $TARFILE - -git checkout master -``` - -At this point, the new website can be inspected by using the -[https://docs.daos.io/staging/](https://docs.daos.io/staging/) URL. - -## Go-live of the new website contents on docs.daos.io - -To make the website in the staging area the active website, there's a small -`go-live.sh` script on the webserver that will archive the current live -version and replace it with the one in the staging area: - -``` -$ cat go-live.sh -D=`date "+%Y-%m-%d"` -mv docs.daos.io old-docs.daos.io.$D -mv old-docs.daos.io.$D/staging docs.daos.io -mkdir docs.daos.io/staging -``` - -## Miscellaneous tips and tricks - -### Beware of relative URLs in links - -The `mkdocs` tool creates a *directory* (with an `index.html` in it) -for each Markdown *file* that it processes. 
-So a documentation file like `docs/user/container.md` -in the github master branch will become this URL on the website: -[https://docs.daos.io/master/user/container/index.html](https://docs.daos.io/master/user/container/index.html). - -This additional directory level may break relative URLs in -hyperlinks in the Markdown files. On the other hand, adding that -additional directory level to those relative URLs will break the -links when looking at the (un-processed) Markdown files directly -in github. - -There is no good fix. In some places absolute paths to website URLs -are used to make sure those hyperlinks work in both the raw -Markdown and the rendered HTML pages on the website. The drawback -of this approach is that those URLs will contain hardcoded -release numbers, but those can be quickly updated by a -search and replace when a new release branch is created. - -### Resolving Spell Checker Issues - -The DAOS CI performs spell checking on PRs. -If the spell checker reports an error for a legitimate word -(or acronym), the error can be resolved by adding the term to the -wordlist that is maintained in the `utils/cq/words.dict` file. - -### Always call mike on a release branch, not gh-pages - -You must be in an actual DAOS release branch, -not in the `gh-pages` branch, to run mike commands. -Otherwise the tool will report errors like this: - -``` -$ git branch -* gh-pages - master - -$ mike list -error: [Errno 2] No such file or directory: 'mkdocs.yml'; pass --config-file or set --remote/--branch explicitly -``` - +# DAOS Documentation + +Documentation on DAOS is available in the following places: + +* The [https://daos.io/](https://daos.io/) website contains general + information about the DAOS Foundation, a subproject of the + Linux Foundation. This website is maintained by the + DAOS Foundation and uses Wordpress for content management. 
+ +* The DAOS Community Wiki [https://wiki.daos.io/](https://wiki.daos.io/) + is a redirection to + [https://daosio.atlassian.net/wiki/spaces/DC]( + https://daosio.atlassian.net/wiki/spaces/DC), + an Atlassian Wiki space. + +* Documentation for end users and administrators of DAOS is + available at [https://docs.daos.io/](https://docs.daos.io/). + This website provides versioned documentation for each DAOS release. + Its web pages are created from the contents in the + [daos-stack/daos/docs/](https://github.com/daos-stack/daos/tree/master/docs) + trees of the DAOS release branches in Github. + This website also cross-references the "DAOS Internals" documentation + (in the "Developer Zone" section). + +* The `dmg` and `daos` commands have man-pages that are created at + DAOS build time through hidden `manpage` options of these commands. + The contents of these man-pages is embedded in the Go code for + the commands. The man-pages currently only output concatenated + lists of all subcommands and their options. + See + [src/control/cmd/dmg/README.md](https://github.com/daos-stack/daos/blob/master/src/control/cmd/dmg/README.md) + and + [src/control/cmd/daos/README.md](https://github.com/daos-stack/daos/blob/master/src/control/cmd/daos/README.md). + +* Most DAOS commands and subcommands have a `--help` option which + prints one-line descriptions of the (sub)command and its options. + The contents of these help texts is embedded in the Go code + (or C code) for the commands. + +* Developer-facing "DAOS Internals" documentation of the + DAOS software is maintained as Markdown files within the + [daos-stack/daos/src/](https://github.com/daos-stack/daos/tree/master/src) + source tree of the DAOS Github repository. + Make sure to access the release branch (`master`, `release/2.8`, etc.) + for the version of DAOS you are working with. + +* DAOS API documentation is generated from the DAOS source files using + `doxygen`. 
Running `doxygen` during the website deployment process for + [https://docs.daos.io/](https://docs.daos.io/) + creates HTML pages in `docs/doxygen/html/`, + which are then made available in the "Developer Zone" section of + [https://docs.daos.io/](https://docs.daos.io/). + +The following sections describe how the multi-versioned documentation +on [https://docs.daos.io/](https://docs.daos.io/) +is generated from the documentation Markdown files in the Github +`daos-stack/daos/docs/` tree (using the `mkdocs` and `mike` tools, +and the `doxygen` tool for the API documentation). + +## Configuration files in Github + +### Mkdocs configuration + +The [MkDocs](https://www.mkdocs.org/) tool is a static site generator +that creates web pages from Markdown files. +The `mkdocs` command is driven by a single configuration file, +[mkdocs.yml](https://github.com/daos-stack/daos/tree/master/mkdocs.yml) +in the toplevel directory of the DAOS project on github. +Among other project information, this file defines the +menu structure for the navigation bar of the website that is being created. +Each DAOS release branch uses its own version of `mkdocs.yml`, +as some information in that file is version specific or uses URLs +that are pointing to that specific DAOS version. + +!!! note + Absolute paths or hardcoded version numbers in the `mkdocs.yml` file + should **not** be changed to relative URLs/paths, + as that will break some of the processing by downstream tools. + These specifics need to be updated once when a new release branch is + created, and again when an existing release branch that was under + development reaches General Availability (GA). + Examples with detailed steps can be found below. + +The `mkdocs` tool creates a `site` subdirectory in the toplevel +directory of the DAOS project on github, +in which the HTML documents for the static website are created. 
+To prevent this temporary subdirectory from being accidentally committed +into github, the +[.gitignore](https://github.com/daos-stack/daos/blob/master/.gitignore) +file contains a `site/` line. + +### Mike configuration + +The `mike` plugin is used with +[Material for MkDocs](https://squidfunk.github.io/mkdocs-material/setup/setting-up-versioning/) +to generate versioned documentation for each of the DAOS release branches. +Its configuration information is stored in the `versions.json` file +that resides in a new `gh-pages` branch in a checked out version +of the DAOS repository. +Note that this file is **not** committed into the main DAOS-Stack +github repo. It only exists in the local `gh-pages` branch +on the system on which the webpages are generated. + +The `versions.json` file should **not** be edited directly. +The `mike` command should be used to define which versions of the +documentation are being created, which aliases should be set up, +and which version should be the default version to display. +See below for details. + +### Doxygen configuration + +The configuration file for the `doxygen` tool is +[Doxyfile](https://github.com/daos-stack/daos/blob/master/Doxyfile), +located in the toplevel directory of the DAOS project on github. + +## Installing the mkdocs Software + +On the machine where the DAOS documentation is to be built, +GIT needs to be set up so the `daos-stack/daos` project +can be checked out. +In addition, the MkDocs package and plugins need to be installed. +Running `pip install mike` should install the prerequisite packages. + +Depending on the operating system and Python environment, +prerequisite software and the mkdocs plugins may have to be explicitly +installed. 
For example, on a Windows laptop running Cygwin: + +```sh +pip3 install alabaster sphinxcontrib-applehelp sphinxcontrib-devhelp sphinxcontrib-htmlhelp sphinxcontrib-jsmath sphinxcontrib-qthelp + +pip3 install --user mkdocs +pip3 install --user mkdocs-material +pip3 install --user mike +export PATH="$HOME/.local/bin:$PATH" +mike --version +``` + +## Checking out DAOS from github + +To create the DAOS documentation webpages, it is recommended to work +on a clone of the DAOS project on github that is **separate** from any +checked out version that may be used for code development work. + +```sh +cd ~/dev +git clone git@github.com:daos-stack/daos.git daos-website +cd daos-website +git branch # should be on master branch +grep site .gitignore # should show "site/" +grep doxy .gitignore # should show "docs/doxygen/" +mike list # should show nothing on a new setup +``` + +## Creating the static website contents + +To create the static website contents, the following process is used. +For each DAOS release branch that should get included on the website: + +* the git release branch is checked out + +* running `doxygen` creates a temporary version of the doxygen documentation + for this release in the `docs/doxygen/html` subdirectory. + +!!! note + To prevent these files from being accidentally committed into github, + the [.gitignore](https://github.com/daos-stack/daos/blob/master/.gitignore) + file contains a `docs/doxygen/` line. + +* `mike deploy` is called with a title, a name for the release, + and optional alias name(s) for this release. + These will be used in the multi-versioned website's navigation system. + + * `mike` first uses `mkdocs` to create a temporary copy of the + static webpages for this release in the `site/` subdirectory. + + * `mike` will then copy that `site/` contents into the temporary `gh-pages` + branch, with a directory name that matches the release name + specified on the `mike deploy` invocation. 
+ + * Any aliases for that release will be created as symlinks in the + `gh-pages` branch, pointing to the directory with the main release name. + +This process is repeated for each release. After all versions have been +processed this way, invoking `mike set-default` will set the version that is +displayed on the website by default. +That command creates a toplevel `index.html` in the `gh-pages` branch which +contains a redirect to the version that was selected as the default. + +A complete example for a website where DAOS 2.6 is the current "latest" +version, and the master branch is the "2.7" development branch for what +will eventually become DAOS 2.8: + +```sh +mike delete --all + +git checkout master +rm -rf docs/doxygen/html 2>/dev/null +doxygen +mike deploy -t "v2.7 - master" master v2.7 v2.8 2>&1 | tee deploy-log.master.txt +rm -rf docs/doxygen/html + +git checkout release/2.6 +rm -rf docs/doxygen/html 2>/dev/null +doxygen +mike deploy -t "v2.6 - latest" v2.6 latest 2>&1 | tee deploy-log.v2.6.txt +rm -rf docs/doxygen/html + +git checkout release/2.4 +rm -rf docs/doxygen/html 2>/dev/null +doxygen +mike deploy -t "v2.4 - deprecated" v2.4 2>&1 | tee deploy-log.v2.4.txt +rm -rf docs/doxygen/html + +mike set-default latest +mike list +``` + +After `release/2.8` has been branched, but before it becomes generally +available (GA), the website version structure should be updated like this: + +```sh +mike delete master # will also delete the v2.7 v2.8 aliases + +git checkout master +rm -rf docs/doxygen/html 2>/dev/null +doxygen +mike deploy -t "v2.9 - master" master v2.9 v3.0 2>&1 | tee deploy-log.master.txt +rm -rf docs/doxygen/html + +git checkout release/2.8 +rm -rf docs/doxygen/html 2>/dev/null +doxygen +mike deploy -t "v2.8 - rc" v2.8 rc 2>&1 | tee deploy-log.v2.8.txt +rm -rf docs/doxygen/html + +mike list +``` + +And finally, when DAOS version 2.8 is released this will be set +as the new `latest` release: + +```sh +mike delete rc +mike delete latest + +git 
checkout release/2.8 +rm -rf docs/doxygen/html 2>/dev/null +doxygen +mike deploy -t "v2.8 - latest" v2.8 latest 2>&1 | tee deploy-log.v2.8.txt +rm -rf docs/doxygen/html +mike set-default latest + +mike list +``` + +It is useful to save the logs from the `mike deploy` invocation. +These logs will contain warnings about pages that exist but are not +linked anywhere in the navigation structure, and other issues. + +The new website can be tested locally by running `mike serve` on the machine +where the website is being created. This will start mike's built-in webserver. +The new contents can then be inspected by pointing a browser +(running on the same machine) to `http://localhost:8000/`. + +## Staging the new website contents to docs.daos.io + +Because the version aliases are symlinks, and because the website contents +consists of thousands of small files, copying it to the actual webserver +is best done by creating and transferring a tarfile, which can then be +un-tarred on the webserver and will keep the symlinks intact. + +As a best practice, a new website version is first deployed in a +staging area so it can be validated in the same webserver environment +in which the live website is running: + +```sh +export WEBSERVER="docs.daos.io" +export TARFILE="docs-daos-io.tgz" + +git checkout gh-pages + +tar czvf $TARFILE versions.json index.html v?.? latest master + +ssh $WEBSERVER "rm -r docs.daos.io/staging && mkdir docs.daos.io/staging" +scp $TARFILE $WEBSERVER:docs.daos.io/staging +ssh $WEBSERVER "cd docs.daos.io/staging && tar xzf $TARFILE && rm $TARFILE" + +rm $TARFILE + +git checkout master +``` + +At this point, the new website can be inspected by using the +[https://docs.daos.io/staging/](https://docs.daos.io/staging/) URL. 
+ +## Go-live of the new website contents on docs.daos.io + +To make the website in the staging area the active website, there's a small +`go-live.sh` script on the webserver that will archive the current live +version and replace it with the one in the staging area: + +```sh +$ cat go-live.sh +D=`date "+%Y-%m-%d"` +mv docs.daos.io old-docs.daos.io.$D +mv old-docs.daos.io.$D/staging docs.daos.io +mkdir docs.daos.io/staging +``` + +## Miscellaneous tips and tricks + +### Beware of relative URLs in links + +The `mkdocs` tool creates a *directory* (with an `index.html` in it) +for each Markdown *file* that it processes. +So a documentation file like `docs/user/container.md` +in the github master branch will become this URL on the website: +[https://docs.daos.io/master/user/container/index.html](https://docs.daos.io/master/user/container/index.html). + +This additional directory level may break relative URLs in +hyperlinks in the Markdown files. On the other hand, adding that +additional directory level to those relative URLs will break the +links when looking at the (un-processed) Markdown files directly +in github. + +There is no good fix. In some places absolute paths to website URLs +are used to make sure those hyperlinks work in both the raw +Markdown and the rendered HTML pages on the website. The drawback +of this approach is that those URLs will contain hardcoded +release numbers, but those can be quickly updated by a +search and replace when a new release branch is created. + +### Resolving Spell Checker Issues + +The DAOS CI performs spell checking on PRs. +If the spell checker reports an error for a legitimate word +(or acronym), the error can be resolved by adding the term to the +wordlist that is maintained in the `utils/cq/words.dict` file. + +### VS Code cSpell Setup + +The Visual Studio Code "Code Spell Checker" extension works differently than the DAOS CI +spell checker because it uses cSpell. 
+ +To align local VS Code spell checking with DAOS word lists, +configure the workspace to load the same files used by repository checks. + +This way, the VS Code IDE will not flag words that are already known +to DAOS as spelling errors. + +As VS Code will flag many more false positives than DAOS CI, you should add +the extra words the IDE detects to your workspace or user settings. + +Use a workspace settings file at `.vscode/settings.json` +and include dictionary entries for: + +* `ci/codespell.ignores` +* `utils/cq/words.dict` + +Example: + +```json +{ + "cSpell.customDictionaries": { + "daosCodespellIgnores": { + "name": "daosCodespellIgnores", + "path": "${workspaceFolder}/ci/codespell.ignores", + "addWords": false, + "scope": "workspace" + }, + "daosWordsDict": { + "name": "daosWordsDict", + "path": "${workspaceFolder}/utils/cq/words.dict", + "addWords": false, + "scope": "workspace" + } + }, + "cSpell.dictionaries": [ + "daosCodespellIgnores", + "daosWordsDict" + ] +} +``` + +This keeps personal word lists separate while preserving behavior +consistent with DAOS spell-check inputs. + +### Always call mike on a release branch, not gh-pages + +You must be in an actual DAOS release branch, +not in the `gh-pages` branch, to run mike commands. +Otherwise the tool will report errors like this: + +```sh +$ git branch +* gh-pages + master + +$ mike list +error: [Errno 2] No such file or directory: 'mkdocs.yml'; pass --config-file or set --remote/--branch explicitly +``` diff --git a/utils/githooks/README.md b/utils/githooks/README.md index 5ad8fcfba47..bbcedcd19db 100644 --- a/utils/githooks/README.md +++ b/utils/githooks/README.md @@ -1,3 +1,9 @@ + + # About DAOS Git hooks Githooks are a [well documented](https://git-scm.com/docs/githooks) feature @@ -14,7 +20,8 @@ Installing is a two-step process: ### 1. Install the hooks -Configure your `core.hookspath`. +Configure your `core.hookspath`. 
+ Any new githooks added to the repository will automatically run, but possibly require additional software to produce the desired effect. Additionally, as the branch changes, the githooks change with it. @@ -32,25 +39,36 @@ The Githooks framework in DAOS is such that the hooks will all run. However, some hooks will simply check for required software and are effectively a noop if such is not installed. -Requirements come from a combination of `pip` and system packages and can usually be installed through standard means. +Requirements come from a combination of `pip` and system packages and can usually be installed +through standard means. #### Install pip packages -To install `pip` packages specified in [utils/cq/requirements.txt](../../utils/cq/requirements.txt) it is recommended to setup a virtual environment and install with pip. -It is recommended to use python 3.11, but you need at least 3.10 to get the latest version of each package. + +To install `pip` packages specified in [utils/cq/requirements.txt](../../utils/cq/requirements.txt) +it is recommended to setup a virtual environment and install with pip. +It is recommended to use python 3.11, but you need at least 3.10 to get the latest version of each +package. + You can setup a virtual environment with: + ```sh python3.11 -m venv /path/to/my_env ``` + Then, to use it: + ```sh source /path/to/my_env/bin/activate ``` + Then, to install the requirements: + ```sh python3 -m pip install -r utils/cq/requirements.txt ``` #### Install System Packages + Install system packages with your package manager - for example: ```sh @@ -108,7 +126,8 @@ allowing the user to inspect the changes and retry the commit. 7. isort - Linter for python imports on modified python files 8. flake - Linter for python files 9. pylint - Additional linter for modified python files - - See [daos_pylint.py](../../utils/cq/daos_pylint.py) for a custom wrapper around `pylint` which manages `PYTHONPATH` setup internally. 
+ - See [daos_pylint.py](../../utils/cq/daos_pylint.py) for a custom wrapper around `pylint` which + manages `PYTHONPATH` setup internally. 10. ftest - Custom linter for modified ftest files ### prepare-commit-msg