From 21c3b60b4c1f9672325d3dc27b06b6f0afd6ee4d Mon Sep 17 00:00:00 2001
From: Anjali05
Date: Wed, 4 Mar 2026 10:37:47 -0600
Subject: [PATCH] backup

---
 diagnosis/perf.sh   | 119 ++++++++++++++++++++++++++
 diagnosis/strace.sh | 190 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 309 insertions(+)
 create mode 100755 diagnosis/perf.sh
 create mode 100755 diagnosis/strace.sh

diff --git a/diagnosis/perf.sh b/diagnosis/perf.sh
new file mode 100755
index 0000000000..784d9237d8
--- /dev/null
+++ b/diagnosis/perf.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+#
+# Record On-CPU and Off-CPU perf profiles for every process in a Docker
+# container and render them as flame graphs.
+#
+# Usage: ./perf.sh <container_name> <duration_seconds>
+#
+# Environment overrides:
+#   PERF_BIN        perf binary to run (default: /root/linux/tools/perf/perf)
+#   FLAMEGRAPH_DIR  FlameGraph checkout (default: $HOME/FlameGraph)
+
+CONTAINER_NAME=$1
+DURATION=$2
+PERF_BIN="${PERF_BIN:-/root/linux/tools/perf/perf}"
+FLAMEGRAPH_DIR="${FLAMEGRAPH_DIR:-$HOME/FlameGraph}"
+OUTPUT_DIR="./flamegraphs"
+
+if [ -z "$CONTAINER_NAME" ] || [ -z "$DURATION" ]; then
+  echo "Usage: $0 <container_name> <duration_seconds>"
+  exit 1
+fi
+
+# Check for the FlameGraph scripts before tracing, so a full recording
+# window is not spent on data that cannot be rendered.
+if [ ! -f "$FLAMEGRAPH_DIR/stackcollapse-perf.pl" ]; then
+  echo "ERROR: stackcollapse-perf.pl not found at $FLAMEGRAPH_DIR"
+  exit 1
+fi
+
+if [ ! -f "$FLAMEGRAPH_DIR/flamegraph.pl" ]; then
+  echo "ERROR: flamegraph.pl not found at $FLAMEGRAPH_DIR"
+  exit 1
+fi
+
+# 1. Get the long Container ID
+if ! CONTAINER_ID=$(docker inspect --format='{{.Id}}' "$CONTAINER_NAME"); then
+  echo "Container not found"
+  exit 1
+fi
+
+# 2. Get all PIDs in the container as a comma-separated list. awk skips the
+# header row and strips any column padding so perf gets a clean "pid,pid" list.
+PIDS=$(docker top "$CONTAINER_NAME" -eo pid | awk 'NR > 1 && NF { print $1 }' | paste -sd, -)
+if [ -z "$PIDS" ]; then
+  echo "No processes found in container"
+  exit 1
+fi
+IFS=, read -r -a PID_LIST <<< "$PIDS"
+
+# Create output directory if it doesn't exist
+mkdir -p "$OUTPUT_DIR"
+
+echo "--- Tracing Container: $CONTAINER_NAME ---"
+echo "Container ID: $CONTAINER_ID"
+echo "Tracing PIDs: $PIDS"
+echo "Duration: $DURATION seconds"
+echo "Number of PIDs: ${#PID_LIST[@]}"
+
+# 3. Start Off-CPU tracing in the background (using perf sched events).
+# -a is not passed: perf discards system-wide mode when --pid is given and
+# only prints a "PID/TID switch overriding SYSTEM" warning.
+echo "Starting Off-CPU trace..."
+sudo "$PERF_BIN" record -e sched:sched_switch -g --pid="$PIDS" -o "$OUTPUT_DIR/offcpu.data" -- sleep "$DURATION" &
+OFF_PID=$!
+
+# 4. Start On-CPU tracing (sampling CPU cycles at 99 Hz)
+echo "Starting On-CPU trace..."
+sudo "$PERF_BIN" record -F 99 -e cycles -g --pid="$PIDS" -o "$OUTPUT_DIR/oncpu.data" -- sleep "$DURATION"
+
+# Wait for Off-CPU to finish
+wait "$OFF_PID"
+
+echo "--- Processing Results ---"
+
+# Check that both data files were created
+if [ ! -f "$OUTPUT_DIR/oncpu.data" ]; then
+  echo "ERROR: oncpu.data not created"
+  exit 1
+fi
+
+if [ ! -f "$OUTPUT_DIR/offcpu.data" ]; then
+  echo "ERROR: offcpu.data not created"
+  exit 1
+fi
+
+echo "Data files created successfully"
+ls -lh "$OUTPUT_DIR"/*.data
+
+# 5. Generate On-CPU Flamegraph.
+# perf script runs under sudo because the data files were written by the
+# sudo'd perf record above and may not be readable by the invoking user.
+echo "Processing On-CPU data..."
+sudo "$PERF_BIN" script -i "$OUTPUT_DIR/oncpu.data" | "$FLAMEGRAPH_DIR/stackcollapse-perf.pl" > "$OUTPUT_DIR/oncpu.folded"
+ONCPU_LINES=$(wc -l < "$OUTPUT_DIR/oncpu.folded")
+echo "On-CPU folded lines: $ONCPU_LINES"
+
+if [ "$ONCPU_LINES" -gt 0 ]; then
+  "$FLAMEGRAPH_DIR/flamegraph.pl" "$OUTPUT_DIR/oncpu.folded" > "$OUTPUT_DIR/${CONTAINER_NAME}_oncpu.svg"
+  echo "Created $OUTPUT_DIR/${CONTAINER_NAME}_oncpu.svg"
+else
+  echo "WARNING: No On-CPU data captured"
+fi
+
+# 6. Generate Off-CPU Flamegraph.
+# NOTE(review): stackcollapse-perf.pl counts one sample per sched_switch
+# event, so frame widths likely reflect switch counts, not blocked time.
+echo "Processing Off-CPU data..."
+sudo "$PERF_BIN" script -i "$OUTPUT_DIR/offcpu.data" | "$FLAMEGRAPH_DIR/stackcollapse-perf.pl" > "$OUTPUT_DIR/offcpu.folded"
+OFFCPU_LINES=$(wc -l < "$OUTPUT_DIR/offcpu.folded")
+echo "Off-CPU folded lines: $OFFCPU_LINES"
+
+if [ "$OFFCPU_LINES" -gt 0 ]; then
+  "$FLAMEGRAPH_DIR/flamegraph.pl" --color=io --title="Off-CPU: $CONTAINER_NAME" "$OUTPUT_DIR/offcpu.folded" > "$OUTPUT_DIR/${CONTAINER_NAME}_offcpu.svg"
+  echo "Created $OUTPUT_DIR/${CONTAINER_NAME}_offcpu.svg"
+else
+  echo "WARNING: No Off-CPU data captured"
+fi
+
+echo "Done! Check $OUTPUT_DIR/${CONTAINER_NAME}_oncpu.svg and $OUTPUT_DIR/${CONTAINER_NAME}_offcpu.svg"
diff --git a/diagnosis/strace.sh b/diagnosis/strace.sh
new file mode 100755
index 0000000000..3dbabc043b
--- /dev/null
+++ b/diagnosis/strace.sh
@@ -0,0 +1,190 @@
+#!/bin/bash
+#
+# Attach strace to every process in a Docker container for a fixed duration
+# and write the raw trace plus a summary of the captured system calls.
+#
+# Usage: ./strace.sh <container_name> <duration_seconds>
+
+CONTAINER_NAME=$1
+DURATION=$2
+OUTPUT_DIR="./strace_output"
+
+if [ -z "$CONTAINER_NAME" ] || [ -z "$DURATION" ]; then
+  echo "Usage: $0 <container_name> <duration_seconds>"
+  exit 1
+fi
+
+# Print the number of syscall entries in $OUTPUT_FILE whose name matches the
+# ERE alternation in $1 (for example 'open|openat').
+# The match is anchored on the "PID HH:MM:SS name(" prefix written by
+# strace -f -t, so names that only occur inside arguments are ignored and
+# "<... name resumed>" continuation lines are not counted a second time.
+count_syscalls() {
+  # grep -c prints 0 itself when nothing matches; "|| true" only hides its
+  # exit status of 1 instead of echoing a second, duplicate 0.
+  grep -cE "^[0-9]+ +[0-9:]+ +($1)\(" "$OUTPUT_FILE" || true
+}
+
+# 1. Get the long Container ID
+if ! CONTAINER_ID=$(docker inspect --format='{{.Id}}' "$CONTAINER_NAME"); then
+  echo "Container not found"
+  exit 1
+fi
+
+# 2. Get all PIDs in the container (one per line; awk skips the header row
+# and strips any column padding)
+PIDS=$(docker top "$CONTAINER_NAME" -eo pid | awk 'NR > 1 && NF { print $1 }')
+if [ -z "$PIDS" ]; then
+  echo "No processes found in container"
+  exit 1
+fi
+
+# Create output directory if it doesn't exist
+mkdir -p "$OUTPUT_DIR"
+
+echo "--- Strace Container: $CONTAINER_NAME ---"
+echo "Container ID: $CONTAINER_ID"
+echo "Duration: $DURATION seconds"
+echo "Number of PIDs: $(echo "$PIDS" | wc -l)"
+echo "PIDs: $(echo "$PIDS" | tr '\n' ' ')"
+
+# Timestamp for output files
+TIMESTAMP=$(date +%Y%m%d_%H%M%S)
+OUTPUT_FILE="$OUTPUT_DIR/${CONTAINER_NAME}_${TIMESTAMP}.strace"
+SUMMARY_FILE="$OUTPUT_DIR/${CONTAINER_NAME}_${TIMESTAMP}_summary.txt"
+
+# 3. Build the strace "-p PID" arguments as an array so each PID is passed
+# as its own word without relying on unquoted expansion
+STRACE_PID_ARGS=()
+while IFS= read -r pid; do
+  STRACE_PID_ARGS+=(-p "$pid")
+done <<< "$PIDS"
+
+echo "Starting strace..."
+echo "Output file: $OUTPUT_FILE"
+
+# 4. Run strace with timing information and follow forks
+# -f: follow forks
+# -t: print timestamps
+# -T: show time spent in each syscall
+# -y: print paths associated with file descriptors
+# -s: max string size to print (default 32, increase if needed)
+# -o: output file
+# timeout sends SIGINT so strace detaches cleanly, then SIGKILL (-k) 5 s later
+# if it is still running. sudo wraps timeout, not the other way round, so the
+# process delivering those signals runs as root and may signal strace.
+sudo timeout -s INT -k 5 "$DURATION" strace -f -t -T -y -s 256 "${STRACE_PID_ARGS[@]}" -o "$OUTPUT_FILE" 2>&1
+STRACE_EXIT=$?
+
+if [ $STRACE_EXIT -eq 124 ]; then
+  echo "Strace completed (timeout reached)"
+elif [ $STRACE_EXIT -eq 0 ]; then
+  echo "Strace completed successfully"
+else
+  echo "Strace exited with code $STRACE_EXIT"
+fi
+
+echo "--- Processing Results ---"
+
+# Check if output file was created and has content
+if [ ! -f "$OUTPUT_FILE" ]; then
+  echo "ERROR: strace output file not created"
+  exit 1
+fi
+
+FILE_SIZE=$(wc -c < "$OUTPUT_FILE")
+echo "Output file size: $FILE_SIZE bytes"
+
+if [ "${FILE_SIZE:-0}" -eq 0 ]; then
+  echo "WARNING: No strace data captured"
+  exit 1
+fi
+
+# 5. Generate summary statistics
+echo "Generating summary..."
+
+{
+  echo "=== Strace Summary for $CONTAINER_NAME ==="
+  echo "Timestamp: $TIMESTAMP"
+  echo "Duration: $DURATION seconds"
+  echo "Container ID: $CONTAINER_ID"
+  echo "PIDs traced: $(echo "$PIDS" | tr '\n' ' ')"
+  echo ""
+  echo "=== Top 20 System Calls by Count ==="
+  grep -oP '^\d+\s+\d+:\d+:\d+\s+\K[a-z_0-9]+(?=\()' "$OUTPUT_FILE" | sort | uniq -c | sort -rn | head -20
+  echo ""
+  echo "=== Top 20 System Calls by Time (if -T was used) ==="
+  # Sum the trailing <seconds> of each completed call per syscall name; a
+  # resumed line carries the name as its 4th field ("<... name resumed>").
+  awk '
+    match($0, /<[0-9.]+>$/) {
+      elapsed = substr($0, RSTART + 1, RLENGTH - 2)
+      if ($3 == "<...") {
+        name = $4
+      } else {
+        name = $3
+        sub(/\(.*/, "", name)
+      }
+      if (name ~ /^[a-z_0-9]+$/) {
+        total[name] += elapsed
+        calls[name]++
+      }
+    }
+    END {
+      for (name in total) {
+        printf "%12.6f s %8d calls  %s\n", total[name], calls[name], name
+      }
+    }
+  ' "$OUTPUT_FILE" | sort -rn | head -20
+  grep -oP '<[\d.]+>' "$OUTPUT_FILE" | sed 's/[<>]//g' | awk '{sum+=$1; count++} END {if(count>0) print "Average syscall time:", sum/count, "seconds"}'
+  echo ""
+  echo "=== Error Summary (failed syscalls) ==="
+  # Only the errno that follows the return value is counted; flag names such
+  # as EPOLLIN inside the arguments are not errors.
+  grep -oP '= (?:-1|\?) \KE[A-Z0-9]+' "$OUTPUT_FILE" | sort | uniq -c | sort -rn | head -10
+  echo ""
+  echo "=== File Operations ==="
+  echo "open/openat calls:"
+  count_syscalls 'open|openat'
+  echo "read/pread calls:"
+  count_syscalls 'read|pread64'
+  echo "write/pwrite calls:"
+  count_syscalls 'write|pwrite64'
+  echo "close calls:"
+  count_syscalls 'close'
+  echo ""
+  echo "=== Network Operations ==="
+  echo "socket calls:"
+  count_syscalls 'socket'
+  echo "connect calls:"
+  count_syscalls 'connect'
+  echo "send/write calls:"
+  count_syscalls 'send|sendto|sendmsg|sendmmsg'
+  echo "recv/read calls:"
+  count_syscalls 'recv|recvfrom|recvmsg|recvmmsg'
+  echo ""
+  echo "=== Memory Operations ==="
+  echo "mmap calls:"
+  count_syscalls 'mmap'
+  echo "munmap calls:"
+  count_syscalls 'munmap'
+  echo "brk calls:"
+  count_syscalls 'brk'
+  echo ""
+  echo "=== Process/Thread Operations ==="
+  echo "clone calls:"
+  count_syscalls 'clone|clone3'
+  echo "fork calls:"
+  count_syscalls 'fork|vfork'
+  echo "execve calls:"
+  count_syscalls 'execve'
+  echo "exit calls:"
+  count_syscalls 'exit|exit_group'
+} > "$SUMMARY_FILE"
+
+echo "Summary saved to: $SUMMARY_FILE"
+echo ""
+cat "$SUMMARY_FILE"
+echo ""
+echo "Done! Full strace output: $OUTPUT_FILE"
+echo "Summary: $SUMMARY_FILE"