Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions diagnosis/perf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/bin/bash
#
# Record on-CPU (cycles) and off-CPU (sched_switch) perf profiles for every
# process of a Docker container, then render both as flame graph SVGs in
# ./flamegraphs.
#
# Usage: ./perf.sh <container_name> <duration_seconds>
#
# Optional environment overrides:
#   PERF_BIN        perf binary to run       (default: /root/linux/tools/perf/perf)
#   FLAMEGRAPH_DIR  FlameGraph script folder (default: $HOME/FlameGraph)

# Let "a | b" report a failure of "a", so a broken `perf script` is not
# silently reported as "no data captured".
set -o pipefail

CONTAINER_NAME=$1
DURATION=$2
PERF_BIN="${PERF_BIN:-/root/linux/tools/perf/perf}"
FLAMEGRAPH_DIR="${FLAMEGRAPH_DIR:-$HOME/FlameGraph}"
OUTPUT_DIR="./flamegraphs"

if [ -z "$CONTAINER_NAME" ] || [ -z "$DURATION" ]; then
  echo "Usage: $0 <container_name> <duration>"
  exit 1
fi

# The duration is handed to sleep(1); reject anything it would refuse so we do
# not start two recorders that terminate immediately.
if ! [[ "$DURATION" =~ ^[0-9]+([.][0-9]+)?[smhd]?$ ]]; then
  echo "ERROR: duration must be a number of seconds (got '$DURATION')" >&2
  exit 1
fi

# 1. Get the long Container ID
if ! CONTAINER_ID=$(docker inspect --format='{{.Id}}' "$CONTAINER_NAME"); then
  echo "Container not found"
  exit 1
fi

# 2. Get all PIDs in the container as a comma-separated list.
#    awk skips the header row and keeps only purely numeric fields, so stray
#    padding can never end up inside the list handed to --pid.
PIDS=$(docker top "$CONTAINER_NAME" -eo pid \
  | awk 'NR > 1 && $1 ~ /^[0-9]+$/ { print $1 }' \
  | paste -sd, -)
if [ -z "$PIDS" ]; then
  echo "No processes found in container"
  exit 1
fi
IFS=, read -ra PID_LIST <<< "$PIDS"

# Create output directory if it doesn't exist
if ! mkdir -p "$OUTPUT_DIR"; then
  echo "ERROR: cannot create $OUTPUT_DIR" >&2
  exit 1
fi

echo "--- Tracing Container: $CONTAINER_NAME ---"
echo "Container ID: $CONTAINER_ID"
echo "Tracing PIDs: $PIDS"
echo "Duration: $DURATION seconds"
echo "Number of PIDs: ${#PID_LIST[@]}"

# 3. Start Off-CPU tracing in the background (using perf sched events).
#    No -a here: --pid already selects the target, and perf overrides a
#    system-wide request with a warning when both are given.
echo "Starting Off-CPU trace..."
sudo "$PERF_BIN" record -e sched:sched_switch -g --pid="$PIDS" \
  -o "$OUTPUT_DIR/offcpu.data" -- sleep "$DURATION" &
OFF_PID=$!

# 4. Start On-CPU tracing in the foreground, sampling cycles at 99 Hz.
echo "Starting On-CPU trace..."
sudo "$PERF_BIN" record -F 99 -e cycles -g --pid="$PIDS" \
  -o "$OUTPUT_DIR/oncpu.data" -- sleep "$DURATION"
ON_STATUS=$?

# Wait for Off-CPU to finish
wait "$OFF_PID"
OFF_STATUS=$?

# A failed recorder is only reported: whatever data it managed to write is
# still processed below.
if [ "$ON_STATUS" -ne 0 ]; then
  echo "WARNING: On-CPU perf record exited with code $ON_STATUS" >&2
fi
if [ "$OFF_STATUS" -ne 0 ]; then
  echo "WARNING: Off-CPU perf record exited with code $OFF_STATUS" >&2
fi

echo "--- Processing Results ---"

# Check that the data files were created and are not empty
if [ ! -s "$OUTPUT_DIR/oncpu.data" ]; then
  echo "ERROR: oncpu.data not created"
  exit 1
fi

if [ ! -s "$OUTPUT_DIR/offcpu.data" ]; then
  echo "ERROR: offcpu.data not created"
  exit 1
fi

echo "Data files created successfully"
ls -lh "$OUTPUT_DIR"/*.data

# Check if FlameGraph scripts exist
if [ ! -f "$FLAMEGRAPH_DIR/stackcollapse-perf.pl" ]; then
  echo "ERROR: stackcollapse-perf.pl not found at $FLAMEGRAPH_DIR"
  exit 1
fi

if [ ! -f "$FLAMEGRAPH_DIR/flamegraph.pl" ]; then
  echo "ERROR: flamegraph.pl not found at $FLAMEGRAPH_DIR"
  exit 1
fi

#######################################
# Fold one perf data file and render it as a flame graph.
# Globals:   PERF_BIN, FLAMEGRAPH_DIR (read)
# Arguments: $1 - label used in messages ("On-CPU" / "Off-CPU")
#            $2 - perf data file to read
#            $3 - folded-stack file to write
#            $4 - SVG file to write
#            $5.. - extra options passed to flamegraph.pl
# Returns:   1 if the perf script / stackcollapse pipeline fails, else 0
#######################################
render_flamegraph() {
  local label=$1 data=$2 folded=$3 svg=$4
  shift 4
  local folded_lines

  echo "Processing $label data..."
  # The data was recorded through sudo, so read it back through sudo as well.
  if ! sudo "$PERF_BIN" script -i "$data" \
    | "$FLAMEGRAPH_DIR/stackcollapse-perf.pl" > "$folded"; then
    echo "ERROR: could not convert $data into folded stacks" >&2
    return 1
  fi

  folded_lines=$(wc -l < "$folded")
  echo "$label folded lines: $folded_lines"

  if [ "$folded_lines" -gt 0 ]; then
    "$FLAMEGRAPH_DIR/flamegraph.pl" "$@" "$folded" > "$svg"
    echo "Created $svg"
  else
    echo "WARNING: No $label data captured"
  fi
}

# 5. Generate On-CPU Flamegraph
render_flamegraph "On-CPU" \
  "$OUTPUT_DIR/oncpu.data" \
  "$OUTPUT_DIR/oncpu.folded" \
  "$OUTPUT_DIR/${CONTAINER_NAME}_oncpu.svg"

# 6. Generate Off-CPU Flamegraph
# NOTE(review): this folds sched_switch events, so frame widths presumably
# reflect how often a stack was switched out rather than how long it stayed
# blocked - confirm before reading it as off-CPU time.
render_flamegraph "Off-CPU" \
  "$OUTPUT_DIR/offcpu.data" \
  "$OUTPUT_DIR/offcpu.folded" \
  "$OUTPUT_DIR/${CONTAINER_NAME}_offcpu.svg" \
  --color=io --title="Off-CPU: $CONTAINER_NAME"

echo "Done! Check $OUTPUT_DIR/${CONTAINER_NAME}_oncpu.svg and $OUTPUT_DIR/${CONTAINER_NAME}_offcpu.svg"
147 changes: 147 additions & 0 deletions diagnosis/strace.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#!/bin/bash
#
# Attach strace to every process of a Docker container for a fixed time,
# store the raw trace under ./strace_output and summarise it.
#
# Usage: ./strace.sh <container_name> <duration_seconds>
CONTAINER_NAME=$1
DURATION=$2
OUTPUT_DIR="./strace_output"

if [ -z "$CONTAINER_NAME" ] || [ -z "$DURATION" ]; then
  echo "Usage: $0 <container_name> <duration>"
  exit 1
fi

# The duration is handed to timeout(1). A value of 0 would disable the timeout
# and leave strace attached forever, so insist on a positive number.
if ! [[ "$DURATION" =~ ^[0-9]+([.][0-9]+)?[smhd]?$ ]] \
  || [[ "$DURATION" =~ ^[0.]+[smhd]?$ ]]; then
  echo "ERROR: duration must be a positive number of seconds (got '$DURATION')" >&2
  exit 1
fi

# 1. Get the long Container ID
if ! CONTAINER_ID=$(docker inspect --format='{{.Id}}' "$CONTAINER_NAME"); then
  echo "Container not found"
  exit 1
fi

# 2. Get all PIDs in the container, one per line.
#    awk drops the header row and anything that is not a plain number.
PIDS=$(docker top "$CONTAINER_NAME" -eo pid \
  | awk 'NR > 1 && $1 ~ /^[0-9]+$/ { print $1 }')
if [ -z "$PIDS" ]; then
  echo "No processes found in container"
  exit 1
fi

# Create output directory if it doesn't exist
if ! mkdir -p "$OUTPUT_DIR"; then
  echo "ERROR: cannot create $OUTPUT_DIR" >&2
  exit 1
fi

echo "--- Strace Container: $CONTAINER_NAME ---"
echo "Container ID: $CONTAINER_ID"
echo "Duration: $DURATION seconds"
echo "Number of PIDs: $(echo "$PIDS" | wc -l)"
echo "PIDs: $(echo "$PIDS" | tr '\n' ' ')"

# Timestamp for output files
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
OUTPUT_FILE="$OUTPUT_DIR/${CONTAINER_NAME}_${TIMESTAMP}.strace"
SUMMARY_FILE="$OUTPUT_DIR/${CONTAINER_NAME}_${TIMESTAMP}_summary.txt"

# 3. Build one "-p <pid>" pair per process. An array keeps every argument a
#    separate word without relying on unquoted expansion.
STRACE_PID_ARGS=()
while IFS= read -r pid; do
  STRACE_PID_ARGS+=(-p "$pid")
done <<< "$PIDS"

echo "Starting strace..."
echo "Output file: $OUTPUT_FILE"

# 4. Run strace with timing information and follow forks
# -f: follow forks
# -t: print timestamps
# -T: show time spent in each syscall
# -y: print paths associated with file descriptors
# -s: max string size to print (default 32, increase if needed)
# -o: output file
# timeout sends SIGINT when the duration expires so strace can detach from the
# traced processes, and -k 5 follows up with SIGKILL if it has not exited
# 5 seconds later.
# sudo wraps timeout (not the other way round): when started from a script,
# timeout runs its command in a background process group, where a sudo
# password prompt could not read from the terminal.
sudo timeout -s SIGINT -k 5 "$DURATION" \
  strace -f -t -T -y -s 256 "${STRACE_PID_ARGS[@]}" -o "$OUTPUT_FILE" 2>&1

STRACE_EXIT=$?

if [ "$STRACE_EXIT" -eq 124 ]; then
  echo "Strace completed (timeout reached)"
elif [ "$STRACE_EXIT" -eq 0 ]; then
  echo "Strace completed successfully"
else
  echo "Strace exited with code $STRACE_EXIT"
fi

echo "--- Processing Results ---"

# Check if output file was created and has content
if [ ! -f "$OUTPUT_FILE" ]; then
  echo "ERROR: strace output file not created"
  exit 1
fi

# wc -c reads the size directly; fall back to 0 if the file cannot be read so
# the numeric test below never sees an empty string.
FILE_SIZE=$(wc -c < "$OUTPUT_FILE" 2>/dev/null)
FILE_SIZE=${FILE_SIZE//[[:space:]]/}
FILE_SIZE=${FILE_SIZE:-0}
echo "Output file size: $FILE_SIZE bytes"

if [ "$FILE_SIZE" -eq 0 ]; then
  echo "WARNING: No strace data captured"
  exit 1
fi

# 5. Generate summary statistics
echo "Generating summary..."

# The helpers below expect the trace layout produced by "strace -f -t -T -o":
#   <pid> <HH:MM:SS> <syscall>(<args>) = <ret> [<ERRNO> (<text>)] <<seconds>>
# Calls interrupted by another process are split into an "<unfinished ...>"
# line and a later "<... syscall resumed>" line that carries the timing.

#######################################
# Count the trace lines that start a call to one of the given syscalls.
# Matching is anchored to the syscall position, so text inside string
# arguments is not counted.
# Arguments: $1 - syscall names as an ERE alternation, e.g. 'open|openat'
#            $2 - strace output file
# Outputs:   exactly one number on stdout (0 when nothing matches or the
#            file cannot be read)
#######################################
count_syscalls() {
  local names=$1 trace=$2 hits
  # grep -c already prints 0 when nothing matches (and exits 1), so no extra
  # fallback output is needed for that case.
  hits=$(grep -cE "^[0-9]+[[:space:]]+[0-9:.]+[[:space:]]+(${names})\(" "$trace" 2>/dev/null)
  printf '%s\n' "${hits:-0}"
}

#######################################
# List the 10 most frequent errno names of failed syscalls.
# Only names that follow a "= -1" or "= ?" return value are counted, so flag
# constants such as O_CLOEXEC or AF_INET do not pollute the result.
# Arguments: $1 - strace output file
# Outputs:   "<count> <ERRNO>" lines, most frequent first
#######################################
summarize_errors() {
  sed -nE 's/.*= (-1|\?) (E[A-Z][A-Z0-9_]*).*/\2/p' "$1" \
    | sort | uniq -c | sort -rn | head -n 10
}

#######################################
# Rank syscalls by the total time reported by strace -T.
# Arguments: $1 - strace output file
# Outputs:   up to 20 "<total seconds> <calls> <syscall>" lines, largest
#            total first, followed by the overall average time per call
#######################################
report_syscall_times() {
  # LC_ALL=C keeps "." as the decimal separator for awk and for the sort it
  # starts.
  LC_ALL=C awk '
    match($0, /<[0-9]+\.[0-9]+>$/) {
      secs = substr($0, RSTART + 1, RLENGTH - 2) + 0
      sum += secs
      n++
      if ($3 == "<...") {
        name = $4                 # "<... read resumed>" line
      } else {
        name = $3                 # "read(3, ..." -> "read"
        sub(/\(.*/, "", name)
      }
      if (name ~ /^[a-z_0-9]+$/) {
        total[name] += secs
        calls[name]++
      }
    }
    END {
      ranking = "sort -rn | head -n 20"
      for (name in total)
        printf "%12.6f %8d %s\n", total[name], calls[name], name | ranking
      close(ranking)
      if (n > 0)
        print "Average syscall time:", sum / n, "seconds"
    }
  ' "$1"
}

{
  echo "=== Strace Summary for $CONTAINER_NAME ==="
  echo "Timestamp: $TIMESTAMP"
  echo "Duration: $DURATION seconds"
  echo "Container ID: $CONTAINER_ID"
  echo "PIDs traced: $(echo "$PIDS" | tr '\n' ' ')"
  echo ""
  echo "=== Top 20 System Calls by Count ==="
  grep -oP '^\d+\s+\d+:\d+:\d+\s+\K[a-z_0-9]+(?=\()' "$OUTPUT_FILE" | sort | uniq -c | sort -rn | head -20
  echo ""
  echo "=== Top 20 System Calls by Time (if -T was used) ==="
  printf '%12s %8s %s\n' "seconds" "calls" "syscall"
  report_syscall_times "$OUTPUT_FILE"
  echo ""
  echo "=== Error Summary (failed syscalls) ==="
  summarize_errors "$OUTPUT_FILE"
  echo ""
  echo "=== File Operations ==="
  echo "open/openat calls:"
  count_syscalls 'open|openat' "$OUTPUT_FILE"
  echo "read/pread calls:"
  count_syscalls 'read|pread|pread64' "$OUTPUT_FILE"
  echo "write/pwrite calls:"
  count_syscalls 'write|pwrite|pwrite64' "$OUTPUT_FILE"
  echo "close calls:"
  count_syscalls 'close' "$OUTPUT_FILE"
  echo ""
  echo "=== Network Operations ==="
  echo "socket calls:"
  count_syscalls 'socket' "$OUTPUT_FILE"
  echo "connect calls:"
  count_syscalls 'connect' "$OUTPUT_FILE"
  echo "send/write calls:"
  count_syscalls 'send|sendto|sendmsg|sendmmsg' "$OUTPUT_FILE"
  echo "recv/read calls:"
  count_syscalls 'recv|recvfrom|recvmsg|recvmmsg' "$OUTPUT_FILE"
  echo ""
  echo "=== Memory Operations ==="
  echo "mmap calls:"
  count_syscalls 'mmap' "$OUTPUT_FILE"
  echo "munmap calls:"
  count_syscalls 'munmap' "$OUTPUT_FILE"
  echo "brk calls:"
  count_syscalls 'brk' "$OUTPUT_FILE"
  echo ""
  echo "=== Process/Thread Operations ==="
  echo "clone calls:"
  count_syscalls 'clone|clone3' "$OUTPUT_FILE"
  echo "fork calls:"
  count_syscalls 'fork|vfork' "$OUTPUT_FILE"
  echo "execve calls:"
  count_syscalls 'execve' "$OUTPUT_FILE"
  echo "exit calls:"
  count_syscalls 'exit|exit_group' "$OUTPUT_FILE"
} > "$SUMMARY_FILE"

echo "Summary saved to: $SUMMARY_FILE"
echo ""
cat "$SUMMARY_FILE"
echo ""
echo "Done! Full strace output: $OUTPUT_FILE"
echo "Summary: $SUMMARY_FILE"