Performance Troubleshooting and Optimization
CPU Performance Analysis
CPU Monitoring and Diagnosis
# Live, continuously refreshing process viewers
top     # The traditional process monitor
htop    # Interactive monitor with an enhanced UI
atop    # More advanced system-level monitor
btop    # Newer, modern resource monitor
# System-wide CPU utilization figures
vmstat 1       # Virtual memory statistics, one report per second
iostat -c 1    # Only the CPU report, one per second
sar -u 1 10    # CPU utilization: 10 samples, 1 second apart
mpstat 1       # Multi-processor statistics, one report per second
# Find the processes that use the most CPU
ps aux --sort=-%cpu | head -n 10    # Heaviest CPU consumers first
ps -e -o pid,ppid,cmd,%mem,%cpu --sort=-%cpu | head
pidstat -u 1                        # CPU usage broken down per process
CPU Performance Issues
# Check CPU frequency and scaling
grep MHz /proc/cpuinfo # Current clock of each core
cpufreq-info # CPU frequency information
cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor # Active governor, one line per CPU
# Fix CPU performance governor (requires root)
# A redirection target may not be a glob that matches several files (bash
# fails with "ambiguous redirect"), so write the value to every CPU via tee.
echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor > /dev/null
cpufreq-set -g performance # Set performance governor
# Check CPU load average
uptime # Load averages
cat /proc/loadavg # Load average file
sar -q 1 10 # Queue length and load average
# Identify runaway processes
ps aux | awk '$3 > 50' # Processes using >50% CPU
# pkill -f matches against the full command line; preview the matches with
# `pgrep -af process_name` before killing anything.
pkill -f process_name # Kill problematic process
# Use -n so the negative nice value is not mistaken for an option.
# Lowering the nice value (raising priority) requires root.
renice -n -10 -p PID # Raise process priority
CPU Optimization
# Managing scheduling and I/O priority
nice -n 19 command      # Launch a command at low priority
renice -n 10 -p PID     # Lower the priority of a running process
ionice -c 3 -p PID      # Put the process in the idle I/O class
# Pinning work to CPUs
taskset -c 0,1 command  # Run a command bound to specific CPUs
taskset -cp 0-3 PID     # Change the CPU affinity of a running process
# Kernel thread tuning
printf '0\n' > /proc/sys/kernel/watchdog  # Turn the watchdog off when it is not needed
Memory Performance Analysis
Memory Monitoring
# System-wide memory overview
free -h              # Memory summary in human-readable units
cat /proc/meminfo    # Detailed memory counters
vmstat 1             # Memory statistics, one report per second
sar -r 1 10          # Memory utilization: 10 samples, 1 second apart
# Memory usage of individual processes
ps aux --sort=-%mem | head -n 10    # Heaviest memory consumers first
pmap -x PID                         # Memory map of one process
smem -tk                            # Advanced memory reporting
pidstat -r 1                        # Memory statistics per process
Memory Issues Diagnosis
# Check for memory leaks
valgrind --tool=memcheck --leak-check=full ./program
# -e lists every process (without it ps only shows the current session).
# Letting ps sort keeps the header on the first line; largest VSZ is last.
ps -eo pid,vsz,rss,comm --sort=vsz # Sort by virtual memory
# Out of Memory (OOM) analysis
dmesg | grep -i "killed process" # OOM killer messages
journalctl -k | grep -i oom # OOM kernel messages
# oom_score_adj is a per-process file under /proc/PID, not a /proc/sys/vm knob.
cat /proc/PID/oom_score_adj # OOM adjustment of one process (-1000 to 1000)
# Swap usage analysis
swapon --show # Show swap devices
cat /proc/swaps # Swap usage
sar -S 1 10 # Swap statistics
Memory Optimization
# Tune swappiness (all commands in this section require root)
echo 10 > /proc/sys/vm/swappiness # Reduce swap usage (takes effect immediately)
echo 'vm.swappiness=10' >> /etc/sysctl.conf # Permanent setting
# Clear caches (emergency only)
# The three drop_caches values are alternatives: pick ONE. Run sync first so
# dirty data is flushed before the caches are dropped.
sync # Flush file system buffers
echo 1 > /proc/sys/vm/drop_caches # Clear page cache
echo 2 > /proc/sys/vm/drop_caches # Clear dentries/inodes
echo 3 > /proc/sys/vm/drop_caches # Clear all caches
# Memory optimization settings
echo 'vm.dirty_ratio=5' >> /etc/sysctl.conf
echo 'vm.dirty_background_ratio=2' >> /etc/sysctl.conf
sysctl -p # Apply settings
# Huge pages configuration
echo 1024 > /proc/sys/vm/nr_hugepages
# mount fails if the mount point is missing, so create it first.
mkdir -p /mnt/hugepages
mount -t hugetlbfs none /mnt/hugepages
I/O Performance Analysis
Disk I/O Monitoring
# Device-level and process-level I/O views
iostat -x 1    # Extended I/O statistics, one report per second
iotop          # I/O usage broken down by process
iotop -o       # Restrict the view to processes actively doing I/O
atop -d        # atop's disk statistics
# I/O attributed to processes and devices
pidstat -d 1           # Disk statistics per process
sar -d 1 10            # Device utilization: 10 samples, 1 second apart
cat /proc/diskstats    # Disk statistics
# Is the CPU waiting on I/O?
vmstat 1       # Watch the 'wa' column
sar -u 1 10    # CPU report including I/O wait
I/O Performance Issues
# Identify I/O bottlenecks
iostat -x 1 | grep -E "(Device|sd|nvme)" # High utilization devices
lsof | grep deleted # Deleted files still open
fuser -v /mount/point # Processes using filesystem
# Check filesystem performance
hdparm -tT /dev/sda # Disk read performance
# Each write test creates a 1 GiB file named "testfile" in the current
# directory; run it on the filesystem under test and remove the file afterwards.
dd if=/dev/zero of=testfile bs=1G count=1 oflag=direct # Write test
rm -f testfile
# Time a buffered write including the final sync, so cached data is counted.
sync; time sh -c "dd if=/dev/zero of=testfile bs=1M count=1024; sync"
rm -f testfile
# Disk health check
smartctl -a /dev/sda # SMART attributes
badblocks -v /dev/sda # Check for bad blocks
I/O Optimization
# I/O scheduler optimization
cat /sys/block/sda/queue/scheduler # Available schedulers; the active one is shown in [brackets]
# Kernels 5.0 and later only provide the multi-queue schedulers, where the
# legacy names "deadline" and "noop" became "mq-deadline" and "none".
# On older single-queue kernels use the legacy names instead.
echo mq-deadline > /sys/block/sda/queue/scheduler # For HDDs
echo none > /sys/block/sda/queue/scheduler # For SSDs
echo mq-deadline > /sys/block/nvme0n1/queue/scheduler # For NVMe
# Filesystem mount options
mount -o noatime,nodiratime /dev/sda1 /mnt # Disable access time updates
# Add to /etc/fstab: /dev/sda1 /mnt ext4 defaults,noatime,nodiratime 0 2
# Read-ahead optimization (two equivalent ways to set the same 2 MiB value)
# blockdev counts 512-byte sectors: 4096 sectors = 2 MiB.
blockdev --setra 4096 /dev/sda # Set read-ahead to 2MB
# The sysfs file is in KiB: 2048 KiB = 2 MiB.
echo 2048 > /sys/block/sda/queue/read_ahead_kb
# File system optimization
# WARNING: writeback journaling does not order data writes against metadata
# commits, so recently written files may contain stale data after a crash.
tune2fs -o journal_data_writeback /dev/sda1 # Faster journaling
Network Performance Analysis
Network Monitoring
# Watching network throughput
iftop      # Bandwidth usage on an interface
nethogs    # Network usage grouped by process
vnstat     # Network traffic statistics
ss -i      # Socket listing including internal metrics
# Measuring network performance
iperf3 -s               # Run the iperf server side
iperf3 -c server_ip     # Run a throughput test against the server
ping -c 100 server_ip   # Latency test with 100 echo requests
traceroute server_ip    # Inspect the route to the server
Network Optimization
# Append the TCP buffer and network queue settings to /etc/sysctl.conf in one
# write; the lines added are the same as appending each one individually.
cat >> /etc/sysctl.conf <<'EOF'
net.core.rmem_max = 134217728
net.core.wmem_max = 134217728
net.ipv4.tcp_rmem = 4096 87380 134217728
net.ipv4.tcp_wmem = 4096 65536 134217728
net.core.netdev_max_backlog = 5000
net.ipv4.tcp_congestion_control = bbr
EOF
sysctl -p    # Load the settings from /etc/sysctl.conf
# Per-interface offload features
ethtool -K eth0 tso on    # Enable TCP segmentation offload
ethtool -K eth0 gso on    # Enable generic segmentation offload
System-wide Performance Analysis
Comprehensive Monitoring
# All-in-one system performance viewers
dstat       # Versatile system statistics
glances     # Cross-platform monitoring tool
nmon        # Performance monitoring tool
collectl    # Comprehensive data collector
# Looking at recorded history
sar -A                          # Every statistic sar has available
sar -f /var/log/sysstat/saXX    # Read historical data from a saved file
Performance Profiling
# Profiling CPU usage with perf
perf record -g ./program    # Record a profile with call graphs
perf report                 # Analyze the recorded data
perf top                    # Profile in real time
# Tracing system and library calls
strace -c ./program    # Summary of system calls
strace -p PID          # Trace a process that is already running
ltrace ./program       # Trace library calls
Automated Performance Monitoring
#!/bin/bash
# Performance monitoring script
#
# Appends a snapshot of system health to a dated log file and emails an alert
# whenever the load average, memory usage, or I/O wait crosses its threshold.
# Needs write access to /var/log and a working `mail` command.
LOG_FILE="/var/log/performance-$(date +%Y%m%d).log"
ALERT_EMAIL="admin@example.com"

echo "Performance Check - $(date)" >> "$LOG_FILE"

# Check CPU load
# The first field of /proc/loadavg is the 1-minute load average. Reading it
# directly avoids parsing `uptime`, whose output format varies with locale.
read -r LOAD _ < /proc/loadavg
# awk performs the floating-point comparison (the shell only handles integers).
if awk -v load="$LOAD" 'BEGIN { exit !(load > 5.0) }'; then
  echo "High CPU load: $LOAD" >> "$LOG_FILE"
  echo "High system load detected: $LOAD" | \
    mail -s "Performance Alert: CPU Load" "$ALERT_EMAIL"
fi

# Check memory usage
# Used memory as a percentage of total, from the "Mem:" row of `free`.
MEM_USAGE=$(free | awk '/^Mem:/ {printf "%.1f", $3/$2 * 100}')
if awk -v usage="$MEM_USAGE" 'BEGIN { exit !(usage > 90) }'; then
  echo "High memory usage: ${MEM_USAGE}%" >> "$LOG_FILE"
  echo "High memory usage: ${MEM_USAGE}%" | \
    mail -s "Performance Alert: Memory" "$ALERT_EMAIL"
fi

# Check disk I/O wait
# The first vmstat report is an average since boot, so take a second sample
# and read the 'wa' column (field 16) from it.
IO_WAIT=$(vmstat 1 2 | tail -n 1 | awk '{print $16}')
# Only compare when the value is a plain integer, so a missing or unexpected
# vmstat output does not make the test itself fail.
if [[ "$IO_WAIT" =~ ^[0-9]+$ ]] && (( IO_WAIT > 20 )); then
  echo "High I/O wait: ${IO_WAIT}%" >> "$LOG_FILE"
  echo "High I/O wait detected: ${IO_WAIT}%" | \
    mail -s "Performance Alert: I/O Wait" "$ALERT_EMAIL"
fi

# Top resource consumers
# head -n 6 keeps the ps header line plus the five heaviest processes.
{
  echo "Top CPU processes:"
  ps aux --sort=-%cpu | head -n 6
  echo "Top Memory processes:"
  ps aux --sort=-%mem | head -n 6
} >> "$LOG_FILE"
Performance Optimization Checklist
# Quick performance fixes (run as root)
# 1. Update system packages
apt update && apt upgrade
# 2. Clean unnecessary files
apt autoremove
apt autoclean
journalctl --vacuum-time=7d # Drop journal entries older than 7 days
# 3. Optimize boot services
systemctl list-unit-files --type=service | grep enabled
systemctl disable unnecessary_service
# 4. Tune kernel parameters
# NOTE(review): newer kernels (reportedly 5.13+) no longer expose
# kernel.sched_migration_cost_ns as a sysctl; if `sysctl -p` reports it as an
# unknown key, remove that line again.
echo 'kernel.sched_migration_cost_ns = 5000000' >> /etc/sysctl.conf
echo 'kernel.sched_autogroup_enabled = 0' >> /etc/sysctl.conf
sysctl -p # Apply settings (appending to the file alone changes nothing until reboot)
# 5. Enable performance governor
echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
# 6. Optimize storage
fstrim -v / # SSD trim
# WARNING: writeback journaling trades crash safety for speed; recently
# written files may contain stale data after an unclean shutdown.
tune2fs -o journal_data_writeback /dev/sda1
This performance troubleshooting guide provides comprehensive tools and techniques for analyzing and optimizing CPU, memory, I/O, and network performance, enabling identification of bottlenecks and implementation of targeted optimizations.