Skip to main content

Server Performance Debugging

This guide helps you identify and resolve performance issues on your VPS.

Quick Diagnosis

Overview with htop

# Install htop
sudo apt install htop

# Launch
htop

htop shortcuts:

  • F6: Sort by CPU, RAM, etc.
  • F9: Kill a process
  • F5: Tree view
  • q: Quit

Key Indicators

# System load
uptime

# Result: load average: 0.50, 0.75, 0.60
# = load over 1min, 5min, 15min

Load average interpretation (for a VPS with N cores):

  • < N: Normal
  • = N: Maximum utilization
  • > N: Overload

# See number of cores
nproc

Analyze CPU Usage

Identify Resource-Heavy Processes

# Top 10 by CPU
ps aux --sort=-%cpu | head -11

# Real-time
top -o %CPU

CPU by Process Over Time

# Monitor a specific process
pidstat -p PID 1

Profile a Service

# Install perf
sudo apt install linux-tools-common linux-tools-$(uname -r)

# Live-profile a running process (runs until you press q)
sudo perf top -p PID

Analyze Memory Usage

Overview

free -h
              total        used        free      shared  buff/cache   available
Mem: 3.8Gi 2.1Gi 200Mi 100Mi 1.5Gi 1.4Gi
Swap: 2.0Gi 500Mi 1.5Gi
  • available: Actually usable memory
  • buff/cache: System cache (releasable if needed)

Detail by Process

# Top 10 by RAM
ps aux --sort=-%mem | head -11

# With more details (user, %MEM, resident memory in MB, command).
# The ps header row is replaced by a matching header instead of being
# formatted as if it were a process, and RSS (kB) is rounded to one decimal.
ps aux --sort=-%mem | awk '
  NR == 1  { printf "%-10s %-8s %-10s %s\n", "USER", "%MEM", "RSS", "COMMAND"; next }
  NR <= 11 { printf "%-10s %-8s %-10s %s\n", $1, $4 "%", sprintf("%.1fMB", $6 / 1024), $11 }'

Memory Leaks

# Monitor process memory over time
watch -n 1 "ps -p PID -o pid,rss,vsz,pmem,comm"

If RSS keeps growing over time and never drops back, the process probably has a memory leak.

Swap Usage

# See swap by process (top 10, largest first; values in kB).
# One awk run per status file: a process may exit between the glob expansion
# and the read, and that failure must only skip that one process.
# Only numeric /proc entries are scanned, so "self" and "thread-self" are not
# counted twice. Entries without a VmSwap line print nothing.
for file in /proc/[0-9]*/status; do
  awk '
    /^Name:/   { sub(/^Name:[ \t]*/, ""); name = $0 }
    /^VmSwap:/ { swap = $2 }
    END        { if (swap != "") printf "%s\t%s kB\n", name, swap }
  ' "$file" 2>/dev/null
done | sort -t "$(printf '\t')" -k2,2nr | head -10

Analyze Disk I/O

Overview

# Install iotop
sudo apt install iotop

# Launch
sudo iotop -o

I/O Statistics

# Install sysstat
sudo apt install sysstat

# I/O stats every second
iostat -x 1

Important columns:

  • %util: Disk utilization (>80% = bottleneck)
  • await: I/O wait time (>20ms = slow)
  • r/s, w/s: Reads/writes per second

Identify I/O Processes

# Processes with most I/O
sudo iotop -o -b -n 3

Analyze Network

Bandwidth

# Install iftop
sudo apt install iftop

# Monitor
sudo iftop -i eth0

Active Connections

# Number of connections by state
ss -s

# Established connections
ss -tn state established

# Connections by remote IP (top 10).
# NR > 1 skips the ss header row; only the trailing ":port" is removed, so
# IPv6 peers such as [2001:db8::1]:443 keep their full address.
ss -tn | awk 'NR > 1 { sub(/:[^:]*$/, "", $5); print $5 }' | sort | uniq -c | sort -rn | head -10

Network Throughput

# Install nload
sudo apt install nload

# Monitor
nload eth0

Common Issues and Solutions

CPU at 100%

Cause: Runaway process

# Identify
top -o %CPU

# Solution: Restart the service
systemctl restart service_name

# Or kill the process (try a plain `kill PID` first; use -9 only as a last resort)
kill -9 PID

Cause: Attack or spam

# Check connections per remote IP (most connections first).
# NR > 1 skips the ss header row; only the trailing ":port" is removed, so
# IPv6 peers keep their full address.
ss -tn | awk 'NR > 1 { sub(/:[^:]*$/, "", $5); print $5 }' | sort | uniq -c | sort -rn | head

# Block an IP
sudo ufw deny from SUSPICIOUS_IP

RAM Saturated

Immediate solution:

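# Drop the page cache (safe, but it only frees reclaimable cache and does not help if the applications themselves are using the RAM)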
sync; echo 3 | sudo tee /proc/sys/vm/drop_caches

Long-term solution:

  • Increase swap
  • Optimize application
  • Upgrade VPS

Slow Disk I/O

Cause: Large logs

# Identify
du -h /var/log/ | sort -hr | head

# Clean
sudo journalctl --vacuum-size=500M

Cause: Database queries

# View currently running MySQL queries (long-running ones show a high Time value)
sudo mysqladmin -u root -p processlist

Network Latency

# Test
ping -c 10 google.com
mtr google.com

Common Optimizations

MySQL/MariaDB

sudo nano /etc/mysql/mysql.conf.d/mysqld.cnf
[mysqld]
# Query cache (MariaDB and MySQL 5.7 or older only).
# MySQL 8.0 removed the query cache. Keep these two lines commented out there,
# otherwise mysqld refuses to start because the variables are unknown.
# query_cache_type = 1
# query_cache_size = 64M

# Buffers
innodb_buffer_pool_size = 512M
innodb_log_file_size = 128M

# Connections
max_connections = 100

Nginx

sudo nano /etc/nginx/nginx.conf
# Main (top-level) context
worker_processes auto;

# worker_connections is only valid inside the events block.
# Edit the existing block rather than adding a second one.
events {
    worker_connections 1024;
}

# The settings below belong inside the existing http block.
http {
    # Gzip
    gzip on;
    gzip_types text/plain text/css application/json application/javascript;

    # Cache
    open_file_cache max=1000 inactive=20s;
}

PHP-FPM

sudo nano /etc/php/8.1/fpm/pool.d/www.conf
pm = dynamic
pm.max_children = 50
pm.start_servers = 5
pm.min_spare_servers = 5
pm.max_spare_servers = 35
pm.max_requests = 500

System Parameters

sudo nano /etc/sysctl.d/99-performance.conf
# Network
net.core.somaxconn = 65535
net.ipv4.tcp_max_tw_buckets = 1440000
net.ipv4.tcp_fin_timeout = 15

# Files
# System-wide open-file limit. Check the current value with
# `sysctl fs.file-max` before applying: if the system already uses a larger
# number, remove this line, because writing a smaller value lowers the limit.
fs.file-max = 2097152
sudo sysctl -p /etc/sysctl.d/99-performance.conf

Complete Diagnostic Script

#!/bin/bash
# /usr/local/bin/diagnose.sh
#
# Prints a one-shot performance snapshot: OS, kernel, uptime and load, the
# top CPU and memory consumers, disk usage, established TCP connections, and
# which common web/database services are running. Takes no arguments and
# only reads system state.

# Print PRETTY_NAME from an os-release file (default: /etc/os-release).
# The file is sourced in a subshell, so quoted and unquoted values are both
# handled and none of its variables leak into this script.
os_pretty_name() {
  local release_file="${1:-/etc/os-release}"
  [[ -r "$release_file" ]] || return 1
  (
    # shellcheck disable=SC1090
    . "$release_file"
    printf '%s\n' "${PRETTY_NAME:-unknown}"
  )
}

# Print the five heaviest processes as "command percent%".
# Arguments: $1 - ps sort key (%cpu or %mem)
#            $2 - matching `ps aux` column number (3 = %CPU, 4 = %MEM)
top_five() {
  local sort_key="$1" column="$2"
  # Row 1 is the ps header; rows 2-6 are the five largest consumers.
  ps aux --sort="-${sort_key}" \
    | awk -v col="$column" 'NR > 1 && NR <= 6 { printf " %-20s %s%%\n", $11, $col }'
}

echo "=== PERFORMANCE DIAGNOSTIC $(date) ==="
echo ""

echo "--- System ---"
echo "OS: $(os_pretty_name)"
echo "Kernel: $(uname -r)"
echo "Uptime: $(uptime -p)"
# LC_ALL=C keeps the "load average:" label and "." decimals stable.
echo "Load: $(LC_ALL=C uptime | awk -F'load average: ' '{print $2}')"
echo ""

echo "--- CPU ---"
echo "Cores: $(nproc)"
echo "Top 5 CPU:"
top_five '%cpu' 3
echo ""

echo "--- Memory ---"
# LC_ALL=C so the row labels are always "Mem:" and "Swap:".
LC_ALL=C free -h | grep -E '^(Mem|Swap):'
echo "Top 5 RAM:"
top_five '%mem' 4
echo ""

echo "--- Disk ---"
df -h | grep -E "^/dev"
echo ""

echo "--- Network ---"
# The "estab" figure in the ss summary is followed by a comma; strip it.
echo "Connections: $(ss -s | awk '/estab/ { sub(/,/, "", $4); print $4 }')"
echo ""

echo "--- Critical Services ---"
# Only running services are listed, so a missing entry means "not running or
# not installed". php-fpm is matched with a unit-name pattern because
# Debian/Ubuntu name the unit after the PHP version (e.g. php8.1-fpm.service).
for svc in nginx apache2 mysql mariadb 'php*-fpm'; do
  if systemctl is-active --quiet "${svc}.service" 2>/dev/null; then
    echo " ${svc/\*/}: OK"
  fi
done
echo ""

echo "=== END DIAGNOSTIC ==="
sudo chmod +x /usr/local/bin/diagnose.sh

Continuous Monitoring

# Installation
bash <(curl -Ss https://my-netdata.io/kickstart.sh)

# Access: http://YOUR_IP:19999

Alerts with cron

#!/bin/bash
# /usr/local/bin/perf-alert.sh
#
# Prints one line for each resource (CPU load, RAM, root filesystem) whose
# usage is above its threshold and nothing otherwise, so a cron job only
# produces output when something needs attention.

# Thresholds (percent). Each one can be overridden from the environment.
CPU_THRESHOLD="${CPU_THRESHOLD:-80}"
MEM_THRESHOLD="${MEM_THRESHOLD:-85}"
DISK_THRESHOLD="${DISK_THRESHOLD:-90}"

# Print "<label> ALERT: <value>%" when the value is above the threshold.
# Arguments: $1 - label, $2 - measured percentage, $3 - threshold percentage
# A measurement that is empty or not a whole number is reported on stderr
# instead of being passed to the numeric comparison, where it would only
# produce a shell syntax error.
check_threshold() {
  local label="$1" value="$2" threshold="$3"
  if [[ ! "$value" =~ ^[0-9]+$ ]]; then
    printf '%s check skipped: could not read a value\n' "$label" >&2
    return 0
  fi
  if [ "$value" -gt "$threshold" ]; then
    echo "${label} ALERT: ${value}%"
  fi
}

# CPU check: 1-minute load average as a percentage of the core count.
# /proc/loadavg is used instead of parsing uptime, whose output format can
# change with the locale and break comma-based parsing.
LOAD=""
if [[ -r /proc/loadavg ]]; then
  read -r LOAD _ < /proc/loadavg
fi
CORES=$(nproc)
LOAD_PCT=$(LC_ALL=C awk -v load="$LOAD" -v cores="$CORES" \
  'BEGIN { if (load != "" && cores > 0) printf "%.0f", (load / cores) * 100 }')

# RAM check: used / total from the "Mem:" row (LC_ALL=C fixes the label).
MEM_PCT=$(LC_ALL=C free | awk '/^Mem:/ { if ($2 > 0) printf "%.0f", $3 / $2 * 100 }')

# Disk check: -P keeps each filesystem on a single line even when the device
# name is long, so the Use% column is always field 5 of row 2.
DISK_PCT=$(df -P / | awk 'NR == 2 { sub(/%/, "", $5); print $5 }')

# Alerts
check_threshold "CPU" "$LOAD_PCT" "$CPU_THRESHOLD"
check_threshold "RAM" "$MEM_PCT" "$MEM_THRESHOLD"
check_threshold "DISK" "$DISK_PCT" "$DISK_THRESHOLD"
Tip

For professional monitoring, see the Server Monitoring guide.