Files
disk-health/alma-v2.1.sh
2025-10-22 03:04:37 +08:00

399 lines
13 KiB
Bash
Executable File

#!/bin/bash
# Disk Health Check Script for Alma Linux 9
# Checks SSD TBW/lifespan and HDD health status
# Script identity, shown by the usage and version output.
SCRIPT_NAME=$(basename "$0")
VERSION="2.1"
# Color codes.
# tput exits non-zero and complains on stderr when TERM is unset or unknown
# (for example under cron or a systemd timer). Silence that and fall back to
# empty strings so the report is simply printed without colours.
RED=$(tput setaf 1 2>/dev/null || true)
GREEN=$(tput setaf 2 2>/dev/null || true)
YELLOW=$(tput setaf 3 2>/dev/null || true)
BLUE=$(tput setaf 4 2>/dev/null || true)
CYAN=$(tput setaf 6 2>/dev/null || true)
NC=$(tput sgr0 2>/dev/null || true)
# Print one line of text wrapped in a colour sequence.
# Arguments:
#   $1 - colour escape sequence to emit before the text
#   $2 - text to print
# Outputs:
#   "<colour><text><NC>" plus a newline on stdout. echo -e is used, so
#   backslash escapes inside either argument are expanded.
print_color() {
  local tint="$1"
  local text="$2"
  echo -e "${tint}${text}${NC}"
}
# Report whether a command name can be resolved by the shell.
# Arguments:
#   $1 - command name to look up
# Returns:
#   The status of `command -v`: 0 when the name resolves, non-zero otherwise.
#   Nothing is printed.
command_exists() {
  local tool="$1"
  command -v "$tool" &>/dev/null
}
# Verify that the external tools this script relies on are installed.
# Each entry below is "command:package"; the package name is what gets
# reported and suggested for installation when the command is not found.
# Outputs:
#   On failure, an error line and a dnf install hint on stdout.
# Returns:
#   0 when every command is present; otherwise exits the script with status 1.
check_dependencies() {
  local -a required=("smartctl:smartmontools" "bc:bc")
  local -a absent=()
  local entry
  for entry in "${required[@]}"; do
    command_exists "${entry%%:*}" || absent+=("${entry#*:}")
  done
  if ((${#absent[@]} == 0)); then
    return 0
  fi
  print_color "$RED" "Error: Missing required packages: ${absent[*]}"
  echo "Install with: sudo dnf install ${absent[*]}"
  exit 1
}
# Classify a disk from the text of `smartctl -i`.
# Arguments:
#   $1 - device path handed to smartctl
# Outputs:
#   "SSD" when the text contains "Solid State Device", otherwise "HDD" when
#   it contains "Rotation Rate", otherwise "UNKNOWN". The SSD test runs
#   first, so it wins when both phrases are present.
get_disk_type() {
  local disk="$1"
  local identity
  identity=$(smartctl -i "$disk" 2>/dev/null)
  case "$identity" in
    *"Solid State Device"*) echo "SSD" ;;
    *"Rotation Rate"*) echo "HDD" ;;
    *) echo "UNKNOWN" ;;
  esac
}
# Function to calculate TBW for SSD
# Converts a raw write counter into decimal terabytes (10^12 bytes).
# Arguments:
#   $1 - counter in units of 32 MiB (used only when $2 is not usable)
#   $2 - counter in 512-byte sectors (preferred when usable)
# Outputs:
#   The value with two decimals (e.g. "0.50"), or "0" when neither argument
#   is a positive integer.
calculate_tbw() {
  local raw_value=$1
  local sectors=$2
  # Only plain positive integers are accepted; anything else (empty, "0",
  # or text) is treated as "no data" instead of raising an arithmetic error.
  local positive_int='^0*[1-9][0-9]*$'
  if [[ "$sectors" =~ $positive_int ]]; then
    # awk does the math in floating point, so the byte count cannot overflow
    # the shell's integers, and printf always emits a leading zero.
    LC_ALL=C awk -v n="$sectors" 'BEGIN { printf "%.2f\n", n * 512 / 1e12 }'
  elif [[ "$raw_value" =~ $positive_int ]]; then
    # One unit is 32 MiB = 33554432 bytes (not 32 * 10^6).
    LC_ALL=C awk -v n="$raw_value" 'BEGIN { printf "%.2f\n", n * 33554432 / 1e12 }'
  else
    echo "0"
  fi
}
# Pick an endurance figure (in TB written) from the model string and size.
# Arguments:
#   $1 - model string; a case-insensitive match on MTFDDAK480TDS, MICRON,
#        INTEL, SAMSUNG or KIOXIA selects the enterprise table
#   $2 - capacity in GB (integer)
# Outputs:
#   The figure of the first tier whose minimum capacity is reached, or the
#   table's floor value when no tier matches.
estimate_ssd_endurance() {
  local disk_model=$1
  local capacity_gb=$2
  # Each tier is "minimum GB:endurance TB", ordered largest first.
  local -a tiers
  local floor
  if grep -qiE 'MTFDDAK480TDS|MICRON|INTEL|SAMSUNG|KIOXIA' <<<"$disk_model"; then
    # Enterprise SSDs
    tiers=("1000:1200" "480:600" "240:300")
    floor="150"
  else
    # Consumer SSDs
    tiers=("1000:600" "480:300" "240:150" "120:80")
    floor="40"
  fi
  local tier
  for tier in "${tiers[@]}"; do
    if [[ $capacity_gb -ge ${tier%%:*} ]]; then
      echo "${tier##*:}"
      return
    fi
  done
  echo "$floor"
}
# Function to estimate SSD lifespan with TBW remaining
# Arguments:
#   $1 - power-on hours; only its leading digits are used, and an empty or
#        zero value short-circuits to "Unknown||Unknown"
#   $2 - terabytes written so far (decimal number)
#   $3 - model string, passed to estimate_ssd_endurance
#   $4 - capacity in GB, passed to estimate_ssd_endurance
# Outputs:
#   One line with three '|' separated fields:
#     <remaining %>|<remaining TB>|<wear label>
#   The first two fields are wrapped in RED (>= 80% used), YELLOW (>= 50%
#   used) or GREEN. When $2 is not a positive number the line is
#   "Unknown|<endurance> TB|New".
estimate_ssd_lifespan() {
  local power_on_hours=$1
  local tbw_used=$2
  local disk_model=$3
  local capacity_gb=$4

  # Some drives report this raw value with a suffix (e.g. "1234h+56m");
  # keep the leading integer so the zero test below cannot error out.
  power_on_hours=${power_on_hours%%[!0-9]*}
  if [[ -z "$power_on_hours" || "$power_on_hours" =~ ^0+$ ]]; then
    echo "Unknown||Unknown"
    return
  fi

  local estimated_endurance
  estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")

  # Accept "12", "12.5" and ".50"; anything else counts as no usage data.
  local decimal='^([0-9]+\.?[0-9]*|\.[0-9]+)$'
  local stats=""
  if [[ "$tbw_used" =~ $decimal && "$estimated_endurance" =~ $decimal ]]; then
    # awk prints nothing when there is no usage yet. Remaining figures are
    # clamped at zero so a drive past its estimated endurance does not show
    # negative percentages or negative terabytes.
    stats=$(LC_ALL=C awk -v used="$tbw_used" -v total="$estimated_endurance" 'BEGIN {
      used += 0
      total += 0
      if (used <= 0 || total <= 0) exit
      pct_used = used * 100 / total
      pct_left = 100 - pct_used
      if (pct_left < 0) pct_left = 0
      tb_left = total - used
      if (tb_left < 0) tb_left = 0
      level = "ok"
      if (pct_used >= 80) level = "high"
      else if (pct_used >= 50) level = "moderate"
      printf "%.1f %.2f %s\n", pct_left, tb_left, level
    }')
  fi

  if [[ -z "$stats" ]]; then
    echo "Unknown|${estimated_endurance} TB|New"
    return
  fi

  local pct_left tb_left level color label
  read -r pct_left tb_left level <<<"$stats"
  case "$level" in
    high)
      color=$RED
      label="High wear"
      ;;
    moderate)
      color=$YELLOW
      label="Moderate wear"
      ;;
    *)
      color=$GREEN
      label="Healthy"
      ;;
  esac
  echo "${color}${pct_left}%${NC}|${color}${tb_left} TB${NC}|${label}"
}
# Produce a coarse remaining-life verdict for a hard disk.
# Arguments:
#   $1 - power-on hours; empty yields "Unknown"
#   $2 - reallocated sector count (defaults to 0 when empty)
#   $3 - pending sector count (defaults to 0 when empty)
# Outputs:
#   One coloured verdict line. Checks run in order of severity: pending
#   sectors, then reallocated sectors (> 100, > 10), then power-on hours
#   (> 40000, > 25000); the first match wins.
estimate_hdd_lifespan() {
  local power_on_hours=$1
  local reallocated_sectors=${2:-0}
  local pending_sectors=${3:-0}

  if [[ -z "$power_on_hours" ]]; then
    echo "Unknown"
    return
  fi

  local verdict
  if [[ "$pending_sectors" -gt 0 ]]; then
    verdict="${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
  elif [[ "$reallocated_sectors" -gt 100 ]]; then
    verdict="${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)"
  elif [[ "$reallocated_sectors" -gt 10 ]]; then
    verdict="${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)"
  elif [[ "$power_on_hours" -gt 40000 ]]; then
    verdict="${YELLOW}1-2 years${NC} (High usage: $power_on_hours hours)"
  elif [[ "$power_on_hours" -gt 25000 ]]; then
    verdict="${GREEN}2-3 years${NC} (Moderate usage: $power_on_hours hours)"
  else
    verdict="${GREEN}> 3 years${NC} (Low usage: $power_on_hours hours)"
  fi
  echo "$verdict"
}
# Report the SMART state of a disk from the text of `smartctl -i`.
# Arguments:
#   $1 - device path handed to smartctl
# Outputs:
#   "unavailable" when the text lacks "SMART support is: Available";
#   otherwise "enabled" when it also contains "SMART support is: Enabled",
#   and "available" when it does not.
check_smart_capability() {
  local disk="$1"
  local identity
  identity=$(smartctl -i "$disk" 2>/dev/null)
  if [[ "$identity" != *"SMART support is: Available"* ]]; then
    echo "unavailable"
  elif [[ "$identity" == *"SMART support is: Enabled"* ]]; then
    echo "enabled"
  else
    echo "available"
  fi
}
# Helper: pull the RAW_VALUE column (10th field) of the first attribute line
# matching a pattern, reduced to its leading integer.
# Arguments:
#   $1 - full text of `smartctl -A`
#   $2 - extended regex selecting the attribute line
# Outputs:
#   The leading digits of the raw value, or an empty string when there is no
#   match. Suffixed values such as "1234h+56m" become "1234".
_smart_attr_raw() {
  local attributes=$1
  local pattern=$2
  local raw
  raw=$(grep -E -- "$pattern" <<<"$attributes" | awk '{print $10}' | head -1)
  echo "${raw%%[!0-9]*}"
}

# Function to check a single disk
# Prints an identity and health report for one device, followed by an SSD
# wear estimate or an HDD lifespan estimate depending on the detected type.
# Arguments:
#   $1 - device path; must be a block device
# Outputs:
#   The report, or a one-line explanation when the device is not a block
#   device, SMART is unsupported or disabled, or smartctl cannot read it.
check_disk() {
  local disk=$1
  print_color "$CYAN" "Checking disk: $disk"
  echo "=================================================="
  # Check if disk exists and is accessible
  if [[ ! -b "$disk" ]]; then
    print_color "$RED" "Error: $disk is not a valid block device"
    echo ""
    return
  fi
  # Check SMART capability
  local smart_status
  smart_status=$(check_smart_capability "$disk")
  if [[ "$smart_status" == "unavailable" ]]; then
    print_color "$YELLOW" "SMART not supported on $disk"
    echo ""
    return
  elif [[ "$smart_status" == "available" ]]; then
    print_color "$YELLOW" "SMART available but not enabled on $disk"
    echo "Enable with: smartctl -s on $disk"
    echo ""
    return
  fi
  # Get basic disk information.
  # Declarations are kept separate from the assignments: `local x=$(cmd)`
  # reports the status of `local`, which would hide smartctl failures.
  local info health attributes
  local smart_rc=0
  info=$(smartctl -i "$disk" 2>/dev/null) || smart_rc=$((smart_rc | $?))
  health=$(smartctl -H "$disk" 2>/dev/null) || smart_rc=$((smart_rc | $?))
  attributes=$(smartctl -A "$disk" 2>/dev/null) || smart_rc=$((smart_rc | $?))
  # smartctl's exit status is a bit mask. Bits 0 and 1 mean the command line
  # was rejected or the device could not be opened; the higher bits describe
  # the disk's condition and must not be treated as a read failure.
  if ((smart_rc & 3)); then
    print_color "$RED" "Error: Cannot read SMART data from $disk"
    echo "You may need to run this script as root"
    echo ""
    return
  fi
  # Extract disk information
  local model serial capacity firmware
  model=$(grep "Device Model:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//')
  serial=$(grep "Serial Number:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//')
  capacity=$(grep "User Capacity:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
  firmware=$(grep "Firmware Version:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//')
  # Extract capacity in GB for endurance calculation.
  # After the cut above the string looks like "1,000,204,886,016 bytes", so
  # derive GB from the byte count. Taking the first run of digits would turn
  # a 1 TB drive into "1 GB".
  local capacity_gb=0
  local capacity_bytes
  if [[ "$capacity" == *bytes* ]]; then
    capacity_bytes=${capacity%%bytes*}
    capacity_bytes=${capacity_bytes//[!0-9]/}
    if [[ -n "$capacity_bytes" ]]; then
      capacity_gb=$((10#$capacity_bytes / 1000000000))
    fi
  else
    # Best effort for an unexpected format: use the leading integer as GB.
    capacity_gb=${capacity%%[!0-9]*}
    capacity_gb=${capacity_gb:-0}
  fi
  local disk_type health_status
  disk_type=$(get_disk_type "$disk")
  health_status=$(grep "result:" <<<"$health" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  # Extract SMART attributes
  local power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib
  power_on_hours=$(_smart_attr_raw "$attributes" "Power_On_Hours")
  reallocated_sectors=$(_smart_attr_raw "$attributes" "Reallocated_Sector_Ct")
  pending_sectors=$(_smart_attr_raw "$attributes" "Current_Pending_Sector")
  # The two write counters use different units (512-byte LBAs vs 32 MiB), so
  # each is read from its own attribute and never mixed.
  total_written=$(_smart_attr_raw "$attributes" "Total_LBAs_Written")
  host_writes_32mib=$(_smart_attr_raw "$attributes" "Host_Writes_32MiB")
  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Capacity: ${capacity:-Unknown}"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"
  echo "Power On Hours: ${power_on_hours:-Unknown}"
  # Disk type specific analysis
  if [[ "$disk_type" == "SSD" ]]; then
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$host_writes_32mib" "")
    fi
    echo "TBW Used: ${tbw_used} TB"
    local lifespan_info lifespan_percent tbw_remaining wear_status
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb")
    # The estimate comes back as "<percent>|<TB remaining>|<wear label>".
    lifespan_percent=$(cut -d'|' -f1 <<<"$lifespan_info")
    tbw_remaining=$(cut -d'|' -f2 <<<"$lifespan_info")
    wear_status=$(cut -d'|' -f3 <<<"$lifespan_info")
    echo "TBW Remaining: $tbw_remaining"
    echo "Lifespan: $lifespan_percent ($wear_status)"
  elif [[ "$disk_type" == "HDD" ]]; then
    echo "Realloc Sectors: ${reallocated_sectors:-0}"
    echo "Pending Sectors: ${pending_sectors:-0}"
    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
    echo "Lifespan: $lifespan"
  else
    print_color "$YELLOW" "Limited information available for this disk type"
  fi
  echo ""
}
# Function to detect all disks
# Looks for whole-disk block device nodes under /dev.
# Outputs:
#   A single line with the device paths separated by spaces (an empty line
#   when nothing is found). Order: SATA/SAS, then NVMe, then virtio/Xen.
detect_disks() {
  local disks=()
  local disk
  # Check for SATA/SAS disks
  for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do
    if [[ -b "$disk" ]]; then
      disks+=("$disk")
    fi
  done
  # Check for NVMe disks.
  # Only namespace nodes (nvme<ctrl>n<ns>) are kept: partitions such as
  # nvme0n1p1 are not disks and would be reported as duplicates. The regex
  # also admits controller and namespace numbers with more than one digit.
  for disk in /dev/nvme*n*; do
    if [[ -b "$disk" && "$disk" =~ ^/dev/nvme[0-9]+n[0-9]+$ ]]; then
      disks+=("$disk")
    fi
  done
  # Check for other disk types
  for disk in /dev/vd[a-z] /dev/xvd[a-z]; do
    if [[ -b "$disk" ]]; then
      disks+=("$disk")
    fi
  done
  echo "${disks[@]}"
}
# Entry point: print the banner, verify dependencies, build the list of
# disks and run check_disk on each one.
# Arguments:
#   $@ - optional device paths. Paths that are not block devices are
#        reported and skipped. With no arguments, detect_disks supplies
#        the list.
# Returns:
#   Exits with status 1 when no disk remains to be checked.
main() {
  print_color "$BLUE" "Disk Health Check Script v$VERSION for Alma Linux 9"
  print_color "$BLUE" "===================================================="
  echo ""
  check_dependencies

  local disks=()
  if (($# == 0)); then
    # Auto-detect disks
    print_color "$CYAN" "Auto-detecting disks..."
    read -ra disks <<< "$(detect_disks)"
  else
    # Specific disks were requested: keep only the valid ones
    for disk in "$@"; do
      if [[ ! -b "$disk" ]]; then
        print_color "$RED" "Error: $disk is not a valid block device"
        continue
      fi
      disks+=("$disk")
    done
  fi

  if ((${#disks[@]} == 0)); then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi
  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Warn when not running as root
  if ((EUID != 0)); then
    print_color "$YELLOW" "Warning: Not running as root. Some disks may not be accessible."
    echo "For complete results, run as: sudo $0"
    echo ""
  fi

  # Check each disk
  for disk in "${disks[@]}"; do
    check_disk "$disk"
  done
  print_color "$BLUE" "Check completed!"
}
# Print the command-line help text on stdout.
# Globals:
#   SCRIPT_NAME (read) - name shown in the synopsis and the examples
usage() {
  cat <<EOF
Usage: $SCRIPT_NAME [DISK1 DISK2 ...]

If no disks specified, auto-detects all available disks

Examples:
 $SCRIPT_NAME # Check all auto-detected disks
 sudo $SCRIPT_NAME # Check all disks (as root)
 $SCRIPT_NAME /dev/sda # Check specific disk
 $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks
EOF
}
# Dispatch on the first command-line argument: help and version requests are
# answered directly and end the script; anything else (including no
# arguments at all) is handed to main unchanged.
first_arg="${1:-}"
if [[ "$first_arg" == "-h" || "$first_arg" == "--help" ]]; then
  usage
  exit 0
elif [[ "$first_arg" == "-v" || "$first_arg" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi