#!/bin/bash
# Disk Health Check Script for Alma Linux 9
# Checks SSD TBW/lifespan and HDD health status
#
# Usage: see usage() below. With no arguments the script auto-detects
# disks; otherwise each argument is treated as a block device path.
# Requires: smartctl (smartmontools) and awk.

SCRIPT_NAME=$(basename "$0")
VERSION="2.1"

# Color codes. tput fails when TERM is unset or unknown; fall back to
# empty strings so the script still produces plain output in that case.
RED=$(tput setaf 1 2>/dev/null) || RED=""
GREEN=$(tput setaf 2 2>/dev/null) || GREEN=""
YELLOW=$(tput setaf 3 2>/dev/null) || YELLOW=""
BLUE=$(tput setaf 4 2>/dev/null) || BLUE=""
CYAN=$(tput setaf 6 2>/dev/null) || CYAN=""
NC=$(tput sgr0 2>/dev/null) || NC=""

#######################################
# Print a message wrapped in a color code.
# Globals:   NC (read)
# Arguments: $1 - color escape sequence (may be empty)
#            $2 - message text
# Outputs:   the colored message on stdout
#######################################
print_color() {
  local color=$1
  local message=$2
  # printf '%s' keeps backslashes in the message (e.g. from a user-supplied
  # path) literal instead of interpreting them as escapes.
  printf '%s%s%s\n' "$color" "$message" "$NC"
}

#######################################
# Check if a command exists.
# Arguments: $1 - command name
# Returns:   0 if the command is found, non-zero otherwise
#######################################
command_exists() {
  command -v "$1" >/dev/null 2>&1
}

#######################################
# Check that the external tools used by this script are installed.
# Outputs:   an error and install hint on stdout when something is missing
# Returns:   0 when all tools are present, 1 otherwise
#######################################
check_dependencies() {
  local missing=()

  command_exists smartctl || missing+=("smartmontools")
  command_exists awk || missing+=("gawk")

  if (( ${#missing[@]} > 0 )); then
    print_color "$RED" "Error: Missing required packages: ${missing[*]}"
    echo "Install with: sudo dnf install ${missing[*]}"
    return 1
  fi
}

#######################################
# Print the leading decimal digits of a string as a base-10 integer.
# Used to sanitize SMART raw values, which are not always plain integers
# (e.g. "12345h+30m+10.5s").
# Arguments: $1 - arbitrary string
# Outputs:   the integer on stdout, or nothing if $1 has no leading digits
#######################################
leading_int() {
  local digits=${1%%[!0-9]*}
  if [[ -n "$digits" ]]; then
    # 10# forces base 10 so values with leading zeros are not read as octal.
    printf '%s\n' "$((10#$digits))"
  fi
}

#######################################
# Extract the value of a "Key: value" line from `smartctl -i` output.
# Arguments: $1 - full `smartctl -i` output
#            $2 - key without the trailing colon (e.g. "Device Model")
# Outputs:   the value of the first line starting with the key, with
#            leading blanks stripped; nothing if the key is absent
#######################################
smart_info_field() {
  LC_ALL=C awk -v key="$2" '
    index($0, key ":") == 1 {
      val = substr($0, length(key) + 2)
      sub(/^[ \t]+/, "", val)
      print val
      exit
    }' <<<"$1"
}

#######################################
# Extract the raw value (10th column) of a SMART attribute.
# Arguments: $1 - full `smartctl -A` output
#            $2 - attribute name, matched as a prefix of the name column
# Outputs:   the raw value of the first matching row; nothing if absent
#######################################
smart_attr_raw() {
  LC_ALL=C awk -v name="$2" '
    index($2, name) == 1 { print $10; exit }' <<<"$1"
}

#######################################
# Convert a smartctl "User Capacity" value to whole decimal gigabytes.
# The byte count is used rather than the bracketed "[1.00 TB]" summary so
# that terabyte-class drives are not misread as a few GB.
# Arguments: $1 - capacity text such as "480,103,981,056 bytes [480 GB]"
# Outputs:   capacity in GB (integer) on stdout; 0 if no byte count found
#######################################
capacity_to_gb() {
  local text=$1
  local bytes

  if [[ "$text" != *bytes* ]]; then
    echo "0"
    return
  fi

  bytes=${text%%bytes*}
  # Drop thousands separators (their character depends on the locale).
  bytes=${bytes//[!0-9]/}
  if [[ -z "$bytes" ]]; then
    echo "0"
    return
  fi

  echo "$(( 10#$bytes / 1000000000 ))"
}

#######################################
# Classify a disk as SSD or HDD from `smartctl -i` output.
# Arguments: $1 - device path
# Outputs:   "SSD", "HDD" or "UNKNOWN" on stdout
#######################################
get_disk_type() {
  local disk=$1
  local info
  info=$(smartctl -i "$disk" 2>/dev/null)

  # The SSD check must come first: SSDs report
  # "Rotation Rate: Solid State Device", which also contains "Rotation Rate".
  if grep -q "Solid State Device" <<<"$info"; then
    echo "SSD"
  elif grep -q "Rotation Rate" <<<"$info"; then
    echo "HDD"
  else
    echo "UNKNOWN"
  fi
}

#######################################
# Calculate terabytes written (decimal TB) for an SSD.
# Arguments: $1 - raw count of 32 MiB units written (Host_Writes_32MiB)
#            $2 - raw count of sectors written (Total_LBAs_Written);
#                 takes precedence over $1 when non-zero
# Outputs:   TB written with two decimals, or "0" if neither value is a
#            non-zero integer
# NOTE(review): the sector path assumes 512-byte units; some vendors
# report Total_LBAs_Written in other units - confirm per drive model.
#######################################
calculate_tbw() {
  local raw_value=$1
  local sectors=$2

  if [[ "$sectors" =~ ^[0-9]+$ && "$sectors" != "0" ]]; then
    LC_ALL=C awk -v s="$sectors" \
      'BEGIN { printf "%.2f\n", s * 512 / 1e12 }'
  elif [[ "$raw_value" =~ ^[0-9]+$ && "$raw_value" != "0" ]]; then
    # One unit is 32 MiB = 32 * 1048576 bytes (binary), not 32 MB.
    LC_ALL=C awk -v u="$raw_value" \
      'BEGIN { printf "%.2f\n", u * 32 * 1048576 / 1e12 }'
  else
    echo "0"
  fi
}

#######################################
# Estimate rated SSD endurance (TBW) from model name and capacity.
# This is a heuristic lookup, not a vendor specification.
# Arguments: $1 - disk model string
#            $2 - capacity in GB (non-numeric input is treated as 0)
# Outputs:   estimated endurance in TB on stdout
#######################################
estimate_ssd_endurance() {
  local disk_model=$1
  local capacity_gb
  capacity_gb=$(leading_int "$2")
  capacity_gb=${capacity_gb:-0}

  # Enterprise SSDs typically have higher endurance
  if grep -Eqi 'MTFDDAK480TDS|MICRON|INTEL|SAMSUNG|KIOXIA' <<<"$disk_model"; then
    # Enterprise SSDs
    if (( capacity_gb >= 1000 )); then
      echo "1200" # 1.2PB for 1TB enterprise
    elif (( capacity_gb >= 480 )); then
      echo "600" # 600TB for 480GB enterprise
    elif (( capacity_gb >= 240 )); then
      echo "300" # 300TB for 240GB enterprise
    else
      echo "150" # 150TB for smaller enterprise
    fi
  else
    # Consumer SSDs
    if (( capacity_gb >= 1000 )); then
      echo "600" # 600TB for 1TB consumer
    elif (( capacity_gb >= 480 )); then
      echo "300" # 300TB for 480GB consumer
    elif (( capacity_gb >= 240 )); then
      echo "150" # 150TB for 240GB consumer
    elif (( capacity_gb >= 120 )); then
      echo "80" # 80TB for 120GB consumer
    else
      echo "40" # 40TB for smaller drives
    fi
  fi
}

#######################################
# Estimate SSD lifespan with TBW remaining.
# Globals:   RED, YELLOW, GREEN, NC (read)
# Arguments: $1 - power-on hours (empty or 0 yields an all-Unknown result)
#            $2 - TB written so far
#            $3 - disk model string
#            $4 - capacity in GB
# Outputs:   "<remaining %>|<remaining TB>|<wear status>" on stdout;
#            every field is always non-empty
#######################################
estimate_ssd_lifespan() {
  local power_on_hours
  power_on_hours=$(leading_int "$1")
  local tbw_used=$2
  local disk_model=$3
  local capacity_gb=$4

  if [[ -z "$power_on_hours" ]] || (( power_on_hours == 0 )); then
    echo "Unknown|Unknown|Unknown"
    return
  fi

  # Anything that is not a plain decimal number counts as "no data".
  [[ "$tbw_used" =~ ^[0-9]*[.]?[0-9]+$ ]] || tbw_used=0

  local estimated_endurance
  estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")

  local level remaining_pct tbw_remaining
  read -r level remaining_pct tbw_remaining <<<"$(
    LC_ALL=C awk -v used="$tbw_used" -v total="$estimated_endurance" '
      BEGIN {
        used += 0; total += 0
        if (used <= 0) { print "new 0 0"; exit }
        used_pct = used * 100 / total
        left_pct = 100 - used_pct
        left_tb = total - used
        # A drive past its estimated endurance has nothing left,
        # not a negative amount.
        if (left_pct < 0) left_pct = 0
        if (left_tb < 0) left_tb = 0
        if (used_pct >= 80) level = "high"
        else if (used_pct >= 50) level = "moderate"
        else level = "ok"
        printf "%s %.1f %.2f\n", level, left_pct, left_tb
      }'
  )"

  local color label
  case "$level" in
    new)
      echo "Unknown|${estimated_endurance} TB|New"
      return
      ;;
    high)
      color=$RED
      label="High wear"
      ;;
    moderate)
      color=$YELLOW
      label="Moderate wear"
      ;;
    *)
      color=$GREEN
      label="Healthy"
      ;;
  esac

  echo "${color}${remaining_pct}%${NC}|${color}${tbw_remaining} TB${NC}|${label}"
}

#######################################
# Estimate HDD lifespan from usage hours and sector health counters.
# Globals:   RED, YELLOW, GREEN, NC (read)
# Arguments: $1 - power-on hours (leading digits are used)
#            $2 - reallocated sector count (default 0)
#            $3 - pending sector count (default 0)
# Outputs:   a one-line estimate on stdout, or "Unknown" when the
#            power-on hours are missing or not numeric
#######################################
estimate_hdd_lifespan() {
  local power_on_hours reallocated_sectors pending_sectors
  power_on_hours=$(leading_int "$1")
  reallocated_sectors=$(leading_int "$2")
  pending_sectors=$(leading_int "$3")

  if [[ -z "$power_on_hours" ]]; then
    echo "Unknown"
    return
  fi

  reallocated_sectors=${reallocated_sectors:-0}
  pending_sectors=${pending_sectors:-0}

  if (( pending_sectors > 0 )); then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
  elif (( reallocated_sectors > 100 )); then
    echo "${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)"
  elif (( reallocated_sectors > 10 )); then
    echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)"
  elif (( power_on_hours > 40000 )); then
    echo "${YELLOW}1-2 years${NC} (High usage: $power_on_hours hours)"
  elif (( power_on_hours > 25000 )); then
    echo "${GREEN}2-3 years${NC} (Moderate usage: $power_on_hours hours)"
  else
    echo "${GREEN}> 3 years${NC} (Low usage: $power_on_hours hours)"
  fi
}

#######################################
# Check disk SMART capabilities as reported by `smartctl -i`.
# Arguments: $1 - device path
# Outputs:   "enabled", "available" (supported but off) or "unavailable"
#######################################
check_smart_capability() {
  local disk=$1
  local info
  info=$(smartctl -i "$disk" 2>/dev/null)

  if ! grep -q "SMART support is: Available" <<<"$info"; then
    echo "unavailable"
  elif grep -q "SMART support is: Enabled" <<<"$info"; then
    echo "enabled"
  else
    echo "available"
  fi
}

#######################################
# Check a single disk and print its report.
# Globals:   RED, YELLOW, CYAN (read)
# Arguments: $1 - device path
# Outputs:   the report (or an explanation why none is possible) on stdout
#######################################
check_disk() {
  local disk=$1

  print_color "$CYAN" "Checking disk: $disk"
  echo "=================================================="

  # Check if disk exists and is accessible
  if [[ ! -b "$disk" ]]; then
    print_color "$RED" "Error: $disk is not a valid block device"
    echo ""
    return
  fi

  # Check SMART capability
  local smart_status
  smart_status=$(check_smart_capability "$disk")
  case "$smart_status" in
    unavailable)
      print_color "$YELLOW" "SMART not supported on $disk"
      echo ""
      return
      ;;
    available)
      print_color "$YELLOW" "SMART available but not enabled on $disk"
      echo "Enable with: smartctl -s on $disk"
      echo ""
      return
      ;;
  esac

  # Get basic disk information. Declarations are kept separate from the
  # assignments: `local var=$(cmd)` would replace cmd's exit status with
  # the (always zero) status of `local`.
  local info health attributes rc
  info=$(smartctl -i "$disk" 2>/dev/null)
  health=$(smartctl -H "$disk" 2>/dev/null)
  attributes=$(smartctl -A "$disk" 2>/dev/null)
  rc=$?

  # smartctl's exit status is a bit mask. Per its man page, bits 0 and 1
  # mean the command line did not parse or the device could not be opened;
  # higher bits describe disk health and still come with usable output.
  if (( rc & 3 )); then
    print_color "$RED" "Error: Cannot read SMART data from $disk"
    echo "You may need to run this script as root"
    echo ""
    return
  fi

  # Extract disk information
  local model serial capacity firmware
  model=$(smart_info_field "$info" "Device Model")
  serial=$(smart_info_field "$info" "Serial Number")
  capacity=$(smart_info_field "$info" "User Capacity")
  capacity=${capacity%%\[*} # drop the bracketed "[480 GB]" summary
  firmware=$(smart_info_field "$info" "Firmware Version")

  # Extract capacity in GB for endurance calculation
  local capacity_gb
  capacity_gb=$(capacity_to_gb "$capacity")

  local disk_type health_status
  disk_type=$(get_disk_type "$disk")
  health_status=$(LC_ALL=C awk '
    /result:/ { sub(/^[^:]*:[ \t]*/, ""); print; exit }' <<<"$health")

  # Extract SMART attributes. The two write counters are read separately
  # because they use different units (sectors vs. 32 MiB blocks).
  local power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib
  power_on_hours=$(smart_attr_raw "$attributes" "Power_On_Hours")
  reallocated_sectors=$(smart_attr_raw "$attributes" "Reallocated_Sector_Ct")
  pending_sectors=$(smart_attr_raw "$attributes" "Current_Pending_Sector")
  total_written=$(smart_attr_raw "$attributes" "Total_LBAs_Written")
  host_writes_32mib=$(smart_attr_raw "$attributes" "Host_Writes_32MiB")

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Capacity: ${capacity:-Unknown}"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"
  echo "Power On Hours: ${power_on_hours:-Unknown}"

  # Disk type specific analysis
  if [[ "$disk_type" == "SSD" ]]; then
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$host_writes_32mib" "")
    fi

    echo "TBW Used: ${tbw_used} TB"

    local lifespan_info lifespan_percent tbw_remaining wear_status
    lifespan_info=$(estimate_ssd_lifespan \
      "$power_on_hours" "$tbw_used" "$model" "$capacity_gb")
    IFS='|' read -r lifespan_percent tbw_remaining wear_status \
      <<<"$lifespan_info"

    echo "TBW Remaining: $tbw_remaining"
    echo "Lifespan: $lifespan_percent ($wear_status)"
  elif [[ "$disk_type" == "HDD" ]]; then
    echo "Realloc Sectors: ${reallocated_sectors:-0}"
    echo "Pending Sectors: ${pending_sectors:-0}"

    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" \
      "${reallocated_sectors:-0}" "${pending_sectors:-0}")
    echo "Lifespan: $lifespan"
  else
    print_color "$YELLOW" "Limited information available for this disk type"
  fi

  echo ""
}

#######################################
# Detect all whole-disk block devices (SATA/SAS, NVMe, virtio, Xen).
# Outputs:   space-separated device paths on one line of stdout
#######################################
detect_disks() {
  local disks=()
  local disk

  # Unmatched glob patterns stay literal; the -b test filters them out.
  for disk in /dev/sd[a-z] /dev/sd[a-z][a-z] \
    /dev/nvme[0-9]*n[0-9]* \
    /dev/vd[a-z] /dev/xvd[a-z]; do
    [[ -b "$disk" ]] || continue

    # Keep NVMe namespaces (nvme0n1) but skip their partitions
    # (nvme0n1p1), matching how sdX partitions are already excluded.
    if [[ "$disk" == /dev/nvme* &&
      ! "$disk" =~ ^/dev/nvme[0-9]+n[0-9]+$ ]]; then
      continue
    fi

    disks+=("$disk")
  done

  echo "${disks[@]}"
}

#######################################
# Main function: check the given disks, or all auto-detected ones.
# Globals:   VERSION, EUID, color codes (read)
# Arguments: optional device paths
# Returns:   0 on completion; 1 if dependencies are missing or no disk
#            could be selected
#######################################
main() {
  print_color "$BLUE" "Disk Health Check Script v$VERSION for Alma Linux 9"
  print_color "$BLUE" "===================================================="
  echo ""

  check_dependencies || return 1

  local disks=()
  local disk

  # If specific disk provided, check only that disk
  if (( $# > 0 )); then
    for disk in "$@"; do
      if [[ -b "$disk" ]]; then
        disks+=("$disk")
      else
        print_color "$RED" "Error: $disk is not a valid block device"
      fi
    done
  else
    # Auto-detect disks
    print_color "$CYAN" "Auto-detecting disks..."
    read -ra disks <<<"$(detect_disks)"
  fi

  if (( ${#disks[@]} == 0 )); then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    return 1
  fi

  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check if running as root, warn if not
  if (( EUID != 0 )); then
    print_color "$YELLOW" "Warning: Not running as root. Some disks may not be accessible."
    echo "For complete results, run as: sudo $0"
    echo ""
  fi

  # Check each disk
  for disk in "${disks[@]}"; do
    check_disk "$disk"
  done

  print_color "$BLUE" "Check completed!"
}

#######################################
# Print usage information.
# Globals:   SCRIPT_NAME (read)
#######################################
usage() {
  echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]"
  echo ""
  echo "If no disks specified, auto-detects all available disks"
  echo ""
  echo "Examples:"
  echo " $SCRIPT_NAME # Check all auto-detected disks"
  echo " sudo $SCRIPT_NAME # Check all disks (as root)"
  echo " $SCRIPT_NAME /dev/sda # Check specific disk"
  echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks"
}

# Parse command line arguments. main is the last command in the default
# branch, so its return status becomes the script's exit status.
case "${1:-}" in
  -h | --help)
    usage
    exit 0
    ;;
  -v | --version)
    echo "$SCRIPT_NAME version $VERSION"
    exit 0
    ;;
  *)
    main "$@"
    ;;
esac