711 lines
24 KiB
Bash
Executable File
711 lines
24 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
# Disk Health Check Script for Alma Linux 9
|
|
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
|
|
# Supports consumer and enterprise disk classification
|
|
|
|
# Name used in usage/version output, and the script version.
SCRIPT_NAME=$(basename "$0")
VERSION="2.7"

# Color codes.
# tput exits non-zero and complains on stderr when TERM is unset or unknown
# (for example under cron). Errors are silenced and each variable then falls
# back to an empty string, so output is simply uncoloured in that case.
RED=$(tput setaf 1 2>/dev/null || true)
GREEN=$(tput setaf 2 2>/dev/null || true)
YELLOW=$(tput setaf 3 2>/dev/null || true)
BLUE=$(tput setaf 4 2>/dev/null || true)
CYAN=$(tput setaf 6 2>/dev/null || true)
MAGENTA=$(tput setaf 5 2>/dev/null || true)
NC=$(tput sgr0 2>/dev/null || true)
|
|
|
|
# Print a message wrapped in a colour sequence.
# Arguments: $1 - colour escape sequence (e.g. $RED)
#            $2 - message text
# Outputs:   colour + message + $NC and a newline on stdout; backslash
#            escapes in the text are interpreted.
print_color() {
  local color=$1 message=$2
  printf '%b\n' "${color}${message}${NC}"
}
|
|
|
|
# Report whether a command can be resolved by the shell.
# Arguments: $1 - command name
# Returns:   0 if `command -v` finds it, non-zero otherwise; prints nothing.
command_exists() {
  local candidate=$1
  command -v "$candidate" &>/dev/null
}
|
|
|
|
# Verify that the external tools this script relies on are installed.
# Outputs: when something is missing, an error line (via print_color) and an
#          install hint naming the missing packages.
# Exits:   with status 1 when at least one required command is missing.
check_dependencies() {
  local -a missing=()
  local spec

  # Each entry is "<command to look for>:<package that provides it>".
  for spec in "smartctl:smartmontools" "bc:bc"; do
    command_exists "${spec%%:*}" || missing+=("${spec#*:}")
  done

  if (( ${#missing[@]} > 0 )); then
    print_color "$RED" "Error: Missing required packages: ${missing[*]}"
    echo "Install with: sudo dnf install ${missing[*]}"
    exit 1
  fi
}
|
|
|
|
# TBW endurance standards (using lowest numbers).
# Consumer-class table: key = capacity tier, value = endurance figure.
declare -A CONSUMER_TBW
CONSUMER_TBW[250]=150
CONSUMER_TBW[500]=300
CONSUMER_TBW[1000]=600
CONSUMER_TBW[2000]=1200
CONSUMER_TBW[4000]=2400
CONSUMER_TBW[8000]=4800
|
|
|
|
# Enterprise-class table: key = capacity tier, value = endurance figure.
declare -A ENTERPRISE_TBW
ENTERPRISE_TBW[250]=450
ENTERPRISE_TBW[500]=900
ENTERPRISE_TBW[1000]=1800
ENTERPRISE_TBW[2000]=3600
ENTERPRISE_TBW[4000]=7200
ENTERPRISE_TBW[8000]=14400
|
|
|
|
# Map a capacity to the smallest tier that can hold it.
# Arguments: $1 - capacity in GB (non-negative integer)
# Outputs:   one of 250, 500, 1000, 2000, 4000, 8000 on stdout
# Notes:     empty or non-numeric input is treated as 0 (smallest tier);
#            anything above the largest tier is capped at 8000.
get_capacity_tier() {
  local capacity_gb=$1
  local tier
  local -a tiers=(250 500 1000 2000 4000 8000)

  [[ "$capacity_gb" =~ ^[0-9]+$ ]] || capacity_gb=0

  for tier in "${tiers[@]}"; do
    # 10# forces base 10 so a zero-padded value is not parsed as octal.
    if (( 10#$capacity_gb <= tier )); then
      echo "$tier"
      return
    fi
  done

  # Larger than every tier: cap at the biggest one (no scaling is applied).
  echo "8000"
}
|
|
|
|
# Render a byte count with a binary-scaled unit.
# Arguments: $1 - size in bytes
# Outputs:   "<value> TB|GB|MB" with two decimals computed by bc ("0" if bc
#            fails), or "<n> bytes" for anything under 1 MiB.
bytes_to_human() {
  local bytes=$1
  local divisor unit scaled

  if [[ $bytes -ge 1099511627776 ]]; then
    divisor=1099511627776
    unit="TB"
  elif [[ $bytes -ge 1073741824 ]]; then
    divisor=1073741824
    unit="GB"
  elif [[ $bytes -ge 1048576 ]]; then
    divisor=1048576
    unit="MB"
  else
    echo "$bytes bytes"
    return
  fi

  scaled=$(echo "scale=2; $bytes / $divisor" | bc 2>/dev/null || echo "0")
  echo "$scaled $unit"
}
|
|
|
|
# Reduce a power-on-hours field to its leading run of digits.
# Arguments: $1 - raw field text (e.g. "12345" or "12345h+30m")
# Outputs:   the leading integer, or "0" when the text does not start with
#            a digit.
extract_numeric_hours() {
  local power_on_hours=$1
  local digits

  # Cut everything from the first non-digit character onwards.
  digits=$(sed 's/[^0-9].*$//' <<< "$power_on_hours")
  [[ "$digits" =~ ^[0-9]+$ ]] || digits="0"
  echo "$digits"
}
|
|
|
|
# Classify a disk from `smartctl -i` output.
# Arguments: $1 - device path (e.g. /dev/sda)
# Outputs:   "<type>|<transport>|<is_enterprise>" on stdout, where type is
#            NVMe, SAS, SSD, HDD or UNKNOWN, transport is NVMe, SAS, SATA or
#            empty, and is_enterprise is "true" or "false".
# Notes:     the enterprise flag is a keyword heuristic. Keywords must stand
#            alone (delimited by non-alphanumerics or line ends), otherwise
#            "EP" matches "Sep" in the "Local Time is" line and "DC" matches
#            "WDC" in a model name.
get_disk_info() {
  local disk=$1
  local info model
  local transport=""
  local disk_type="UNKNOWN"
  local is_enterprise=false
  # Token boundaries; "_" counts as a separator so Foo_PRO_123 still matches.
  local -r left='(^|[^[:alnum:]])'
  local -r right='([^[:alnum:]]|$)'

  info=$(smartctl -i "$disk" 2>/dev/null)

  if [[ "$disk" == /dev/nvme* ]] || grep -qi "NVMe" <<< "$info"; then
    disk_type="NVMe"
    transport="NVMe"
  elif grep -qi "SAS" <<< "$info"; then
    disk_type="SAS"
    transport="SAS"
    is_enterprise=true
  elif grep -qi "Solid State Device" <<< "$info"; then
    disk_type="SSD"
    transport="SATA"
  elif grep -qi "Rotation Rate" <<< "$info"; then
    disk_type="HDD"
    transport="SATA"
  fi

  # Enterprise keywords anywhere in the identify output.
  if grep -qiE "${left}(ENTERPRISE|EP|SAS|Xeons?|DualPort|PowerLoss|PLP)${right}" <<< "$info"; then
    is_enterprise=true
  fi

  # Enterprise keywords in the model name.
  model=$(grep -i "Device Model:" <<< "$info" | cut -d: -f2 | sed 's/^[[:space:]]*//')
  if grep -qiE "${left}(PRO|EP|DC|ENT|ENTERPRISE)${right}" <<< "$model"; then
    is_enterprise=true
  fi

  echo "$disk_type|$transport|$is_enterprise"
}
|
|
|
|
# Convert a write counter into terabytes written (decimal TB, 10^12 bytes).
# Arguments: $1 - disk type (accepted for interface compatibility; the same
#                 conversion applies to every type)
#            $2 - raw counter in 32 MiB units, used when $3 is absent or 0
#            $3 - counter in 512-byte sectors (preferred when non-zero)
# Outputs:   TB written with two decimals (truncated), e.g. "1.00", or "0"
#            when neither counter is a positive integer.
calculate_tbw() {
  local disk_type=$1 # unused on purpose, see header
  local raw_value=$2
  local sectors=$3
  local bytes hundredths

  # 10# forces base 10 so zero-padded counters are not parsed as octal.
  if [[ "$sectors" =~ ^[0-9]+$ ]] && (( 10#$sectors > 0 )); then
    bytes=$(( 10#$sectors * 512 ))
  elif [[ "$raw_value" =~ ^[0-9]+$ ]] && (( 10#$raw_value > 0 )); then
    # One unit is 32 MiB = 32 * 1024 * 1024 bytes.
    bytes=$(( 10#$raw_value * 33554432 ))
  else
    echo "0"
    return
  fi

  # Integer arithmetic in hundredths of a TB: no bc round trip, and values
  # below 1 TB keep their leading zero.
  hundredths=$(( bytes / 10000000000 ))
  printf '%d.%02d\n' "$(( hundredths / 100 ))" "$(( hundredths % 100 ))"
}
|
|
|
|
# Look up the endurance estimate for a drive.
# Arguments: $1 - capacity in GB
#            $2 - "true" to use the enterprise table, anything else consumer
#            $3 - disk type
# Outputs:   "N/A" for HDDs, otherwise the table entry for the capacity tier
#            returned by get_capacity_tier.
get_estimated_endurance() {
  local capacity_gb=$1
  local is_enterprise=$2
  local disk_type=$3
  local tier

  # HDDs don't have TBW
  if [[ "$disk_type" == "HDD" ]]; then
    echo "N/A"
    return
  fi

  tier=$(get_capacity_tier "$capacity_gb")

  case "$is_enterprise" in
    true) echo "${ENTERPRISE_TBW[$tier]}" ;;
    *) echo "${CONSUMER_TBW[$tier]}" ;;
  esac
}
|
|
|
|
# Estimate remaining SSD life from TB written versus an endurance figure.
# Arguments: $1 - power-on hours (thousands separators are tolerated)
#            $2 - TB written so far
#            $3 - endurance in TB, or "N/A"
#            $4 - disk type (echoed back as the last field)
# Outputs:   "<percent remaining>|<TB remaining>|<status>|<type>" on stdout.
#            The first two fields are wrapped in a colour sequence when a
#            wear level could be computed.
estimate_ssd_lifespan() {
  local power_on_hours=$1
  local tbw_used=$2
  local estimated_endurance=$3
  local disk_type=$4
  local hours clean_tbw_used tbw_remaining lifespan_used lifespan_remaining
  local color status
  local -r re_number='^([0-9]+\.?[0-9]*|\.[0-9]+)$'

  # Drop whitespace and thousands separators, then keep the leading digits.
  # Feeding "1,000" straight into an arithmetic test would evaluate the comma
  # operator and yield 0.
  hours=${power_on_hours//[[:space:],]/}
  hours=${hours%%[!0-9]*}
  if [[ -z "$hours" ]] || (( 10#$hours == 0 )); then
    echo "Unknown||Unknown|New"
    return
  fi

  if [[ "$estimated_endurance" == "N/A" ]]; then
    echo "N/A|N/A|N/A|HDD"
    return
  fi

  # Without a positive endurance figure the percentages below would divide
  # by zero (or by nothing) inside bc.
  if [[ ! "$estimated_endurance" =~ $re_number || -z "${estimated_endurance//[0.]/}" ]]; then
    echo "Unknown|Unknown|Unknown|$disk_type"
    return
  fi

  # Strip units or stray characters; fall back to 0 if no number is left.
  clean_tbw_used=${tbw_used//[!0-9.]/}
  [[ "$clean_tbw_used" =~ $re_number ]] || clean_tbw_used=0

  tbw_remaining=$(echo "scale=2; $estimated_endurance - $clean_tbw_used" | bc 2>/dev/null || echo "$estimated_endurance")

  if [[ $(echo "$clean_tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
    lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0")
    lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100")

    if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then
      color=$RED
      status="High wear"
    elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then
      color=$YELLOW
      status="Moderate wear"
    else
      color=$GREEN
      status="Healthy"
    fi
    echo "${color}${lifespan_remaining}%${NC}|${color}${tbw_remaining} TB${NC}|${status}|$disk_type"
  else
    echo "Unknown|${estimated_endurance} TB|New|$disk_type"
  fi
}
|
|
|
|
# Produce a coarse life-expectancy verdict for a drive from SMART counters.
# Arguments: $1 - power-on hours field (passed through extract_numeric_hours)
#            $2 - reallocated sector count
#            $3 - pending sector count
#            $4 - start/stop count
#            $5 - load cycle count
#            $6 - disk type; the mechanical counters only count for "HDD"
# Outputs:   "Unknown" when hours are missing or 0, a CRITICAL line when any
#            sector is pending, otherwise one of four time-range verdicts
#            chosen from an accumulated severity score.
estimate_hdd_lifespan() {
  local power_on_hours=$1
  local reallocated_sectors=$2
  local pending_sectors=$3
  local start_stop_count=$4
  local load_cycle_count=$5
  local disk_type=$6
  local hours
  local severity=0

  hours=$(extract_numeric_hours "$power_on_hours")
  if [[ -z "$hours" ]] || (( hours == 0 )); then
    echo "Unknown"
    return
  fi

  # Pending sectors short-circuit everything else.
  if (( pending_sectors > 0 )); then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
    return
  fi

  # Reallocated sectors.
  if (( reallocated_sectors > 100 )); then
    (( severity += 3 ))
  elif (( reallocated_sectors > 10 )); then
    (( severity += 2 ))
  elif (( reallocated_sectors > 0 )); then
    (( severity += 1 ))
  fi

  # Hours in service.
  if (( hours > 50000 )); then
    (( severity += 3 ))
  elif (( hours > 30000 )); then
    (( severity += 2 ))
  elif (( hours > 15000 )); then
    (( severity += 1 ))
  fi

  # Mechanical wear counters.
  if [[ "$disk_type" == "HDD" ]]; then
    if (( start_stop_count > 50000 )); then
      (( severity += 2 ))
    elif (( start_stop_count > 20000 )); then
      (( severity += 1 ))
    fi

    if (( load_cycle_count > 500000 )); then
      (( severity += 2 ))
    elif (( load_cycle_count > 200000 )); then
      (( severity += 1 ))
    fi
  fi

  if (( severity >= 5 )); then
    echo "${RED}< 6 months${NC} (Multiple risk factors)"
  elif (( severity >= 3 )); then
    echo "${YELLOW}6-18 months${NC} (Moderate wear)"
  elif (( severity >= 1 )); then
    echo "${YELLOW}1-3 years${NC} (Light wear)"
  else
    echo "${GREEN}> 3 years${NC} (Healthy)"
  fi
}
|
|
|
|
# Print one field ("Label : value") from `mdadm --detail` output.
# Arguments: $1 - mdadm output, $2 - label text to search for
_md_detail_field() {
  grep -- "$2" <<< "$1" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//'
}

# Report software RAID (mdraid) arrays listed in /proc/mdstat.
# Outputs: for each mdN block device, a "Found software RAID" line and, when
#          mdadm is installed and returns details, RAID level, state and
#          active device count.
check_mdraid() {
  local -a md_devices=()
  # Declared local so the loops below do not overwrite same-named globals.
  local line md md_info

  if [[ -f /proc/mdstat ]]; then
    while IFS= read -r line; do
      # Array lines start with the device name, e.g. "md0 : active raid1 ...".
      if [[ $line =~ ^md[0-9]+ ]]; then
        md_devices+=("/dev/${line%% *}")
      fi
    done < /proc/mdstat
  fi

  for md in "${md_devices[@]}"; do
    [[ -b "$md" ]] || continue
    print_color "$MAGENTA" "Found software RAID: $md"

    command_exists mdadm || continue
    md_info=$(mdadm --detail "$md" 2>/dev/null)
    [[ -n "$md_info" ]] || continue

    echo "RAID Level: $(_md_detail_field "$md_info" "Raid Level")"
    echo "State: $(_md_detail_field "$md_info" "State")"
    echo "Devices: $(_md_detail_field "$md_info" "Active Devices")"
    echo ""
  done
}
|
|
|
|
# Convert a byte count (digits, optional thousands separators) to whole
# decimal GB. Prints 0 for anything that is not a plausible integer.
_bytes_to_gb() {
  local digits=${1//,/}
  local -r re_int='^[0-9]{1,18}$' # 18 digits keeps the value inside 64 bits
  if [[ "$digits" =~ $re_int ]]; then
    echo "$(( 10#$digits / 1000000000 ))"
  else
    echo "0"
  fi
}

# Convert a decimal size such as "1.92" or "500" in unit G or T to whole GB,
# using integer arithmetic only. Prints 0 when the size cannot be parsed.
_size_to_gb() {
  local size=${1//,/}
  local unit=$2
  local -r re_decimal='^([0-9]*)\.?([0-9]*)$'
  local whole frac

  if [[ ! "$size" =~ $re_decimal ]]; then
    echo "0"
    return
  fi
  whole=${BASH_REMATCH[1]:-0}
  frac=${BASH_REMATCH[2]}

  if [[ "$unit" == "T" ]]; then
    # Keep three fractional digits: 1.92 TB -> 1 * 1000 + 920 GB.
    frac="${frac}000"
    echo "$(( 10#$whole * 1000 + 10#${frac:0:3} ))"
  else
    echo "$(( 10#$whole ))"
  fi
}

# Extract a capacity in whole decimal GB from a smartctl-style size string.
# Arguments: $1 - text such as "500,107,862,016 bytes [500 GB]",
#                 "1,000,204,886,016 [1.00 TB]", "1.92 TB" or "480 GB"
# Outputs:   capacity in GB, or "0" when nothing could be parsed.
# Notes:     patterns are tried in order: "<n> bytes", "[<n> GB|TB]",
#            "<n> TB", "<n> GB", "<n> B". They use [[:space:]] rather than
#            the glibc-only "\s" and tolerate any amount of whitespace, so
#            the input needs no normalisation first.
extract_capacity_gb() {
  local capacity=$1
  local capacity_gb=0
  # Kept in variables so bash hands them to the regex engine verbatim.
  local -r re_bytes='([0-9,]+)[[:space:]]*bytes'
  local -r re_bracket='\[([0-9,.]+)[[:space:]]*([GT])B?\]'
  local -r re_tb='([0-9,.]+)[[:space:]]*TB'
  local -r re_gb='([0-9,.]+)[[:space:]]*GB'
  local -r re_plain='([0-9,]+)[[:space:]]*[Bb]'

  # Method 1: explicit byte count.
  if [[ $capacity =~ $re_bytes ]]; then
    capacity_gb=$(_bytes_to_gb "${BASH_REMATCH[1]}")
  fi

  # Method 2: bracket format [XXX GB] or [X.XX TB].
  if (( capacity_gb == 0 )) && [[ $capacity =~ $re_bracket ]]; then
    capacity_gb=$(_size_to_gb "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}")
  fi

  # Method 3: bare "<n> TB" / "<n> GB"; the number is the one next to the unit.
  if (( capacity_gb == 0 )); then
    if [[ $capacity =~ $re_tb ]]; then
      capacity_gb=$(_size_to_gb "${BASH_REMATCH[1]}" "T")
    elif [[ $capacity =~ $re_gb ]]; then
      capacity_gb=$(_size_to_gb "${BASH_REMATCH[1]}" "G")
    fi
  fi

  # Method 4: a number followed by B/b, taken as bytes.
  if (( capacity_gb == 0 )) && [[ $capacity =~ $re_plain ]]; then
    capacity_gb=$(_bytes_to_gb "${BASH_REMATCH[1]}")
  fi

  if [[ "$capacity_gb" =~ ^[0-9]+$ ]] && (( capacity_gb > 0 )); then
    echo "$capacity_gb"
  else
    echo "0"
  fi
}
|
|
|
|
# Fetch the capacity text for an NVMe device from `smartctl -i`.
# Arguments: $1 - device path
# Outputs:   the value of the first non-empty field among
#            "Total NVM Capacity", "Namespace 1 Size/Capacity" and
#            "User Capacity" (the last one cut at the first "["); empty when
#            none is present.
get_nvme_capacity() {
  local disk=$1
  local smart_output field
  local value=""

  smart_output=$(smartctl -i "$disk" 2>/dev/null)

  for field in "Total NVM Capacity" "Namespace 1 Size/Capacity"; do
    value=$(grep -i "$field" <<< "$smart_output" | cut -d: -f2 | sed 's/^[ \t]*//')
    if [[ -n "$value" ]]; then
      break
    fi
  done

  if [[ -z "$value" ]]; then
    value=$(grep -i "User Capacity" <<< "$smart_output" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
  fi

  echo "$value"
}
|
|
|
|
# Format a capacity for display.
# Arguments: $1 - capacity in GB
# Outputs:   "<n> GB" below 1000, otherwise "<n / 1000> TB" with two
#            decimals computed by bc.
get_human_capacity() {
  local capacity_gb=$1

  if ! [[ $capacity_gb -ge 1000 ]]; then
    echo "${capacity_gb} GB"
    return
  fi

  printf '%s TB\n' "$(bc <<< "scale=2; $capacity_gb / 1000")"
}
|
|
|
|
# Determine a disk's capacity from the block layer, as a fallback when the
# SMART identify data could not be parsed.
# Arguments: $1 - device path
# Outputs:   capacity in whole decimal GB, or "0" when neither lsblk nor
#            fdisk yields a byte count.
get_block_device_capacity() {
  local disk=$1
  local capacity_gb=0
  local size_bytes fdisk_line
  local -r re_bytes='([0-9]+)[[:space:]]+bytes'

  if command_exists lsblk; then
    # -d lists only the device itself (no partition rows), -b prints bytes.
    size_bytes=$(lsblk -b -d -n -o SIZE "$disk" 2>/dev/null | head -1)
    # Strip column padding before the numeric check.
    size_bytes=${size_bytes//[[:space:]]/}
    if [[ "$size_bytes" =~ ^[0-9]+$ ]]; then
      capacity_gb=$(( 10#$size_bytes / 1000000000 ))
    fi
  fi

  # Alternative method using fdisk. The "Disk <dev>: ..." line carries an
  # exact byte count; the GiB/TiB figure on the same line is binary-scaled
  # and must not be reported as GB/TB.
  if (( capacity_gb == 0 )) && command_exists fdisk; then
    fdisk_line=$(fdisk -l "$disk" 2>/dev/null | grep -F "Disk $disk" | head -1)
    if [[ $fdisk_line =~ $re_bytes ]]; then
      capacity_gb=$(( 10#${BASH_REMATCH[1]} / 1000000000 ))
    fi
  fi

  echo "$capacity_gb"
}
|
|
|
|
# Print the value of a "Label: value" line from smartctl output.
# Arguments: $1 - smartctl output, $2 - label text (matched case-insensitively)
_smart_info_field() {
  grep -i -- "$2" <<< "$1" | cut -d: -f2 | sed 's/^[ \t]*//'
}

# Print column 10 (the raw value) of the first attribute row that contains
# the given name. Arguments: $1 - `smartctl -A` output, $2 - attribute name
_smart_attr_raw() {
  awk -v name="$2" 'index($0, name) { print $10; exit }' <<< "$1"
}

# Collect SMART data for one disk and print a health report.
# Arguments: $1 - device path
# Outputs:   identification, health and wear information on stdout; a short
#            diagnostic instead when the path is not a block device or
#            smartctl cannot read it.
check_disk() {
  local disk=$1
  local disk_info disk_type transport is_enterprise
  local info info_status health attributes
  local model serial capacity firmware nvme_capacity
  local capacity_gb capacity_human health_status class
  local power_on_hours_raw power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib start_stop_count load_cycle_count
  local nvme_attributes data_units_written nvme_power_hours
  local lifespan tbw_used estimated_endurance
  local lifespan_info lifespan_percent tbw_remaining wear_status

  print_color "$CYAN" "Checking disk: $disk"
  echo "=================================================="

  if [[ ! -b "$disk" ]]; then
    print_color "$RED" "Error: $disk is not a valid block device"
    echo ""
    return
  fi

  # "<type>|<transport>|<is_enterprise>"
  disk_info=$(get_disk_info "$disk")
  IFS='|' read -r disk_type transport is_enterprise <<< "$disk_info"

  # Assignment is kept separate from `local` so $? is smartctl's status.
  info=$(smartctl -i "$disk" 2>/dev/null)
  info_status=$?
  health=$(smartctl -H "$disk" 2>/dev/null)
  attributes=$(smartctl -A "$disk" 2>/dev/null)

  # smartctl's exit status is a bit mask: bit 0 = command line did not parse,
  # bit 1 = device could not be opened. Higher bits report disk conditions
  # and still come with usable output.
  if (( info_status & 3 )) || [[ -z "$info" ]]; then
    print_color "$YELLOW" "Cannot read disk information. It may be offline, unsupported, or need root access."
    echo ""
    return
  fi

  # Identification fields (ATA label first, NVMe label as fallback).
  model=$(_smart_info_field "$info" "Device Model:")
  [[ -z "$model" ]] && model=$(_smart_info_field "$info" "Model Number:")
  serial=$(_smart_info_field "$info" "Serial Number:")
  capacity=$(_smart_info_field "$info" "User Capacity:" | cut -d'[' -f1)
  firmware=$(_smart_info_field "$info" "Firmware Version:")

  # For NVMe disks, try to get capacity from different fields
  if [[ "$disk_type" == "NVMe" ]]; then
    nvme_capacity=$(get_nvme_capacity "$disk")
    if [[ -n "$nvme_capacity" ]]; then
      capacity="$nvme_capacity"
    fi
  fi

  capacity_gb=$(extract_capacity_gb "$capacity")

  # If capacity extraction failed, try block device method
  if [[ "$capacity_gb" -eq 0 ]]; then
    capacity_gb=$(get_block_device_capacity "$disk")
  fi

  if [[ "$capacity_gb" -gt 0 ]]; then
    capacity_human=$(get_human_capacity "$capacity_gb")
  else
    capacity_human="Unknown"
  fi

  health_status=$(grep "result:" <<< "$health" | cut -d: -f2 | sed 's/^[ \t]*//')
  [[ -z "$health_status" ]] && health_status=$(grep "SMART overall-health" <<< "$health" | cut -d: -f2 | sed 's/^[ \t]*//')
  # NOTE(review): SCSI/SAS devices are expected to print
  # "SMART Health Status: OK" instead - confirm against a SAS drive.
  [[ -z "$health_status" ]] && health_status=$(grep "SMART Health Status" <<< "$health" | cut -d: -f2 | sed 's/^[ \t]*//')

  # SMART attributes (raw value column).
  power_on_hours_raw=$(_smart_attr_raw "$attributes" "Power_On_Hours")
  power_on_hours=$(extract_numeric_hours "$power_on_hours_raw")
  reallocated_sectors=$(_smart_attr_raw "$attributes" "Reallocated_Sector_Ct")
  pending_sectors=$(_smart_attr_raw "$attributes" "Current_Pending_Sector")
  # Sector counter only. Host_Writes_32MiB is in 32 MiB units and must not be
  # multiplied by the sector size, so it is kept in its own variable.
  total_written=$(_smart_attr_raw "$attributes" "Total_LBAs_Written")
  host_writes_32mib=$(_smart_attr_raw "$attributes" "Host_Writes_32MiB")
  start_stop_count=$(_smart_attr_raw "$attributes" "Start_Stop_Count")
  load_cycle_count=$(_smart_attr_raw "$attributes" "Load_Cycle_Count")

  # For NVMe disks using smartctl extended attributes
  if [[ "$disk_type" == "NVMe" ]]; then
    nvme_attributes=$(smartctl -x "$disk" 2>/dev/null)

    # "Data Units Written:   1,234,567 [632 GB]" - field 4 is the count.
    data_units_written=$(grep "Data Units Written" <<< "$nvme_attributes" | head -1 | awk '{print $4}' | tr -d ',')
    if [[ "$data_units_written" =~ ^[0-9]+$ ]]; then
      # Convert data units to sectors (1 data unit = 1000 sectors for NVMe)
      total_written=$(( 10#$data_units_written * 1000 ))
    fi

    # Thousands separators are removed so later arithmetic tests see a
    # plain integer rather than a comma expression.
    nvme_power_hours=$(grep "Power On Hours" <<< "$nvme_attributes" | head -1 | awk '{print $4}' | tr -d ',')
    if [[ -n "$nvme_power_hours" ]]; then
      power_on_hours="$nvme_power_hours"
    fi
  fi

  # Compare as a string: the flag must never be executed as a command.
  if [[ "$is_enterprise" == "true" ]]; then
    class="Enterprise"
  else
    class="Consumer"
  fi

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Interface: $transport"
  echo "Class: $class"
  echo "Capacity: $capacity_human"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"
  echo "Power On Hours: ${power_on_hours:-Unknown}"

  # Disk type specific analysis
  if [[ "$disk_type" == "HDD" ]]; then
    echo "Realloc Sectors: ${reallocated_sectors:-0}"
    echo "Pending Sectors: ${pending_sectors:-0}"
    [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}"
    [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}"

    lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type")
    echo "Lifespan: $lifespan"

  elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then
    tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "")
    fi

    estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type")

    echo "TBW Used: ${tbw_used} TB"
    echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)"

    # "<percent>|<TB remaining>|<status>|<type>"
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type")
    lifespan_percent=$(cut -d'|' -f1 <<< "$lifespan_info")
    tbw_remaining=$(cut -d'|' -f2 <<< "$lifespan_info")
    wear_status=$(cut -d'|' -f3 <<< "$lifespan_info")

    if [[ "$estimated_endurance" != "N/A" ]]; then
      echo "TBW Remaining: $tbw_remaining"
      echo "Lifespan: $lifespan_percent ($wear_status)"
    fi

    # Show mechanical attributes for SAS drives that might be SSDs
    if [[ "$disk_type" == "SAS" ]]; then
      echo "Realloc Sectors: ${reallocated_sectors:-0}"
      echo "Pending Sectors: ${pending_sectors:-0}"
    fi
  else
    print_color "$YELLOW" "Unknown disk type - limited information available"
  fi

  echo ""
}
|
|
|
|
# List whole-disk block devices present on the system.
# Outputs: a single space-separated line of device paths (sdX/sdXX, NVMe
#          namespaces, vdX, xvdX); an empty line when none exist.
detect_disks() {
  local -a disks=()
  # Local so the loops do not overwrite a same-named variable in the caller.
  local disk
  # Namespace devices only: excludes partitions (nvme0n1p1) and controller
  # nodes (nvme0), and accepts multi-digit numbers such as nvme10n1.
  local -r re_nvme='^/dev/nvme[0-9]+n[0-9]+$'

  # SATA/SAS disks. Unmatched globs stay literal and fail the -b test.
  for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do
    if [[ -b "$disk" ]]; then
      disks+=("$disk")
    fi
  done

  # NVMe disks (base devices only, no partitions)
  for disk in /dev/nvme*n*; do
    if [[ "$disk" =~ $re_nvme && -b "$disk" ]]; then
      disks+=("$disk")
    fi
  done

  # Virtual disks (virtio, Xen)
  for disk in /dev/vd[a-z] /dev/xvd[a-z]; do
    if [[ -b "$disk" ]]; then
      disks+=("$disk")
    fi
  done

  echo "${disks[@]}"
}
|
|
|
|
# Entry point: print the banner, verify dependencies, report software RAID,
# then check every requested (or auto-detected) disk.
# Arguments: $@ - optional device paths; auto-detects when none are given
# Exits:     with status 1 when no usable disk is found (check_dependencies
#            may also exit).
main() {
  local -a disks=()
  # Local so the loops below do not leave a stray global behind.
  local disk

  print_color "$BLUE" "Disk Health Check Script v$VERSION for Alma Linux 9"
  print_color "$BLUE" "Created by Adam T. Lau"
  print_color "$BLUE" "===================================================="
  echo ""

  check_dependencies

  # Check for soft-raid first
  check_mdraid

  if (( $# > 0 )); then
    # Specific disks requested: keep the ones that are block devices.
    for disk in "$@"; do
      if [[ -b "$disk" ]]; then
        disks+=("$disk")
      else
        print_color "$RED" "Error: $disk is not a valid block device"
      fi
    done
  else
    print_color "$CYAN" "Auto-detecting disks..."
    # detect_disks prints one space-separated line.
    read -ra disks <<< "$(detect_disks)"
  fi

  if (( ${#disks[@]} == 0 )); then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi

  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check if running as root, warn if not
  if (( EUID != 0 )); then
    print_color "$YELLOW" "Warning: Not running as root. Some disks may not be accessible."
    echo "For complete results, run as: sudo $0"
    echo ""
  fi

  for disk in "${disks[@]}"; do
    check_disk "$disk"
  done

  print_color "$BLUE" "Check completed!"
  echo ""
  print_color "$YELLOW" "Note: TBW Endurance values shown are minimum guaranteed numbers."
  print_color "$YELLOW" " Actual endurance for your specific drive model may be higher."
}
|
|
|
|
# Print the command-line help text to stdout.
# Globals: SCRIPT_NAME (read)
usage() {
  printf '%s\n' \
    "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks" \
    "" \
    "Examples:" \
    " $SCRIPT_NAME # Check all auto-detected disks" \
    " sudo $SCRIPT_NAME # Check all disks (as root)" \
    " $SCRIPT_NAME /dev/sda # Check specific disk" \
    " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" \
    " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" \
    "" \
    "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID"
}
|
|
|
|
# Parse command line arguments: only the first argument is inspected for
# help/version flags; everything else is handed to main unchanged.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi
|