#!/bin/bash
#
# Disk Health Check Script for Ubuntu 24.04
# Enhanced with SAS/PERC H730P controller support
# Checks SSD TBW/lifespan and HDD health status
#
# Requires: smartmontools (smartctl) and bc. Run as root for full SMART access.
# Usage:    see usage() at the bottom of this file.
#
# NOTE(review): check_disk accepts an optional smartctl device type
# (e.g. "megaraid,0"), but main currently never passes one, so disks hidden
# behind a RAID controller are only reachable once a caller supplies it.

SCRIPT_NAME=${0##*/}
VERSION="2.8"

# Color codes. tput fails when TERM is missing or unknown (cron, some ssh
# sessions); in that case the variables stay empty and output is uncolored.
RED=$(tput setaf 1 2>/dev/null || true)
GREEN=$(tput setaf 2 2>/dev/null || true)
YELLOW=$(tput setaf 3 2>/dev/null || true)
BLUE=$(tput setaf 4 2>/dev/null || true)
CYAN=$(tput setaf 6 2>/dev/null || true)
NC=$(tput sgr0 2>/dev/null || true)

#######################################
# Prints a message wrapped in a color escape sequence.
# Arguments: $1 - color escape (may be empty), $2 - message
# Outputs:   the colored message followed by a newline on stdout
#######################################
print_color() {
  local color=$1
  local message=$2
  printf '%s%s%s\n' "$color" "$message" "$NC"
}

#######################################
# Tests whether a command is available on PATH.
# Arguments: $1 - command name
# Returns:   0 if found, non-zero otherwise
#######################################
command_exists() {
  command -v "$1" >/dev/null 2>&1
}

#######################################
# Verifies that smartctl and bc are installed; exits with status 1 and an
# install hint when any is missing.
#######################################
check_dependencies() {
  local -a missing=()

  command_exists smartctl || missing+=("smartmontools")
  command_exists bc || missing+=("bc")

  if (( ${#missing[@]} > 0 )); then
    print_color "$RED" "Error: Missing required packages: ${missing[*]}" >&2
    echo "Install with: sudo apt update && sudo apt install ${missing[*]}" >&2
    exit 1
  fi
}

#######################################
# Runs smartctl, optionally with "-d <controller>", discarding stderr.
# The command line is built as an array so arguments are never re-split.
# Arguments: $1 - controller/device type (may be empty); rest - smartctl args
# Outputs:   smartctl's stdout
# Returns:   smartctl's exit status
#######################################
run_smartctl() {
  local controller=$1
  shift
  local -a cmd=(smartctl)
  if [[ -n "$controller" ]]; then
    cmd+=(-d "$controller")
  fi
  "${cmd[@]}" "$@" 2>/dev/null
}

#######################################
# Prints the value part of the first "Label: value" line that matches.
# Arguments: $1 - text to search, $2 - case-insensitive ERE for the label
# Outputs:   the text after the first colon, trimmed; nothing if no match
#######################################
smart_field() {
  local text=$1
  local pattern=$2
  grep -iE -m1 -- "$pattern" <<< "$text" \
    | cut -d: -f2- \
    | sed 's/^[[:space:]]*//; s/[[:space:]]*$//'
}

#######################################
# Prints one column of the first ATA attribute line matching a name.
# Arguments: $1 - `smartctl -A` output
#            $2 - case-insensitive ERE for the attribute name
#            $3 - "last" to print the last field; default is field 10,
#                 the RAW_VALUE column of the ATA attribute table
#######################################
ata_attr() {
  local attributes=$1
  local pattern=$2
  local which=${3:-raw}
  local line
  line=$(grep -iE -m1 -- "$pattern" <<< "$attributes") || return 0
  if [[ "$which" == "last" ]]; then
    awk '{print $NF}' <<< "$line"
  else
    awk '{print $10}' <<< "$line"
  fi
}

#######################################
# Prints an NVMe health-log value as a bare integer.
# smartctl prints e.g. "Data Units Written: 7,286,936 [3.73 TB]" or
# "Percentage Used: 1%"; the bracketed suffix, thousands separators and the
# percent sign are removed.
# Arguments: $1 - `smartctl -A` output, $2 - label ERE
#######################################
nvme_number() {
  local value
  value=$(smart_field "$1" "$2")
  value=${value%%\[*}
  printf '%s\n' "${value//[!0-9]/}"
}

#######################################
# Evaluates a bc expression and prints it with a fixed number of decimals
# (including a leading zero, which bc itself omits). Prints 0 if bc fails.
# Arguments: $1 - number of decimals, $2 - bc expression
#######################################
bc_calc() {
  local scale=$1
  local expr=$2
  local result
  result=$(bc 2>/dev/null <<< "scale=${scale}; ${expr}")
  [[ "$result" =~ ^-?[0-9]*\.?[0-9]+$ ]] || result=0
  LC_ALL=C printf "%.${scale}f\n" "$result"
}

#######################################
# Returns 0 when the bc relational expression in $1 evaluates to 1.
#######################################
bc_true() {
  [[ "$(bc 2>/dev/null <<< "$1")" == "1" ]]
}

#######################################
# Reports whether smartctl can talk to a disk.
# smartctl's exit status is a bitmask: bits 0-1 mean the command line was
# rejected or the device could not be opened; higher bits describe the disk's
# health and must not be treated as "no access".
# Arguments: $1 - device path, $2 - optional smartctl device type
# Outputs:   "full_access" or "no_access"
#######################################
test_smart_access() {
  local disk=$1
  local controller=${2:-}
  local rc=0

  run_smartctl "$controller" -i "$disk" >/dev/null || rc=$?

  if (( rc & 3 )); then
    echo "no_access"
  else
    echo "full_access"
  fi
}

#######################################
# Collects identity information from `smartctl -i`.
# Arguments: $1 - device path, $2 - optional smartctl device type
# Outputs:   "model|serial|capacity|firmware|disk_type" where disk_type is
#            SSD, HDD or UNKNOWN and capacity is the text before "[..]"
#######################################
get_basic_disk_info() {
  local disk=$1
  local controller=${2:-}
  local info model vendor serial capacity firmware

  info=$(run_smartctl "$controller" -i "$disk")

  model=$(smart_field "$info" 'Device Model:|Product:|Model Number:')
  vendor=$(smart_field "$info" 'Vendor:')
  if [[ -n "$vendor" && -n "$model" ]]; then
    model="$vendor $model"
  fi
  serial=$(smart_field "$info" 'Serial Number:')
  firmware=$(smart_field "$info" 'Firmware Version:|Firmware revision:|Revision:')

  capacity=$(smart_field "$info" 'User Capacity:|Total NVM Capacity:|Size/Capacity:')
  capacity=${capacity%%\[*}
  # Trim the whitespace left behind after cutting the "[480 GB]" suffix.
  capacity=${capacity%"${capacity##*[![:space:]]}"}

  local disk_type="UNKNOWN"
  if grep -qiE 'Solid State Device|NVMe' <<< "$info"; then
    disk_type="SSD"
  elif grep -qi 'Rotation Rate' <<< "$info"; then
    disk_type="HDD"
  fi

  printf '%s|%s|%s|%s|%s\n' "$model" "$serial" "$capacity" "$firmware" "$disk_type"
}

#######################################
# Collects health and wear attributes for SATA/SAS disks.
# Arguments: $1 - device path, $2 - optional smartctl device type
# Outputs:   "health|power_on_hours|reallocated|pending|total_written|wearout"
#######################################
get_sata_disk_details() {
  local disk=$1
  local controller=${2:-}
  local health attributes

  health=$(run_smartctl "$controller" -H "$disk")
  attributes=$(run_smartctl "$controller" -A "$disk")

  # ATA prints "... test result: PASSED"; SCSI/SAS prints
  # "SMART Health Status: OK".
  local health_status power_on_hours reallocated_sectors pending_sectors
  health_status=$(smart_field "$health" 'result:|SMART overall-health|Health Status:')
  power_on_hours=$(ata_attr "$attributes" 'Power_On_Hours')
  reallocated_sectors=$(ata_attr "$attributes" 'Reallocated_Sector_Ct')
  pending_sectors=$(ata_attr "$attributes" 'Current_Pending_Sector')

  # Kingston SA400 specific attributes
  local total_written media_wearout
  total_written=$(ata_attr "$attributes" 'Flash_Writes_GiB|Lifetime_Writes_GiB' last)
  media_wearout=$(ata_attr "$attributes" 'SSD_Life_Left' last)

  # Standard SATA attributes
  if [[ -z "$total_written" ]]; then
    total_written=$(ata_attr "$attributes" 'Total_LBAs_Written')
  fi

  printf '%s|%s|%s|%s|%s|%s\n' \
    "$health_status" "$power_on_hours" "$reallocated_sectors" \
    "$pending_sectors" "$total_written" "$media_wearout"
}

#######################################
# Collects health and wear values for NVMe disks.
# Arguments: $1 - device path
# Outputs:   "health|power_on_hours|0|0|data_units_written|percentage_used"
#            with the numeric fields as bare integers
#######################################
get_nvme_disk_details() {
  local disk=$1
  local health attributes

  health=$(smartctl -H "$disk" 2>/dev/null)
  attributes=$(smartctl -A "$disk" 2>/dev/null)

  local health_status power_on_hours data_units_written percentage_used
  health_status=$(smart_field "$health" 'result:|Health Status:')
  power_on_hours=$(nvme_number "$attributes" 'Power On Hours')
  data_units_written=$(nvme_number "$attributes" 'Data Units Written')
  percentage_used=$(nvme_number "$attributes" 'Percentage Used')

  printf '%s|%s|0|0|%s|%s\n' \
    "$health_status" "$power_on_hours" "$data_units_written" "$percentage_used"
}

#######################################
# Converts a raw "written" counter into terabytes written (decimal TB).
# Arguments: $1 - raw counter value
#            $2 - disk model (selects the Kingston SA400 / NVMe unit)
#            $3 - attribute name the counter came from
# Outputs:   TB written with two decimals, or "0" when the counter is
#            missing, zero or not a number
#######################################
calculate_tbw() {
  local raw_value=$1
  local disk_model=$2
  local attribute_name=$3

  raw_value=${raw_value//,/}
  if [[ ! "$raw_value" =~ ^[0-9]+$ || "$raw_value" =~ ^0+$ ]]; then
    echo "0"
    return
  fi

  local bytes_per_unit
  if grep -qiE 'KINGSTON.*SA400' <<< "$disk_model"; then
    # Flash_Writes_GiB / Lifetime_Writes_GiB count binary gibibytes.
    bytes_per_unit=1073741824
  elif grep -qi 'NVMe' <<< "$disk_model" \
    || [[ "$attribute_name" == *"Data Units Written"* ]]; then
    # One NVMe data unit is 1000 blocks of 512 bytes.
    bytes_per_unit=512000
  else
    # Total_LBAs_Written; assumes 512-byte LBAs (vendors differ - TODO confirm
    # per model if the figure looks implausible).
    bytes_per_unit=512
  fi

  bc_calc 2 "$raw_value * $bytes_per_unit / 1000000000000"
}

#######################################
# Estimates the rated endurance (TBW) from model family and capacity.
# Arguments: $1 - disk model, $2 - capacity in GB (non-numeric counts as 0)
# Outputs:   estimated endurance in TB
#######################################
estimate_ssd_endurance() {
  local disk_model=$1
  local capacity_gb=$2

  [[ "$capacity_gb" =~ ^[0-9]+$ ]] || capacity_gb=0
  capacity_gb=$(( 10#$capacity_gb ))

  if grep -qiE 'KINGSTON.*SA400' <<< "$disk_model"; then
    if (( capacity_gb >= 960 )); then
      echo "300"
    elif (( capacity_gb >= 480 )); then
      echo "150"
    else
      echo "80"
    fi
  elif grep -qiE 'KINGSTON.*SA2000' <<< "$disk_model"; then
    if (( capacity_gb >= 2000 )); then
      echo "800"
    elif (( capacity_gb >= 1000 )); then
      echo "400"
    elif (( capacity_gb >= 500 )); then
      echo "200"
    else
      echo "100"
    fi
  elif grep -qi 'NVMe' <<< "$disk_model"; then
    if (( capacity_gb >= 2000 )); then
      echo "1200"
    elif (( capacity_gb >= 1000 )); then
      echo "600"
    elif (( capacity_gb >= 500 )); then
      echo "300"
    else
      echo "150"
    fi
  else
    if (( capacity_gb >= 1000 )); then
      echo "600"
    elif (( capacity_gb >= 480 )); then
      echo "300"
    elif (( capacity_gb >= 240 )); then
      echo "150"
    else
      echo "80"
    fi
  fi
}

#######################################
# Estimates remaining SSD life from the drive's wear indicator, falling back
# to TB written versus estimated endurance.
# Arguments: $1 - TB written so far
#            $2 - disk model
#            $3 - capacity in GB
#            $4 - wear indicator value (optional "%" suffix allowed)
#            $5 - optional: "used" if $4 is a percentage used (NVMe),
#                 "remaining" if it is a percentage left; when omitted,
#                 "used" is assumed only if the model contains "NVMe"
# Outputs:   "percent|tbw_remaining|status|source" (colored fields); source is
#            media_wearout, tbw, estimated, or empty when nothing is known
#######################################
estimate_ssd_lifespan() {
  local tbw_used=${1:-0}
  local disk_model=$2
  local capacity_gb=$3
  local media_wearout=$4
  local wear_kind=${5:-}

  [[ "$tbw_used" =~ ^[0-9]*\.?[0-9]+$ ]] || tbw_used=0

  local estimated_endurance
  estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")
  if [[ ! "$estimated_endurance" =~ ^[0-9]+$ ]] || (( estimated_endurance == 0 )); then
    echo "Unknown|Unknown|Unknown|"
    return
  fi

  local tbw_remaining
  tbw_remaining=$(bc_calc 2 "$estimated_endurance - $tbw_used")

  if [[ -z "$wear_kind" ]]; then
    if grep -qi 'NVMe' <<< "$disk_model"; then
      wear_kind="used"
    else
      wear_kind="remaining"
    fi
  fi

  # Normalize the wear indicator to "percent remaining".
  local remaining=""
  media_wearout=${media_wearout%\%}
  if [[ "$media_wearout" =~ ^[0-9]+$ ]]; then
    local wear=$(( 10#$media_wearout ))
    if [[ "$wear_kind" == "used" ]]; then
      # NVMe "Percentage Used" is allowed to exceed 100.
      remaining=$(( wear >= 100 ? 0 : 100 - wear ))
    elif (( wear > 0 )); then
      # A "remaining" value of 0 is treated as "attribute not reported".
      remaining=$wear
    fi
  fi

  local color status
  if [[ -n "$remaining" ]]; then
    if (( remaining <= 10 )); then
      color=$RED
      status="Critical wear"
    elif (( remaining <= 30 )); then
      color=$YELLOW
      status="High wear"
    elif (( remaining <= 70 )); then
      color=$YELLOW
      status="Moderate wear"
    else
      color=$GREEN
      status="Healthy"
    fi
    printf '%s|%s|%s|%s\n' \
      "${color}${remaining}%${NC}" \
      "${color}${tbw_remaining} TB${NC}" \
      "${color}${status}${NC}" \
      "media_wearout"
    return
  fi

  # Fall back to TBW calculation
  if bc_true "$tbw_used > 0"; then
    local lifespan_used lifespan_remaining
    lifespan_used=$(bc_calc 1 "$tbw_used * 100 / $estimated_endurance")
    lifespan_remaining=$(bc_calc 1 "100 - $lifespan_used")
    if bc_true "$lifespan_remaining < 0"; then
      lifespan_remaining="0.0"
    fi

    if bc_true "$lifespan_used >= 80"; then
      color=$RED
      status="High wear"
    elif bc_true "$lifespan_used >= 50"; then
      color=$YELLOW
      status="Moderate wear"
    else
      color=$GREEN
      status="Healthy"
    fi
    printf '%s|%s|%s|%s\n' \
      "${color}${lifespan_remaining}%${NC}" \
      "${color}${tbw_remaining} TB${NC}" \
      "${color}${status}${NC}" \
      "tbw"
  else
    echo "Unknown|${estimated_endurance} TB|New|estimated"
  fi
}

#######################################
# Gives a rough HDD life expectancy from sector counts and power-on hours.
# Sector problems are evaluated first so they are reported even when the
# power-on hours are unavailable.
# Arguments: $1 - power-on hours (raw value; text after the leading digits is
#                 ignored), $2 - reallocated sectors, $3 - pending sectors
# Outputs:   a colored one-line estimate, or "Unknown"
#######################################
estimate_hdd_lifespan() {
  local power_on_hours=$1
  local reallocated_sectors=$2
  local pending_sectors=$3

  local clean_hours=${power_on_hours%%[!0-9]*}
  clean_hours=$(( 10#${clean_hours:-0} ))

  [[ "$reallocated_sectors" =~ ^[0-9]+$ ]] || reallocated_sectors=0
  [[ "$pending_sectors" =~ ^[0-9]+$ ]] || pending_sectors=0
  reallocated_sectors=$(( 10#$reallocated_sectors ))
  pending_sectors=$(( 10#$pending_sectors ))

  if (( pending_sectors > 0 )); then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
  elif (( reallocated_sectors > 100 )); then
    echo "${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)"
  elif (( reallocated_sectors > 10 )); then
    echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)"
  elif (( clean_hours == 0 )); then
    echo "Unknown"
  elif (( clean_hours > 40000 )); then
    echo "${YELLOW}1-2 years${NC} (High usage: $clean_hours hours)"
  elif (( clean_hours > 25000 )); then
    echo "${GREEN}2-3 years${NC} (Moderate usage: $clean_hours hours)"
  else
    echo "${GREEN}> 3 years${NC} (Low usage: $clean_hours hours)"
  fi
}

#######################################
# Converts a capacity string into whole gigabytes (decimal).
# Accepts "480,103,981,056 bytes" (any thousands separator) as well as
# "480 GB" / "2 TB" style strings.
# Arguments: $1 - capacity text
# Outputs:   capacity in GB, 0 when it cannot be parsed
#######################################
capacity_to_gb() {
  local capacity=$1
  local lowered=${capacity,,}
  local number

  if [[ "$lowered" == *gb* || "$lowered" == *tb* ]]; then
    if [[ ! "$capacity" =~ [0-9]+ ]]; then
      echo "0"
      return
    fi
    number=$(( 10#${BASH_REMATCH[0]} ))
    if [[ "$lowered" == *gb* ]]; then
      echo "$number"
    else
      echo $(( number * 1000 ))
    fi
    return
  fi

  # Raw byte count: keep the digits that precede the first letter so that
  # separators such as "," do not truncate the number.
  number=${capacity%%[[:alpha:]]*}
  number=${number//[!0-9]/}
  if [[ -z "$number" ]]; then
    echo "0"
    return
  fi
  echo $(( 10#$number / 1000000000 ))
}

#######################################
# Prints a full health report for one disk.
# Arguments: $1 - device path, $2 - optional smartctl device type
# Outputs:   the report on stdout
#######################################
check_disk() {
  local disk=$1
  local controller=${2:-}

  print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
  echo "=================================================="

  local access_level
  access_level=$(test_smart_access "$disk" "$controller")
  if [[ "$access_level" != "full_access" ]]; then
    if [[ "$access_level" == "no_access" ]]; then
      print_color "$RED" "ERROR: Cannot access disk through controller"
      echo "Possible reasons:"
      echo "  - Controller doesn't support SMART passthrough"
      echo "  - Disk is part of a hardware RAID array"
      echo "  - Insufficient permissions (try running as root)"
      echo "  - Controller busy or offline"
    fi
    echo ""
    return
  fi

  # Get basic disk information
  local basic_info model serial capacity firmware disk_type
  basic_info=$(get_basic_disk_info "$disk" "$controller")
  IFS='|' read -r model serial capacity firmware disk_type <<< "$basic_info"

  # The model string of an NVMe drive rarely contains "NVMe", so the device
  # path is the primary signal.
  local is_nvme=0
  if [[ "$disk" == /dev/nvme* ]] || grep -qi 'NVMe' <<< "$model"; then
    is_nvme=1
    disk_type="SSD"
  fi

  # Get detailed information based on disk type
  local details
  if (( is_nvme )); then
    details=$(get_nvme_disk_details "$disk")
  else
    details=$(get_sata_disk_details "$disk" "$controller")
  fi

  local health_status power_on_hours reallocated_sectors pending_sectors
  local total_written media_wearout
  IFS='|' read -r health_status power_on_hours reallocated_sectors \
    pending_sectors total_written media_wearout <<< "$details"

  # Display basic information
  echo "Model:           ${model:-Unknown}"
  echo "Serial:          ${serial:-Unknown}"
  echo "Type:            $disk_type"
  echo "Capacity:        ${capacity:-Unknown}"
  echo "Firmware:        ${firmware:-Unknown}"
  echo "Health:          ${health_status:-Unknown}"

  if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
    echo "Power On Hours:  $power_on_hours"
  else
    echo "Power On Hours:  Unknown"
  fi

  # Disk type specific analysis
  if [[ "$disk_type" == "SSD" ]]; then
    local capacity_gb
    capacity_gb=$(capacity_to_gb "$capacity")

    # Attribute name and wear semantics for the TBW/lifespan calculation
    local attribute_name wear_kind="remaining"
    if grep -qiE 'KINGSTON.*SA400' <<< "$model"; then
      attribute_name="Flash_Writes_GiB"
    elif (( is_nvme )); then
      attribute_name="Data Units Written"
      wear_kind="used"
    else
      attribute_name="Total_LBAs_Written"
    fi

    local tbw_used
    tbw_used=$(calculate_tbw "$total_written" "$model" "$attribute_name")
    echo "TBW Used:        ${tbw_used} TB"

    local lifespan_info lifespan_percent tbw_remaining wear_status wear_source
    lifespan_info=$(estimate_ssd_lifespan "$tbw_used" "$model" "$capacity_gb" \
      "$media_wearout" "$wear_kind")
    IFS='|' read -r lifespan_percent tbw_remaining wear_status wear_source \
      <<< "$lifespan_info"

    echo "TBW Remaining:   $tbw_remaining"
    echo "Lifespan:        $lifespan_percent ($wear_status)"

    case "$wear_source" in
      media_wearout) echo "Wear Source:     Media Wearout Indicator" ;;
      tbw)           echo "Wear Source:     TBW Calculation" ;;
      estimated)     echo "Wear Source:     Estimated Endurance" ;;
    esac
  elif [[ "$disk_type" == "HDD" ]]; then
    if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then
      echo "Realloc Sectors: $reallocated_sectors"
    fi
    if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then
      echo "Pending Sectors: $pending_sectors"
    fi

    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" \
      "${reallocated_sectors:-0}" "${pending_sectors:-0}")
    echo "Lifespan:        $lifespan"
  else
    print_color "$YELLOW" "Limited information available for this disk type"
  fi

  echo ""
}

#######################################
# Lists whole-disk block devices (no partitions).
# Outputs: device paths separated by spaces on a single line
#######################################
detect_disks() {
  local -a disks=()
  local disk
  # Matches sda..sdzz, nvme<N>n<M>, vdX and xvdX; partitions and unmatched
  # glob patterns (which stay literal) are rejected by the regex.
  local whole_disk_re='^/dev/(sd[a-z]+|nvme[0-9]+n[0-9]+|x?vd[a-z]+)$'

  for disk in /dev/sd* /dev/nvme* /dev/vd* /dev/xvd*; do
    [[ "$disk" =~ $whole_disk_re ]] || continue
    if [[ -b "$disk" ]]; then
      disks+=("$disk")
    fi
  done

  echo "${disks[@]}"
}

#######################################
# Entry point: checks the disks given as arguments, or every detected disk.
# Arguments: optional device paths
# Returns:   exits 1 when no usable disk is found
#######################################
main() {
  print_color "$BLUE" "Ubuntu 24.04 Disk Health Check Script v$VERSION"
  print_color "$BLUE" "============================================"
  echo ""

  check_dependencies

  local -a disks=()
  local disk
  if (( $# > 0 )); then
    for disk in "$@"; do
      if [[ -b "$disk" ]]; then
        disks+=("$disk")
      else
        print_color "$RED" "Error: $disk is not a valid block device"
      fi
    done
  else
    print_color "$CYAN" "Auto-detecting disks..."
    read -ra disks <<< "$(detect_disks)"
  fi

  if (( ${#disks[@]} == 0 )); then
    print_color "$RED" "No disks found or accessible" >&2
    exit 1
  fi

  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  if (( EUID != 0 )); then
    print_color "$YELLOW" "Warning: Not running as root. Some information may be limited."
    echo ""
  fi

  for disk in "${disks[@]}"; do
    check_disk "$disk"
  done

  print_color "$BLUE" "Check completed!"
}

#######################################
# Prints usage information.
#######################################
usage() {
  echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]"
  echo ""
  echo "Examples:"
  echo "  $SCRIPT_NAME                    # Check all disks"
  echo "  sudo $SCRIPT_NAME               # Check all disks (as root)"
  echo "  $SCRIPT_NAME /dev/sda           # Check specific disk"
  echo "  $SCRIPT_NAME /dev/nvme0n1       # Check NVMe disk"
}

case "${1:-}" in
  -h|--help)
    usage
    exit 0
    ;;
  -v|--version)
    echo "$SCRIPT_NAME version $VERSION"
    exit 0
    ;;
  *)
    main "$@"
    ;;
esac