#!/bin/bash
# Disk Health Check Script for Harvester OS
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
# Supports consumer and enterprise disk classification
#
# Usage: see usage() below, or run with --help.
# Requires: smartctl (smartmontools). lsblk, fdisk, blockdev, mdadm and
# storcli are used opportunistically when they are installed.

SCRIPT_NAME=$(basename "$0")
VERSION="3.5"

# Color codes. tput errors (for example when TERM is unset) are silenced so
# the script falls back to uncolored output instead of printing to stderr.
RED=$(tput setaf 1 2>/dev/null)
GREEN=$(tput setaf 2 2>/dev/null)
YELLOW=$(tput setaf 3 2>/dev/null)
BLUE=$(tput setaf 4 2>/dev/null)
CYAN=$(tput setaf 6 2>/dev/null)
MAGENTA=$(tput setaf 5 2>/dev/null)
NC=$(tput sgr0 2>/dev/null)

# Print a message wrapped in a color code.
# Arguments: $1 - color escape sequence (may be empty), $2 - message
print_color() {
  local color=$1
  local message=$2
  echo -e "${color}${message}${NC}"
}

# Return 0 when the named command is available on PATH (or as a function).
command_exists() {
  command -v "$1" >/dev/null 2>&1
}

# Known model capacities (GB), used when no tool can report the size.
declare -A MODEL_CAPACITIES=(
  ["ST91000640NS"]="1000"
  ["ST2000NM0033"]="2000"
  ["ST4000NM0033"]="4000"
  ["MB1000GCWCV"]="1000"
  ["MB2000GCWDB"]="2000"
  ["AL15SEB120N"]="1200"
  ["AL15SEB600N"]="600"
  ["HUC101212CSS600"]="1200"
  ["HUC103012CSS600"]="3000"
  ["HUC109090CSS600"]="900"
  ["MAX3147RC"]="147"
  ["ST3146356SS"]="146"
  ["ST3146855SS"]="146"
  ["ST33000650SS"]="3000"
  ["ST3600057SS"]="600"
  ["ST9146803SS"]="146"
  ["ST973451SS"]="73"
  ["AL13SXB300N"]="300"
  # Add more models as encountered
)

# TBW endurance standards (using lowest numbers), keyed by capacity tier in GB
declare -A CONSUMER_TBW=(
  ["250"]=150
  ["500"]=300
  ["1000"]=600
  ["2000"]=1200
  ["4000"]=2400
  ["8000"]=4800
)

declare -A ENTERPRISE_TBW=(
  ["250"]=450
  ["500"]=900
  ["1000"]=1800
  ["2000"]=3600
  ["4000"]=7200
  ["8000"]=14400
)

# Run smartctl for a disk, adding "-d <controller>" only for a real device type.
# main() tags directly attached disks with the pseudo-controller "direct";
# that tag (like an empty value) must not be handed to smartctl's -d option.
# Arguments: $1 - controller ("", "direct", or e.g. "megaraid,3"),
#            $2.. - remaining smartctl arguments (options and device)
# Outputs:   smartctl's stdout; stderr is discarded
# Returns:   smartctl's exit status
run_smartctl() {
  local controller=$1
  shift
  local -a cmd=(smartctl)
  if [[ -n "$controller" && "$controller" != "direct" ]]; then
    cmd+=(-d "$controller")
  fi
  "${cmd[@]}" "$@" 2>/dev/null
}

# Print the trimmed value of the first "Label: value" line matching a pattern.
# Arguments: $1 - text to search, $2 - grep pattern (matched case-insensitively)
# Outputs:   everything after the first ':' on that line, whitespace-trimmed;
#            nothing when no line matches
get_field() {
  local text=$1
  local label=$2
  printf '%s\n' "$text" \
    | grep -i -m1 -- "$label" \
    | cut -d: -f2- \
    | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'
}

# Print the RAW_VALUE column (field 10) of the first "smartctl -A" row whose
# attribute name (field 2) contains the given string.
# Arguments: $1 - attribute table text, $2 - attribute name
get_ata_raw() {
  printf '%s\n' "$1" | awk -v name="$2" 'index($2, name) { print $10; exit }'
}

# Function to get closest capacity tier
# Arguments: $1 - capacity in GB (non-numeric or empty is treated as 0)
# Outputs:   the smallest tier that is >= the capacity, capped at 8000
get_capacity_tier() {
  local capacity_gb=${1:-0}
  local tier
  [[ "$capacity_gb" =~ ^[0-9]+$ ]] || capacity_gb=0
  for tier in 250 500 1000 2000 4000 8000; do
    # 10# forces base 10 so a value with leading zeros is not read as octal
    if (( 10#$capacity_gb <= tier )); then
      echo "$tier"
      return
    fi
  done
  echo "8000"
}

# Function to extract numeric hours from power_on_hours field
# Arguments: $1 - raw value, e.g. "12345" or "12345h+30m+10.5s"
# Outputs:   the leading run of digits, or 0 when there is none
extract_numeric_hours() {
  local power_on_hours=$1
  local numeric_hours=${power_on_hours%%[!0-9]*}
  echo "${numeric_hours:-0}"
}

# Function to get disk type and interface
# Arguments: $1 - device path, $2 - controller ("", "direct", or a -d type)
# Outputs:   "disk_type|transport|is_enterprise"
get_disk_info() {
  local disk=$1
  local controller=$2
  local info
  info=$(run_smartctl "$controller" -i "$disk")

  local transport=""
  local disk_type="UNKNOWN"
  local is_enterprise=false

  if [[ "$disk" == /dev/nvme* ]] || grep -qi "NVMe" <<<"$info"; then
    # Check if it's NVMe
    disk_type="NVMe"
    transport="NVMe"
  elif grep -qi "SAS" <<<"$info"; then
    # Check for SAS; SAS disks are always classified as enterprise
    transport="SAS"
    is_enterprise=true
    if grep -qi "Solid State Device\|SSD" <<<"$info"; then
      disk_type="SAS SSD"
    else
      # Covers both an explicit "Rotation Rate" line and the no-hint case
      disk_type="SAS HDD"
    fi
  elif grep -qi "Solid State Device\|SSD" <<<"$info"; then
    # Check for SATA SSD
    disk_type="SATA SSD"
    transport="SATA"
  elif grep -qi "Rotation Rate" <<<"$info"; then
    # Check for SATA HDD
    disk_type="SATA HDD"
    transport="SATA"
  fi

  # Check for enterprise features. Markers are matched as whole words (-w):
  # a plain substring match on "EP" fires on "Sep" in the "Local Time" line
  # and would classify every disk as enterprise for a month each year.
  if grep -qiwE 'ENTERPRISE|EP|SAS|Xeon|Xeons|DualPort|PowerLoss|PLP' <<<"$info"; then
    is_enterprise=true
  fi

  # Check device class by model name, again as whole words so that e.g.
  # "WDC" (Western Digital) does not match the "DC" marker.
  local model
  model=$(get_field "$info" "Device Model:")
  if [[ -n "$model" ]] && grep -qiwE 'PRO|EP|DC|ENT|ENTERPRISE' <<<"$model"; then
    is_enterprise=true
  fi

  if [[ "$disk_type" == "UNKNOWN" ]]; then
    disk_type="Unknown"
  fi

  echo "$disk_type|$transport|$is_enterprise"
}

# Function to get SAS disk attributes
# Arguments: $1 - device path, $2 - controller, $3 - "SAS HDD" or "SAS SSD"
# Outputs:   "hours|reallocated|pending|start_stop|load_cycle|temperature|
#            model|serial|firmware|written_gb" on one line. Field 10 is the
#            decimal number taken from the "write:" row of the error counter
#            log and is only filled for SAS SSDs; pending is always empty.
get_sas_attributes() {
  local disk=$1
  local controller=$2
  local disk_type=$3

  local power_on_hours=""
  local reallocated_sectors=""
  local pending_sectors=""
  local start_stop_count=""
  local load_cycle_count=""
  local total_written=""
  local temperature=""
  local model=""
  local serial=""
  local firmware=""

  # Try extended information first for SAS disks
  local attributes
  attributes=$(run_smartctl "$controller" -x "$disk")

  # If extended fails, try standard attributes
  if [[ -z "$attributes" ]]; then
    attributes=$(run_smartctl "$controller" -a "$disk")
  fi

  if [[ -n "$attributes" ]]; then
    # Extract model information
    model=$(get_field "$attributes" "Product:")
    serial=$(get_field "$attributes" "Serial number:")
    firmware=$(get_field "$attributes" "Revision:")

    # Extract power on hours from the "hours:minutes" value, keeping the hours
    power_on_hours=$(grep -i "Accumulated power on time" <<<"$attributes" \
      | grep -oE "[0-9]+:[0-9]+" | head -1)
    power_on_hours=${power_on_hours%%:*}

    # Extract temperature
    temperature=$(grep -i "Current Drive Temperature" <<<"$attributes" \
      | grep -oE "[0-9]+" | head -1)

    # Extract mechanical counters for SAS HDDs
    if [[ "$disk_type" == "SAS HDD" ]]; then
      start_stop_count=$(grep -i "Accumulated start-stop cycles" <<<"$attributes" \
        | grep -oE "[0-9]+" | head -1)
      load_cycle_count=$(grep -i "Accumulated load-unload cycles" <<<"$attributes" \
        | grep -oE "[0-9]+" | head -1)
    fi

    # Extract error counters: the grown defect list stands in for
    # reallocated sectors on SAS devices
    reallocated_sectors=$(grep -i "Elements in grown defect list" <<<"$attributes" \
      | grep -oE "[0-9]+" | head -1)

    # For SAS SSDs, look for write metrics. The only decimal number on the
    # "write:" row is the "Gigabytes processed [10^9 bytes]" column.
    if [[ "$disk_type" == "SAS SSD" ]]; then
      total_written=$(grep -i -m1 "^write:" <<<"$attributes" \
        | grep -oE "[0-9]+\.[0-9]+" | head -1)
    fi
  fi

  echo "$power_on_hours|$reallocated_sectors|$pending_sectors|$start_stop_count|$load_cycle_count|$temperature|$model|$serial|$firmware|$total_written"
}

# Function to calculate TBW for SSD/NVMe
# Arguments: $1 - disk type
#            $2 - raw counter: NVMe "Data Units Written" when $1 is "NVMe",
#                 otherwise the Host_Writes_32MiB raw value
#            $3 - LBAs written (512-byte sectors); takes precedence over $2
# Outputs:   terabytes written (10^12 bytes), rounded down; 0 when no usable
#            counter is given
calculate_tbw() {
  local disk_type=$1
  local raw_value=$2
  local sectors=$3
  local tbw=0

  if [[ "$sectors" =~ ^[0-9]+$ ]] && (( 10#$sectors > 0 )); then
    tbw=$(( 10#$sectors * 512 / 1000000000000 ))
  elif [[ "$raw_value" =~ ^[0-9]+$ ]] && (( 10#$raw_value > 0 )); then
    if [[ "$disk_type" == "NVMe" ]]; then
      # One NVMe data unit is 1000 blocks of 512 bytes
      tbw=$(( 10#$raw_value * 512000 / 1000000000000 ))
    else
      # 32 MiB = 33554432 bytes
      tbw=$(( 10#$raw_value * 33554432 / 1000000000000 ))
    fi
  fi

  echo "$tbw"
}

# Function to get estimated endurance
# Arguments: $1 - capacity in GB, $2 - "true" for enterprise class,
#            $3 - disk type
# Outputs:   minimum TBW rating for the capacity tier, or "N/A" for HDDs
get_estimated_endurance() {
  local capacity_gb=$1
  local is_enterprise=$2
  local disk_type=$3

  if [[ "$disk_type" == "SATA HDD" || "$disk_type" == "SAS HDD" || "$disk_type" == "HDD" ]]; then
    echo "N/A"
    return
  fi

  local capacity_tier
  capacity_tier=$(get_capacity_tier "$capacity_gb")

  if [[ "$is_enterprise" == "true" ]]; then
    echo "${ENTERPRISE_TBW[$capacity_tier]}"
  else
    echo "${CONSUMER_TBW[$capacity_tier]}"
  fi
}

# Function to estimate SSD lifespan with TBW remaining
# Arguments: $1 - power-on hours, $2 - TBW used, $3 - endurance rating in TB
#            (or "N/A"), $4 - disk type
# Outputs:   "lifespan_remaining|tbw_remaining|wear_status|disk_type"
#            (always four fields, so callers can cut by position)
estimate_ssd_lifespan() {
  local power_on_hours=$1
  local tbw_used=$2
  local estimated_endurance=$3
  local disk_type=$4

  power_on_hours=${power_on_hours%%[!0-9]*}
  if [[ -z "$power_on_hours" ]] || (( 10#$power_on_hours == 0 )); then
    echo "Unknown|Unknown|New|$disk_type"
    return
  fi

  if [[ "$estimated_endurance" == "N/A" ]]; then
    echo "N/A|N/A|N/A|HDD"
    return
  fi

  # Without a positive rating there is nothing to divide by
  if ! [[ "$estimated_endurance" =~ ^[0-9]+$ ]] || (( 10#$estimated_endurance == 0 )); then
    echo "Unknown|Unknown|Unknown|$disk_type"
    return
  fi

  # Keep the integer part only; shell arithmetic cannot handle decimals
  local clean_tbw_used=${tbw_used%%.*}
  clean_tbw_used=${clean_tbw_used//[!0-9]/}
  clean_tbw_used=$(( 10#${clean_tbw_used:-0} ))

  local tbw_remaining=$(( 10#$estimated_endurance - clean_tbw_used ))
  (( tbw_remaining < 0 )) && tbw_remaining=0

  if (( clean_tbw_used > 0 )); then
    local lifespan_used=$(( clean_tbw_used * 100 / 10#$estimated_endurance ))
    local lifespan_remaining=$(( 100 - lifespan_used ))
    # A drive written past its rating is reported as 0% left, not negative
    (( lifespan_remaining < 0 )) && lifespan_remaining=0

    if (( lifespan_used >= 80 )); then
      echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type"
    elif (( lifespan_used >= 50 )); then
      echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type"
    else
      echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type"
    fi
  else
    echo "Unknown|${estimated_endurance} TB|New|$disk_type"
  fi
}

# Function to estimate HDD lifespan
# Arguments: $1 - power-on hours, $2 - reallocated sectors, $3 - pending
#            sectors, $4 - start/stop count, $5 - load cycle count,
#            $6 - disk type, $7 - temperature
# Outputs:   a colored, human-readable estimate; "Unknown" without hours
estimate_hdd_lifespan() {
  local power_on_hours=$1
  local reallocated_sectors=$2
  local pending_sectors=$3
  local start_stop_count=$4
  local load_cycle_count=$5
  local disk_type=$6
  local temperature=$7

  # Reduce every counter to a base-10 integer; missing or malformed values
  # count as 0 instead of aborting the comparisons below
  local numeric_hours
  numeric_hours=$(( 10#$(extract_numeric_hours "$power_on_hours") ))
  reallocated_sectors=$(( 10#$(extract_numeric_hours "$reallocated_sectors") ))
  pending_sectors=$(( 10#$(extract_numeric_hours "$pending_sectors") ))
  start_stop_count=$(( 10#$(extract_numeric_hours "$start_stop_count") ))
  load_cycle_count=$(( 10#$(extract_numeric_hours "$load_cycle_count") ))
  temperature=$(( 10#$(extract_numeric_hours "$temperature") ))

  if (( numeric_hours == 0 )); then
    echo "Unknown"
    return
  fi

  local severity=0

  # Critical issues
  if (( pending_sectors > 0 )); then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
    return
  elif (( reallocated_sectors > 100 )); then
    severity=$(( severity + 3 ))
  elif (( reallocated_sectors > 10 )); then
    severity=$(( severity + 2 ))
  elif (( reallocated_sectors > 0 )); then
    severity=$(( severity + 1 ))
  fi

  # Temperature warning
  if (( temperature > 50 )); then
    severity=$(( severity + 2 ))
  elif (( temperature > 40 )); then
    severity=$(( severity + 1 ))
  fi

  # Usage-based assessment
  if (( numeric_hours > 50000 )); then
    severity=$(( severity + 3 ))
  elif (( numeric_hours > 30000 )); then
    severity=$(( severity + 2 ))
  elif (( numeric_hours > 15000 )); then
    severity=$(( severity + 1 ))
  fi

  # Mechanical wear (for HDDs)
  if [[ "$disk_type" == "SATA HDD" || "$disk_type" == "SAS HDD" ]]; then
    if (( start_stop_count > 50000 )); then
      severity=$(( severity + 2 ))
    elif (( start_stop_count > 20000 )); then
      severity=$(( severity + 1 ))
    fi

    if (( load_cycle_count > 500000 )); then
      severity=$(( severity + 2 ))
    elif (( load_cycle_count > 200000 )); then
      severity=$(( severity + 1 ))
    fi
  fi

  if (( severity >= 5 )); then
    echo "${RED}< 6 months${NC} (Multiple risk factors)"
  elif (( severity >= 3 )); then
    echo "${YELLOW}6-18 months${NC} (Moderate wear)"
  elif (( severity >= 1 )); then
    echo "${YELLOW}1-3 years${NC} (Light wear)"
  else
    echo "${GREEN}> 3 years${NC} (Healthy)"
  fi
}

# Function to check soft-raid (MDRAID)
# Reads /proc/mdstat and, when mdadm is installed, prints level, state and
# active device count for every md block device found.
check_mdraid() {
  local md_devices=()
  local line md md_info

  if [[ -f /proc/mdstat ]]; then
    while IFS= read -r line; do
      if [[ $line =~ ^md[0-9]+ ]]; then
        md_devices+=("/dev/${line%% *}")
      fi
    done < /proc/mdstat
  fi

  for md in "${md_devices[@]}"; do
    [[ -b "$md" ]] || continue
    print_color "$MAGENTA" "Found software RAID: $md"
    command_exists mdadm || continue
    md_info=$(mdadm --detail "$md" 2>/dev/null)
    if [[ -n "$md_info" ]]; then
      echo "RAID Level: $(get_field "$md_info" "Raid Level")"
      echo "State: $(get_field "$md_info" "State")"
      echo "Devices: $(get_field "$md_info" "Active Devices")"
      echo ""
    fi
  done
}

# Print the size in bytes from the "User Capacity:" line of "smartctl -i",
# or nothing when the line is absent.
# Arguments: $1 - controller, $2 - device path
smart_capacity_bytes() {
  local controller=$1
  local disk=$2
  local value
  value=$(get_field "$(run_smartctl "$controller" -i "$disk")" "User Capacity:")
  value=${value%%bytes*}     # drop the " bytes [x.xx TB]" tail
  value=${value%%[[]*}       # drop a bracketed tail when "bytes" is absent
  echo "${value//[!0-9]/}"   # drop thousands separators
}

# Function to get capacity using direct block device methods
# Arguments: $1 - device path, $2 - controller, $3 - disk type (unused)
# Outputs:   "capacity_gb|capacity_human" (decimal GB; "0|Unknown" on failure)
get_disk_capacity() {
  local disk=$1
  local controller=$2
  local disk_type=$3
  local capacity_gb=0
  local capacity_human="Unknown"
  local size_bytes=""
  local behind_controller=false

  [[ -n "$controller" && "$controller" != "direct" ]] && behind_controller=true

  # Behind a RAID controller the block device is only the access path, so
  # its size is not the physical disk's size: ask the disk itself first.
  if [[ "$behind_controller" == "true" ]]; then
    size_bytes=$(smart_capacity_bytes "$controller" "$disk")
  fi

  # Method 1: Try lsblk first. -d restricts the output to the disk itself;
  # without it every partition adds a line and the numeric check fails.
  if ! [[ "$size_bytes" =~ ^[0-9]+$ ]] && command_exists lsblk; then
    size_bytes=$(lsblk -b -d -n -o SIZE "$disk" 2>/dev/null | head -n1 | tr -d '[:space:]')
  fi

  # Method 2: Try fdisk, using the exact byte count from its "Disk" line
  # rather than the rounded "1.82 TiB" figure
  if ! [[ "$size_bytes" =~ ^[0-9]+$ ]] && command_exists fdisk; then
    local fdisk_info
    fdisk_info=$(fdisk -l "$disk" 2>/dev/null | grep -F -m1 -- "Disk $disk")
    if [[ "$fdisk_info" =~ ([0-9]+)[[:space:]]+bytes ]]; then
      size_bytes=${BASH_REMATCH[1]}
    fi
  fi

  # Method 3: Try blockdev
  if ! [[ "$size_bytes" =~ ^[0-9]+$ ]] && command_exists blockdev; then
    size_bytes=$(blockdev --getsize64 "$disk" 2>/dev/null)
  fi

  # Method 4: smartctl's own capacity line, if not already tried above
  if ! [[ "$size_bytes" =~ ^[0-9]+$ ]] && [[ "$behind_controller" == "false" ]]; then
    size_bytes=$(smart_capacity_bytes "$controller" "$disk")
  fi

  if [[ "$size_bytes" =~ ^[0-9]+$ ]]; then
    capacity_gb=$(( 10#$size_bytes / 1000000000 ))
  fi

  # Method 5: Model-based lookup
  if (( capacity_gb == 0 )); then
    local info model
    info=$(run_smartctl "$controller" -i "$disk")
    model=$(get_field "$info" "Device Model:")
    [[ -z "$model" ]] && model=$(get_field "$info" "Product:")
    if [[ -n "$model" && -n "${MODEL_CAPACITIES[$model]}" ]]; then
      capacity_gb="${MODEL_CAPACITIES[$model]}"
    fi
  fi

  # Generate human readable capacity; TB keeps one decimal so that e.g. a
  # 1920 GB drive is not reported as "1 TB"
  if (( capacity_gb >= 1000 )); then
    capacity_human="$(( capacity_gb / 1000 )).$(( capacity_gb % 1000 / 100 )) TB"
  elif (( capacity_gb > 0 )); then
    capacity_human="${capacity_gb} GB"
  fi

  echo "$capacity_gb|$capacity_human"
}

# Function to check a single disk
# Arguments: $1 - device path, $2 - controller ("", "direct", or a -d type)
# Outputs:   a human-readable health report on stdout
check_disk() {
  local disk=$1
  local controller=$2

  print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
  echo "=================================================="

  # Get basic disk information. smartctl's exit status is a bit mask: bit 0
  # is a command line error and bit 1 a failed device open. Either one means
  # the disk cannot be read, even though smartctl still prints its banner.
  local info info_status
  info=$(run_smartctl "$controller" -i "$disk")
  info_status=$?
  if (( info_status & 3 )) || [[ -z "$info" ]]; then
    print_color "$YELLOW" "Cannot read disk information. It may be offline, unsupported, or need controller specification."
    echo ""
    return
  fi

  # -H legitimately exits non-zero for a failing disk; its status is not an
  # indication that the disk is unreadable, so only its output is used.
  local health
  health=$(run_smartctl "$controller" -H "$disk")

  # Get disk type, interface and class
  local disk_info disk_type transport is_enterprise
  disk_info=$(get_disk_info "$disk" "$controller")
  IFS='|' read -r disk_type transport is_enterprise <<<"$disk_info"

  # Extract disk information
  local model serial firmware
  model=$(get_field "$info" "Device Model:")
  [[ -z "$model" ]] && model=$(get_field "$info" "Product:")
  [[ -z "$model" ]] && model=$(get_field "$info" "Model Number:")
  serial=$(get_field "$info" "Serial Number:")
  firmware=$(get_field "$info" "Firmware Version:")
  [[ -z "$firmware" ]] && firmware=$(get_field "$info" "Revision:")

  # Get capacity
  local capacity_info capacity_gb capacity_human
  capacity_info=$(get_disk_capacity "$disk" "$controller" "$disk_type")
  IFS='|' read -r capacity_gb capacity_human <<<"$capacity_info"

  local health_status
  health_status=$(get_field "$health" "result:")
  [[ -z "$health_status" ]] && health_status=$(get_field "$health" "SMART overall-health")
  [[ -z "$health_status" ]] && health_status=$(get_field "$health" "SMART Health Status")

  # Extract SMART attributes based on disk type
  local power_on_hours=""
  local reallocated_sectors=""
  local pending_sectors=""
  local start_stop_count=""
  local load_cycle_count=""
  local total_written=""
  local host_writes_32mib=""
  local nvme_data_units=""
  local sas_written_gb=""
  local temperature=""
  local attributes=""

  if [[ "$disk_type" == "SAS HDD" || "$disk_type" == "SAS SSD" ]]; then
    local sas_attrs sas_model sas_serial sas_firmware
    sas_attrs=$(get_sas_attributes "$disk" "$controller" "$disk_type")
    IFS='|' read -r power_on_hours reallocated_sectors pending_sectors \
      start_stop_count load_cycle_count temperature \
      sas_model sas_serial sas_firmware sas_written_gb <<<"$sas_attrs"

    # Use SAS-extracted data if available
    [[ -n "$sas_model" ]] && model="$sas_model"
    [[ -n "$sas_serial" ]] && serial="$sas_serial"
    [[ -n "$sas_firmware" ]] && firmware="$sas_firmware"
  elif [[ "$disk_type" == "NVMe" ]]; then
    # NVMe health data is printed as "Label: value" lines with thousands
    # separators, not as the ATA attribute table parsed below
    attributes=$(run_smartctl "$controller" -A "$disk")
    power_on_hours=$(get_field "$attributes" "^Power On Hours:")
    power_on_hours=${power_on_hours//[!0-9]/}
    nvme_data_units=$(get_field "$attributes" "^Data Units Written:")
    nvme_data_units=${nvme_data_units%%[[]*}   # drop the "[x.xx TB]" tail
    nvme_data_units=${nvme_data_units//[!0-9]/}
    temperature=$(get_field "$attributes" "^Temperature:")
    temperature=${temperature%%[!0-9]*}
  else
    attributes=$(run_smartctl "$controller" -A "$disk")
    if [[ -n "$attributes" ]]; then
      power_on_hours=$(get_ata_raw "$attributes" "Power_On_Hours")
      reallocated_sectors=$(get_ata_raw "$attributes" "Reallocated_Sector_Ct")
      pending_sectors=$(get_ata_raw "$attributes" "Current_Pending_Sector")
      start_stop_count=$(get_ata_raw "$attributes" "Start_Stop_Count")
      load_cycle_count=$(get_ata_raw "$attributes" "Load_Cycle_Count")
      # The two write counters use different units (512-byte LBAs versus
      # 32 MiB blocks) and must not share a variable
      total_written=$(get_ata_raw "$attributes" "Total_LBAs_Written")
      host_writes_32mib=$(get_ata_raw "$attributes" "Host_Writes_32MiB")
      temperature=$(get_ata_raw "$attributes" "Temperature_Celsius")
    fi
  fi

  # Clean up extracted values
  power_on_hours=$(extract_numeric_hours "$power_on_hours")
  reallocated_sectors=${reallocated_sectors:-0}
  pending_sectors=${pending_sectors:-0}
  start_stop_count=${start_stop_count:-0}
  load_cycle_count=${load_cycle_count:-0}

  local disk_class="Consumer"
  [[ "$is_enterprise" == "true" ]] && disk_class="Enterprise"

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Interface: $transport"
  echo "Class: $disk_class"
  echo "Capacity: $capacity_human"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"

  # Show temperature if available
  if [[ -n "$temperature" && "$temperature" != "0" ]]; then
    echo "Temperature: ${temperature} C"
  fi

  # Only show Power On Hours if we have a valid value
  if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
    echo "Power On Hours: $power_on_hours"
  else
    echo "Power On Hours: Unknown"
  fi

  # Disk type specific analysis
  if [[ "$disk_type" == "SATA HDD" || "$disk_type" == "SAS HDD" ]]; then
    echo "Realloc Sectors: $reallocated_sectors"
    echo "Pending Sectors: $pending_sectors"

    # Only show mechanical counters if we have values
    if [[ -n "$start_stop_count" && "$start_stop_count" != "0" ]]; then
      echo "Start/Stop Count: $start_stop_count"
    fi
    if [[ -n "$load_cycle_count" && "$load_cycle_count" != "0" ]]; then
      echo "Load Cycle Count: $load_cycle_count"
    fi

    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "$reallocated_sectors" \
      "$pending_sectors" "$start_stop_count" "$load_cycle_count" \
      "$disk_type" "$temperature")
    echo "Lifespan: $lifespan"
  elif [[ "$disk_type" == "SATA SSD" || "$disk_type" == "SAS SSD" || "$disk_type" == "NVMe" ]]; then
    local tbw_used=0
    if [[ "$disk_type" == "NVMe" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "$nvme_data_units" "")
    elif [[ "$disk_type" == "SAS SSD" && "$sas_written_gb" =~ ^[0-9]+ ]]; then
      # The SAS error counter log reports gigabytes (10^9 bytes) written
      tbw_used=$(( 10#${sas_written_gb%%.*} / 1000 ))
    elif [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "")
    fi

    local estimated_endurance
    estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type")

    if [[ -n "$estimated_endurance" && "$estimated_endurance" != "N/A" ]]; then
      echo "TBW Used: ${tbw_used} TB"
      echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)"

      local lifespan_info lifespan_percent tbw_remaining wear_status
      lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" \
        "$estimated_endurance" "$disk_type")
      lifespan_percent=$(cut -d'|' -f1 <<<"$lifespan_info")
      tbw_remaining=$(cut -d'|' -f2 <<<"$lifespan_info")
      wear_status=$(cut -d'|' -f3 <<<"$lifespan_info")

      echo "TBW Remaining: $tbw_remaining"
      echo "Lifespan: $lifespan_percent ($wear_status)"
    else
      echo "TBW Used: ${tbw_used} TB"
      echo "Lifespan: Unknown (Cannot estimate without usage data)"
    fi

    if [[ "$disk_type" == "SAS SSD" ]]; then
      echo "Realloc Sectors: $reallocated_sectors"
      echo "Pending Sectors: $pending_sectors"
    fi
  else
    print_color "$YELLOW" "Unknown disk type - limited information available"
    echo "Realloc Sectors: $reallocated_sectors"
    echo "Pending Sectors: $pending_sectors"
  fi

  echo ""
}

# Function to detect RAID controllers and disks
# Outputs: space-separated "device:controller" entries on one line, where
#          controller is "direct" for plain block devices
detect_raid_disks() {
  # NOTE(review): smartctl documents hpt as "hpt,L/M/N" and aacraid as
  # "aacraid,H,L,ID", and lists no "auto,N" form, so the single-index probes
  # for those three presumably never succeed - confirm before pruning.
  local controllers=("megaraid" "cciss" "areca" "3ware" "hpt" "aacraid" "auto")
  local disks=()
  local disk controller i base_disk

  # Check for direct disks first
  for disk in /dev/sd[a-z] /dev/sd[a-z][a-z] /dev/nvme[0-9]n[0-9]; do
    if [[ -b "$disk" ]]; then
      disks+=("$disk:direct")
    fi
  done

  # Check for RAID controllers
  for controller in "${controllers[@]}"; do
    for i in {0..31}; do
      for base_disk in "/dev/sda" "/dev/sg$i" "/dev/sr$i"; do
        if smartctl -d "$controller,$i" -i "$base_disk" >/dev/null 2>&1; then
          disks+=("$base_disk:$controller,$i")
          break
        fi
      done
    done
  done

  # Check for JBOD/passthrough disks on MegaRAID. Column 2 of a matching
  # storcli line is used as the device ID. An entry is only added when it is
  # numeric, answers to smartctl and was not already found by the probe above.
  # NOTE(review): /dev/sda as the access node mirrors the probe loop above;
  # confirm it is the right node on hosts with several controllers.
  if command_exists storcli; then
    local device_id entry
    while read -r device_id; do
      [[ "$device_id" =~ ^[0-9]+$ ]] || continue
      entry="/dev/sda:megaraid,$device_id"
      [[ " ${disks[*]} " == *" $entry "* ]] && continue
      if smartctl -d "megaraid,$device_id" -i /dev/sda >/dev/null 2>&1; then
        disks+=("$entry")
      fi
    done < <(storcli /c0/eALL/sALL show all 2>/dev/null \
      | grep -i "jbod\|unconfigured" | awk '{print $2}')
  fi

  echo "${disks[@]}"
}

# Main function
# Arguments: optional list of block devices; auto-detects when none is given
# Returns:   0 after checking at least one disk, 1 when smartctl is missing
#            or no disk could be found
main() {
  # Checked here rather than at load time so --help and --version work on
  # hosts without smartmontools
  if ! command_exists smartctl; then
    print_color "$RED" "Error: smartctl is not installed. Please install smartmontools package."
    return 1
  fi

  print_color "$BLUE" "Disk Health Check Script v$VERSION for Harvester OS"
  print_color "$BLUE" "===================================================="
  echo ""

  local disks=()
  local disk disk_info controller

  # Check for soft-raid first
  check_mdraid

  if [[ $# -gt 0 ]]; then
    # If specific disk provided, check only that disk
    for disk in "$@"; do
      if [[ -b "$disk" ]]; then
        disks+=("$disk:direct")
      else
        print_color "$RED" "Error: $disk is not a valid block device"
      fi
    done
  else
    # Auto-detect disks
    print_color "$CYAN" "Auto-detecting disks..."
    read -ra disks <<< "$(detect_raid_disks)"

    if [[ ${#disks[@]} -eq 0 ]]; then
      print_color "$YELLOW" "No disks found via auto-detection"
      print_color "$CYAN" "Trying direct disk access..."
      for disk in /dev/sda /dev/sdb /dev/sdc /dev/nvme0n1; do
        if [[ -b "$disk" ]]; then
          disks+=("$disk:direct")
        fi
      done
    fi
  fi

  # Explicitly named disks never fall back to guessing other devices
  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    return 1
  fi

  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check each disk
  for disk_info in "${disks[@]}"; do
    IFS=':' read -r disk controller <<< "$disk_info"
    check_disk "$disk" "$controller"
  done

  print_color "$BLUE" "Check completed!"
  echo ""
  print_color "$YELLOW" "Note: TBW Endurance values shown are minimum guaranteed numbers."
  print_color "$YELLOW" " Actual endurance for your specific drive model may be higher."
}

# Usage information
usage() {
  echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]"
  echo ""
  echo "If no disks specified, auto-detects all available disks and RAID arrays"
  echo ""
  echo "Examples:"
  echo " $SCRIPT_NAME # Check all auto-detected disks"
  echo " $SCRIPT_NAME /dev/sda # Check specific disk"
  echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk"
  echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks"
  echo ""
  echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID"
}

# Parse command line arguments. main is the last command executed, so its
# return status becomes the script's exit status.
case "${1:-}" in
  -h|--help)
    usage
    exit 0
    ;;
  -v|--version)
    echo "$SCRIPT_NAME version $VERSION"
    exit 0
    ;;
  *)
    main "$@"
    ;;
esac