#!/bin/bash
#
# Disk Health Check Script for Alma Linux 9
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
# Supports consumer and enterprise disk classification
#
# Provenance: alma-v2.6.sh as added by git patch
#   9c020df99e337aaa7a75a3cc43561f9175bbd505 ("[PATCH] added v2.6",
#   From: root, Date: Wed, 22 Oct 2025 04:51:32 +0800). The same patch also
#   adds harvester-v2.6.sh and ubuntu-v2.6.sh.

SCRIPT_NAME=$(basename "$0")
VERSION="2.6"

# Color codes. tput fails (and prints an error) when TERM is unset, e.g. under
# cron or CI; in that case fall back to empty strings so output stays clean.
RED=$(tput setaf 1 2>/dev/null || true)
GREEN=$(tput setaf 2 2>/dev/null || true)
YELLOW=$(tput setaf 3 2>/dev/null || true)
BLUE=$(tput setaf 4 2>/dev/null || true)
CYAN=$(tput setaf 6 2>/dev/null || true)
MAGENTA=$(tput setaf 5 2>/dev/null || true)
NC=$(tput sgr0 2>/dev/null || true)

# Function to print colored output
# Arguments: $1 - color escape sequence, $2 - message
# Outputs:   the message wrapped in the color and the reset sequence
print_color() {
    local color=$1
    local message=$2
    echo -e "${color}${message}${NC}"
}

# Check if command exists
# Arguments: $1 - command name
# Returns:   0 when the command is found by `command -v`, non-zero otherwise
command_exists() {
    command -v "$1" >/dev/null 2>&1
}

# Helper: read "Label: value" lines on stdin and print the text after the
# first colon with leading blanks removed (second ':'-separated field only).
_after_colon() {
    cut -d: -f2 | sed 's/^[ \t]*//'
}

# Helper: normalize a raw counter to a base-10 integer.
# Accepts plain digits, digits followed by other text ("12345h+32m" -> 12345)
# and comma-grouped thousands ("1,234" -> 1234). Prints 0 when nothing usable
# is found. Leading zeros are dropped so callers can use the value in
# arithmetic without triggering octal parsing.
_to_int() {
    local raw=$1
    local digits
    local re_grouped='^[0-9]{1,3}(,[0-9]{3})+'

    # Trim leading whitespace.
    raw=${raw#"${raw%%[![:space:]]*}"}

    if [[ "$raw" =~ $re_grouped ]]; then
        digits=${BASH_REMATCH[0]//,/}
    else
        digits=${raw%%[!0-9]*}
    fi

    # More than 18 digits would overflow 64-bit shell arithmetic.
    if [[ -n "$digits" && ${#digits} -le 18 ]]; then
        echo $((10#$digits))
    else
        echo "0"
    fi
}

# Helper: convert a size to whole decimal gigabytes.
# Arguments: $1 - number, may contain thousands commas and a decimal fraction
#            $2 - unit letter: T (terabytes), G (gigabytes) or B (bytes)
# Outputs:   integer GB, truncated; 0 when the number cannot be parsed
_size_to_gb() {
    local size=${1//,/}
    local unit=$2
    local whole=${size%%.*}
    local frac=""

    if [[ "$size" == *.* ]]; then
        frac=${size#*.}
        frac=${frac%%.*}
    fi
    whole=${whole:-0}

    if [[ ! "$whole" =~ ^[0-9]+$ || ! "$frac" =~ ^[0-9]*$ || ${#whole} -gt 18 ]]; then
        echo "0"
        return
    fi

    case "$unit" in
        [Tt])
            # Keep three fractional digits: 1.82 TB -> 1820 GB.
            frac="${frac}000"
            echo $((10#$whole * 1000 + 10#${frac:0:3}))
            ;;
        [Gg])
            echo $((10#$whole))
            ;;
        *)
            echo $((10#$whole / 1000000000))
            ;;
    esac
}

# Check dependencies
# Exits with status 1 (after printing the missing package names and an install
# hint to stderr) when smartctl or bc is not available.
check_dependencies() {
    local missing=()

    command_exists smartctl || missing+=("smartmontools")
    command_exists bc || missing+=("bc")

    if ((${#missing[@]} > 0)); then
        print_color "$RED" "Error: Missing required packages: ${missing[*]}" >&2
        echo "Install with: sudo dnf install ${missing[*]}" >&2
        exit 1
    fi
}

# TBW endurance standards (using lowest numbers)
# Keys are capacity tiers in GB, values are terabytes written.
declare -A CONSUMER_TBW=(
    ["250"]=150
    ["500"]=300
    ["1000"]=600
    ["2000"]=1200
    ["4000"]=2400
    ["8000"]=4800
)

declare -A ENTERPRISE_TBW=(
    ["250"]=450
    ["500"]=900
    ["1000"]=1800
    ["2000"]=3600
    ["4000"]=7200
    ["8000"]=14400
)

# Function to get closest capacity tier
# Arguments: $1 - capacity in GB (non-numeric or empty is treated as 0)
# Outputs:   the smallest tier >= capacity; capacities above the largest tier
#            are clamped to "8000" (no proportional scaling is applied)
get_capacity_tier() {
    local capacity_gb=${1:-0}
    local tier

    [[ "$capacity_gb" =~ ^[0-9]+$ ]] || capacity_gb=0

    for tier in 250 500 1000 2000 4000 8000; do
        if ((10#$capacity_gb <= tier)); then
            echo "$tier"
            return
        fi
    done
    echo "8000"
}

# Function to convert bytes to human readable
# Arguments: $1 - byte count
# Outputs:   value with two decimals (truncated) using 1024-based divisors,
#            labelled TB/GB/MB, or "<n> bytes" below 1 MiB
bytes_to_human() {
    local bytes=${1:-0}
    local unit divisor

    [[ "$bytes" =~ ^[0-9]+$ && ${#bytes} -le 18 ]] || bytes=0
    bytes=$((10#$bytes))

    if ((bytes >= 1099511627776)); then
        unit="TB"
        divisor=1099511627776
    elif ((bytes >= 1073741824)); then
        unit="GB"
        divisor=1073741824
    elif ((bytes >= 1048576)); then
        unit="MB"
        divisor=1048576
    else
        echo "$bytes bytes"
        return
    fi

    # Whole part and hundredths are computed separately to avoid overflow.
    printf '%d.%02d %s\n' \
        $((bytes / divisor)) $(((bytes % divisor) * 100 / divisor)) "$unit"
}

# Function to extract numeric hours from power_on_hours field
# Arguments: $1 - raw value, e.g. "12345", "12345h+32m+10s" or "1,234"
# Outputs:   the leading integer (thousands commas removed), or 0
extract_numeric_hours() {
    local power_on_hours=$1
    _to_int "$power_on_hours"
}

# Function to get disk type and interface
# Arguments: $1 - device path
# Outputs:   "<type>|<transport>|<is_enterprise>" where type is one of
#            NVMe, SAS, SSD, HDD, UNKNOWN and is_enterprise is true/false
# Notes:     classification is a keyword heuristic over `smartctl -i` output.
#            Keywords are matched as whole words so that, for example, "DC"
#            does not match every "WDC ..." model and "EP" does not match the
#            month "Sep" in smartctl's local-time line.
get_disk_info() {
    local disk=$1
    local info model
    local transport=""
    local disk_type="UNKNOWN"
    local is_enterprise=false

    info=$(smartctl -i "$disk" 2>/dev/null)

    if [[ "$disk" == /dev/nvme* ]] || grep -qi "NVMe" <<<"$info"; then
        disk_type="NVMe"
        transport="NVMe"
    elif grep -qiw "SAS" <<<"$info"; then
        disk_type="SAS"
        transport="SAS"
        is_enterprise=true
    elif grep -qi "Solid State Device" <<<"$info"; then
        # Must be tested before "Rotation Rate": the SSD marker checked here
        # would otherwise be shadowed by the HDD test below.
        disk_type="SSD"
        transport="SATA"
    elif grep -qi "Rotation Rate" <<<"$info"; then
        disk_type="HDD"
        transport="SATA"
    fi

    # Check for enterprise features anywhere in the identify output
    if grep -Eqiw 'ENTERPRISE|EP|SAS|Xeon|Xeons|DualPort|PowerLoss|PLP' <<<"$info"; then
        is_enterprise=true
    fi

    # Check device type by model name
    model=$(grep -i "Device Model:" <<<"$info" | _after_colon)
    if grep -Eqiw 'PRO|EP|DC|ENT|ENTERPRISE' <<<"$model"; then
        is_enterprise=true
    fi

    echo "$disk_type|$transport|$is_enterprise"
}

# Function to calculate TBW for SSD/NVMe
# Arguments: $1 - disk type (kept for interface compatibility; the same unit
#                 conversion applies to every type)
#            $2 - raw write counter in 32 MiB units (used when $3 is unusable)
#            $3 - number of 512-byte sectors written (preferred)
# Outputs:   decimal terabytes written with two decimals (truncated), or "0"
#            when neither counter is a positive integer
calculate_tbw() {
    local disk_type=$1
    local raw_value=$2
    local sectors=$3
    local bytes hundredths

    if [[ "$sectors" =~ ^[0-9]+$ ]] && ((10#$sectors > 0)); then
        bytes=$((10#$sectors * 512))
    elif [[ "$raw_value" =~ ^[0-9]+$ ]] && ((10#$raw_value > 0)); then
        # 32 MiB = 33554432 bytes
        bytes=$((10#$raw_value * 33554432))
    else
        echo "0"
        return
    fi

    # 1 TB = 10^12 bytes, so bytes / 10^10 is the value in hundredths of a TB.
    hundredths=$((bytes / 10000000000))
    printf '%d.%02d\n' $((hundredths / 100)) $((hundredths % 100))
}

# Function to get estimated endurance
# Arguments: $1 - capacity in GB, $2 - "true" for enterprise, $3 - disk type
# Outputs:   TBW figure from ENTERPRISE_TBW/CONSUMER_TBW for the capacity
#            tier, or "N/A" for HDDs
get_estimated_endurance() {
    local capacity_gb=$1
    local is_enterprise=$2
    local disk_type=$3
    local capacity_tier

    # HDDs don't have TBW
    if [[ "$disk_type" == "HDD" ]]; then
        echo "N/A"
        return
    fi

    capacity_tier=$(get_capacity_tier "$capacity_gb")

    if [[ "$is_enterprise" == "true" ]]; then
        echo "${ENTERPRISE_TBW[$capacity_tier]}"
    else
        echo "${CONSUMER_TBW[$capacity_tier]}"
    fi
}

# Function to estimate SSD lifespan with TBW remaining
# Arguments: $1 - power-on hours (raw; commas and trailing text tolerated)
#            $2 - TBW used, $3 - estimated endurance in TB or "N/A"
#            $4 - disk type
# Outputs:   "<remaining %>|<TBW remaining>|<wear status>|<disk type>".
#            The first two fields carry color escapes when wear is computed.
# Requires:  bc for the fractional arithmetic
estimate_ssd_lifespan() {
    local power_on_hours=$1
    local tbw_used=$2
    local estimated_endurance=$3
    local disk_type=$4
    local hours clean_tbw_used tbw_remaining
    local lifespan_used lifespan_remaining color status

    # Sanitize first: a raw "1,000" would otherwise be evaluated as the comma
    # expression 1,000 == 0 by the arithmetic test.
    hours=$(extract_numeric_hours "$power_on_hours")
    if ((hours == 0)); then
        echo "Unknown|Unknown|Unknown|$disk_type"
        return
    fi

    if [[ "$estimated_endurance" == "N/A" ]]; then
        echo "N/A|N/A|N/A|HDD"
        return
    fi

    # Handle the case where tbw_used might have formatting issues
    clean_tbw_used=${tbw_used//[!0-9.]/}
    clean_tbw_used=${clean_tbw_used:-0}

    tbw_remaining=$(echo "scale=2; $estimated_endurance - $clean_tbw_used" | bc 2>/dev/null || echo "$estimated_endurance")

    if [[ $(echo "$clean_tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
        lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0")
        lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100")

        if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then
            color=$RED
            status="High wear"
        elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then
            color=$YELLOW
            status="Moderate wear"
        else
            color=$GREEN
            status="Healthy"
        fi
        echo "${color}${lifespan_remaining}%${NC}|${color}${tbw_remaining} TB${NC}|${status}|$disk_type"
    else
        echo "Unknown|${estimated_endurance} TB|New|$disk_type"
    fi
}

# Function to estimate HDD lifespan
# Arguments: $1 - power-on hours (raw), $2 - reallocated sectors,
#            $3 - pending sectors, $4 - start/stop count,
#            $5 - load cycle count, $6 - disk type
# Outputs:   a colored lifespan bucket with a short reason, "Unknown" when
#            hours are zero/unparsable, or CRITICAL when sectors are pending
estimate_hdd_lifespan() {
    local power_on_hours=$1
    local reallocated_sectors=$2
    local pending_sectors=$3
    local start_stop_count=$4
    local load_cycle_count=$5
    local disk_type=$6
    local numeric_hours
    local severity=0

    # Extract numeric hours only
    numeric_hours=$(extract_numeric_hours "$power_on_hours")
    if ((numeric_hours == 0)); then
        echo "Unknown"
        return
    fi

    # Counters may arrive empty or with trailing text; normalize them so the
    # arithmetic comparisons below cannot raise evaluation errors.
    reallocated_sectors=$(_to_int "$reallocated_sectors")
    pending_sectors=$(_to_int "$pending_sectors")
    start_stop_count=$(_to_int "$start_stop_count")
    load_cycle_count=$(_to_int "$load_cycle_count")

    # Critical issues
    if ((pending_sectors > 0)); then
        echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
        return
    elif ((reallocated_sectors > 100)); then
        severity=$((severity + 3))
    elif ((reallocated_sectors > 10)); then
        severity=$((severity + 2))
    elif ((reallocated_sectors > 0)); then
        severity=$((severity + 1))
    fi

    # Usage-based assessment
    if ((numeric_hours > 50000)); then
        severity=$((severity + 3))
    elif ((numeric_hours > 30000)); then
        severity=$((severity + 2))
    elif ((numeric_hours > 15000)); then
        severity=$((severity + 1))
    fi

    # Mechanical wear (for HDDs)
    if [[ "$disk_type" == "HDD" ]]; then
        if ((start_stop_count > 50000)); then
            severity=$((severity + 2))
        elif ((start_stop_count > 20000)); then
            severity=$((severity + 1))
        fi

        if ((load_cycle_count > 500000)); then
            severity=$((severity + 2))
        elif ((load_cycle_count > 200000)); then
            severity=$((severity + 1))
        fi
    fi

    if ((severity >= 5)); then
        echo "${RED}< 6 months${NC} (Multiple risk factors)"
    elif ((severity >= 3)); then
        echo "${YELLOW}6-18 months${NC} (Moderate wear)"
    elif ((severity >= 1)); then
        echo "${YELLOW}1-3 years${NC} (Light wear)"
    else
        echo "${GREEN}> 3 years${NC} (Healthy)"
    fi
}

# Function to check soft-raid (MDRAID)
# Reads array names (lines starting with "md<digits>") from /proc/mdstat and,
# for each one that exists as a block device, prints its RAID level, state and
# active device count as reported by `mdadm --detail` (when mdadm is present).
check_mdraid() {
    local md_devices=()
    local line md md_info

    if [[ -f /proc/mdstat ]]; then
        while IFS= read -r line; do
            if [[ $line =~ ^md[0-9]+ ]]; then
                md_devices+=("/dev/${line%% *}")
            fi
        done < /proc/mdstat
    fi

    for md in "${md_devices[@]}"; do
        [[ -b "$md" ]] || continue
        print_color "$MAGENTA" "Found software RAID: $md"

        command_exists mdadm || continue
        md_info=$(mdadm --detail "$md" 2>/dev/null)
        [[ -n "$md_info" ]] || continue

        echo "RAID Level: $(grep "Raid Level" <<<"$md_info" | _after_colon)"
        echo "State: $(grep "State" <<<"$md_info" | head -1 | _after_colon)"
        echo "Devices: $(grep "Active Devices" <<<"$md_info" | _after_colon)"
        echo ""
    done
}

# Function to extract capacity in GB from various formats - IMPROVED FOR HDD/SAS
# Arguments: $1 - capacity text, e.g. "2,000,398,934,016 bytes [2.00 TB]",
#                 "[500.1 GB]", "1.82TB", "2000398934016B"
# Outputs:   whole decimal GB, or 0 when nothing could be parsed
# Notes:     the patterns live in variables on purpose. Written inline after
#            =~, a backslash quotes the next character, so "\s" would match a
#            literal "s" rather than whitespace; [[:space:]] is used instead.
extract_capacity_gb() {
    local capacity=$1
    local capacity_gb=0
    local re_bracket_tb='\[([0-9,.]+)[[:space:]]*[Tt][Bb]'
    local re_bracket_gb='\[([0-9,.]+)[[:space:]]*[Gg][Bb]'
    local re_bytes_word='([0-9,]+)[[:space:]]+bytes'
    local re_plain_tb='([0-9,.]+)[[:space:]]*[Tt][Bb]'
    local re_plain_gb='([0-9,.]+)[[:space:]]*[Gg][Bb]'
    local re_plain_b='([0-9,]+)[[:space:]]*[Bb]'

    # Try different patterns to extract capacity
    if [[ $capacity =~ $re_bracket_tb ]]; then
        # Pattern: [1.82 TB]
        capacity_gb=$(_size_to_gb "${BASH_REMATCH[1]}" T)
    elif [[ $capacity =~ $re_bracket_gb ]]; then
        # Pattern: [500.1 GB]
        capacity_gb=$(_size_to_gb "${BASH_REMATCH[1]}" G)
    elif [[ $capacity =~ $re_bytes_word ]]; then
        # Pattern: 500,107,862,016 bytes
        capacity_gb=$(_size_to_gb "${BASH_REMATCH[1]}" B)
    elif [[ $capacity =~ $re_plain_tb ]]; then
        # Pattern: 1.82TB
        capacity_gb=$(_size_to_gb "${BASH_REMATCH[1]}" T)
    elif [[ $capacity =~ $re_plain_gb ]]; then
        # Pattern: 500.1GB
        capacity_gb=$(_size_to_gb "${BASH_REMATCH[1]}" G)
    elif [[ $capacity =~ $re_plain_b ]]; then
        # Pattern: 2000398934016B
        capacity_gb=$(_size_to_gb "${BASH_REMATCH[1]}" B)
    fi

    # Ensure we have a valid number
    if [[ "$capacity_gb" =~ ^[0-9]+$ ]] && ((capacity_gb > 0)); then
        echo "$capacity_gb"
    else
        echo "0"
    fi
}

# Function to get NVMe capacity using smartctl
# Arguments: $1 - device path
# Outputs:   the first non-empty value among "Total NVM Capacity",
#            "Namespace 1 Size/Capacity" and "User Capacity" (the latter with
#            its bracketed suffix removed); empty when none is reported
get_nvme_capacity() {
    local disk=$1
    local nvme_info
    local capacity=""

    nvme_info=$(smartctl -i "$disk" 2>/dev/null)

    # Try to get capacity from different fields
    capacity=$(grep -i "Total NVM Capacity" <<<"$nvme_info" | _after_colon)
    if [[ -z "$capacity" ]]; then
        capacity=$(grep -i "Namespace 1 Size/Capacity" <<<"$nvme_info" | _after_colon)
    fi
    if [[ -z "$capacity" ]]; then
        capacity=$(grep -i "User Capacity" <<<"$nvme_info" | _after_colon | cut -d'[' -f1)
    fi

    echo "$capacity"
}

# Function to get human readable capacity
# Arguments: $1 - capacity in GB
# Outputs:   "<x.yy> TB" (two decimals, truncated) from 1000 GB upwards,
#            otherwise "<n> GB"
get_human_capacity() {
    local capacity_gb=$1

    if [[ "$capacity_gb" =~ ^[0-9]+$ && ${#capacity_gb} -le 18 ]] &&
        ((10#$capacity_gb >= 1000)); then
        capacity_gb=$((10#$capacity_gb))
        printf '%d.%02d TB\n' $((capacity_gb / 1000)) $((capacity_gb % 1000 / 10))
    else
        echo "${capacity_gb} GB"
    fi
}

# Function to extract capacity from model name for HDD/SAS drives
# Arguments: $1 - model string, $2 - disk type (currently unused)
# Outputs:   capacity in GB, or 0 when the model carries no recognizable size
# Notes:     a size is "<number>TB" or "<number>GB" (any letter case, decimal
#            fraction allowed) not preceded by another digit, so "12TB" is
#            12000 rather than being mistaken for "2TB". A bare "<n>G" is only
#            accepted for the known sizes 250/500/1000/2000/4000/8000, because
#            digits followed by "G" also occur inside unrelated part numbers.
extract_capacity_from_model() {
    local model=$1
    local disk_type=$2
    local re_unit='(^|[^0-9.])([0-9]+(\.[0-9]+)?)([TtGg])[Bb]'
    local re_bare_g='(^|[^0-9])(8000|4000|2000|1000|500|250)[Gg]'

    if [[ "$model" =~ $re_unit ]]; then
        _size_to_gb "${BASH_REMATCH[2]}" "${BASH_REMATCH[4]}"
    elif [[ "$model" =~ $re_bare_g ]]; then
        echo "${BASH_REMATCH[2]}"
    else
        echo "0"
    fi
}

# Function to check a
single disk +check_disk() { + local disk=$1 + + print_color $CYAN "Checking disk: $disk" + echo "==================================================" + + # Check if disk exists and is accessible + if [[ ! -b "$disk" ]]; then + print_color $RED "Error: $disk is not a valid block device" + echo "" + return + fi + + # Get disk information + local disk_info=$(get_disk_info "$disk") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$(smartctl -i "$disk" 2>/dev/null) + local health=$(smartctl -H "$disk" 2>/dev/null) + local attributes=$(smartctl -A "$disk" 2>/dev/null) + + # Check if smartctl command succeeded + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access." + echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep -i "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep -i "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep -i "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep -i "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # For NVMe disks, try to get capacity from different fields + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_capacity=$(get_nvme_capacity "$disk") + if [[ -n "$nvme_capacity" ]]; then + capacity="$nvme_capacity" + fi + fi + + # Extract capacity in GB and human readable format + local capacity_gb=$(extract_capacity_gb "$capacity") + local capacity_human="" + + # If capacity extraction failed, try alternative methods + if [[ "$capacity_gb" -eq 0 ]]; then + # Try to get capacity from model name (especially 
for HDD/SAS) + local model_capacity=$(extract_capacity_from_model "$model" "$disk_type") + if [[ "$model_capacity" -gt 0 ]]; then + capacity_gb="$model_capacity" + capacity_human=$(get_human_capacity "$capacity_gb") + else + # Final fallback based on disk type and common sizes + if [[ "$disk_type" == "HDD" ]]; then + # Common HDD sizes + if [[ "$model" =~ ST2000 ]]; then + capacity_gb=2000 + elif [[ "$model" =~ ST1000 ]]; then + capacity_gb=1000 + elif [[ "$model" =~ ST4000 ]]; then + capacity_gb=4000 + elif [[ "$model" =~ ST3000 ]]; then + capacity_gb=3000 + else + capacity_gb=0 + fi + elif [[ "$disk_type" == "SSD" ]]; then + # Common SSD sizes + if [[ "$model" =~ 960[Gg] ]]; then + capacity_gb=960 + elif [[ "$model" =~ 480[Gg] ]]; then + capacity_gb=480 + elif [[ "$model" =~ 240[Gg] ]]; then + capacity_gb=240 + elif [[ "$model" =~ 120[Gg] ]]; then + capacity_gb=120 + else + capacity_gb=0 + fi + elif [[ "$disk_type" == "NVMe" ]]; then + # Common NVMe sizes + if [[ "$model" =~ 500[Gg] ]]; then + capacity_gb=500 + elif [[ "$model" =~ 1000[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then + capacity_gb=1000 + elif [[ "$model" =~ 2000[Gg] ]] || [[ "$model" =~ 2[Tt] ]]; then + capacity_gb=2000 + else + capacity_gb=0 + fi + fi + capacity_human=$(get_human_capacity "$capacity_gb") + fi + else + capacity_human=$(get_human_capacity "$capacity_gb") + fi + + # If we still don't have capacity, show unknown + if [[ "$capacity_gb" -eq 0 ]]; then + capacity_human="Unknown" + fi + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract SMART attributes + local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1) + local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw") + local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print 
$10}' | head -1) + local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1) + local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}' | head -1) + local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1) + local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1) + local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}' | head -1) + + # For NVMe disks using smartctl extended attributes + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null) + # Extract data units written for NVMe + local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $4}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + # Convert data units to sectors (1 data unit = 1000 sectors for NVMe) + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + else + # Try alternative field + data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + fi + fi + # Get power on hours for NVMe + local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') + if [[ -n "$nvme_power_hours" ]]; then + power_on_hours="$nvme_power_hours" + fi + fi + + # Display basic information + echo "Model: ${model:-Unknown}" + echo "Serial: ${serial:-Unknown}" + echo "Type: $disk_type" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: $capacity_human" + echo "Firmware: ${firmware:-Unknown}" + echo "Health: ${health_status:-Unknown}" + echo "Power On Hours: ${power_on_hours:-Unknown}" + + # Disk type specific analysis + if [[ "$disk_type" == "HDD" ]]; 
then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" + [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") + echo "Lifespan: $lifespan" + + elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then + local tbw_used=0 + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") + elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") + fi + + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type") + + echo "TBW Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)" + + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") + local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) + local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) + local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) + + if [[ "$estimated_endurance" != "N/A" ]]; then + echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" + fi + + # Show mechanical attributes for SAS drives that might be SSDs + if [[ "$disk_type" == "SAS" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + fi + else + print_color $YELLOW "Unknown disk type - limited information available" + fi + + echo "" +} + +# Function to detect all disks +detect_disks() { + local disks=() + + # Check for SATA/SAS disks + for disk in /dev/sd[a-z] 
/dev/sd[a-z][a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for NVMe disks (base devices only, no partitions) + for disk in /dev/nvme[0-9]n[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for other disk types + for disk in /dev/vd[a-z] /dev/xvd[a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + echo "${disks[@]}" +} + +# Main function +main() { + print_color $BLUE "Disk Health Check Script v$VERSION for Alma Linux 9" + print_color $BLUE "====================================================" + echo "" + + check_dependencies + + local disks=() + + # Check for soft-raid first + check_mdraid + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks + print_color $CYAN "Auto-detecting disks..." + read -ra disks <<< "$(detect_disks)" + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check if running as root, warn if not + if [[ $EUID -ne 0 ]]; then + print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible." + echo "For complete results, run as: sudo $0" + echo "" + fi + + # Check each disk + for disk in "${disks[@]}"; do + check_disk "$disk" + done + + print_color $BLUE "Check completed!" + echo "" + print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers." + print_color $YELLOW " Actual endurance for your specific drive model may be higher." 
+} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " sudo $SCRIPT_NAME # Check all disks (as root)" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" + echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" +} + +# Parse command line arguments +case "${1:-}" in + -h|--help) + usage + exit 0 + ;; + -v|--version) + echo "$SCRIPT_NAME version $VERSION" + exit 0 + ;; + *) + main "$@" + ;; +esac diff --git a/harvester-v2.6.sh b/harvester-v2.6.sh new file mode 100755 index 0000000..020443b --- /dev/null +++ b/harvester-v2.6.sh @@ -0,0 +1,752 @@ +#!/bin/bash + +# Disk Health Check Script for Harvester OS +# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid +# Supports consumer and enterprise disk classification + +SCRIPT_NAME=$(basename "$0") +VERSION="2.6" + +# Color codes +RED=$(tput setaf 1) +GREEN=$(tput setaf 2) +YELLOW=$(tput setaf 3) +BLUE=$(tput setaf 4) +CYAN=$(tput setaf 6) +MAGENTA=$(tput setaf 5) +NC=$(tput sgr0) + +# Function to print colored output +print_color() { + local color=$1 + local message=$2 + echo -e "${color}${message}${NC}" +} + +# Check if smartctl is installed +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +if ! command_exists smartctl; then + print_color $RED "Error: smartctl is not installed. Please install smartmontools package." 
+ exit 1 +fi + +# TBW endurance standards (using lowest numbers) +declare -A CONSUMER_TBW=( + ["250"]=150 + ["500"]=300 + ["1000"]=600 + ["2000"]=1200 + ["4000"]=2400 + ["8000"]=4800 +) + +declare -A ENTERPRISE_TBW=( + ["250"]=450 + ["500"]=900 + ["1000"]=1800 + ["2000"]=3600 + ["4000"]=7200 + ["8000"]=14400 +) + +# Function to get closest capacity tier +get_capacity_tier() { + local capacity_gb=$1 + local tiers=("250" "500" "1000" "2000" "4000" "8000") + + for tier in "${tiers[@]}"; do + if [[ $capacity_gb -le $tier ]]; then + echo $tier + return + fi + done + # For larger than 8TB, use proportional scaling from 4TB + echo "8000" +} + +# Function to convert bytes to human readable +bytes_to_human() { + local bytes=$1 + if [[ $bytes -ge 1099511627776 ]]; then + echo "$(echo "scale=2; $bytes / 1099511627776" | bc -l 2>/dev/null || echo "0") TB" + elif [[ $bytes -ge 1073741824 ]]; then + echo "$(echo "scale=2; $bytes / 1073741824" | bc -l 2>/dev/null || echo "0") GB" + elif [[ $bytes -ge 1048576 ]]; then + echo "$(echo "scale=2; $bytes / 1048576" | bc -l 2>/dev/null || echo "0") MB" + else + echo "$bytes bytes" + fi +} + +# Function to extract numeric hours from power_on_hours field +extract_numeric_hours() { + local power_on_hours=$1 + # Remove everything after non-numeric characters + local numeric_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*$//') + if [[ -n "$numeric_hours" && "$numeric_hours" =~ ^[0-9]+$ ]]; then + echo "$numeric_hours" + else + echo "0" + fi +} + +# Function to get disk type and interface +get_disk_info() { + local disk=$1 + local controller=$2 + + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + smart_cmd+=" -i $disk" + + local info=$($smart_cmd 2>/dev/null) + local transport="" + local disk_type="UNKNOWN" + local is_enterprise=false + + # Check if it's NVMe + if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then + disk_type="NVMe" + transport="NVMe" + # Check for SAS + elif echo "$info" 
| grep -qi "SAS"; then + disk_type="SAS" + transport="SAS" + is_enterprise=true + # Check for SATA SSD + elif echo "$info" | grep -qi "Solid State Device"; then + disk_type="SSD" + transport="SATA" + # Check for SATA HDD + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="HDD" + transport="SATA" + fi + + # Check for enterprise features + if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then + is_enterprise=true + fi + + # Check device type by model name + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then + is_enterprise=true + fi + fi + + echo "$disk_type|$transport|$is_enterprise" +} + +# Function to calculate TBW for SSD/NVMe +calculate_tbw() { + local disk_type=$1 + local raw_value=$2 + local sectors=$3 + + local tbw=0 + + if [[ -n "$sectors" && "$sectors" != "0" ]]; then + # Calculate from sectors (most common for SATA SSDs) + local bytes=$((sectors * 512)) + tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc -l 2>/dev/null || echo "0") + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + if [[ "$disk_type" == "NVMe" ]]; then + # NVMe: raw value is in 32MB units + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc -l 2>/dev/null || echo "0") + else + # SATA SSD: various manufacturers + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc -l 2>/dev/null || echo "0") + fi + fi + + echo "$tbw" +} + +# Function to get estimated endurance +get_estimated_endurance() { + local capacity_gb=$1 + local is_enterprise=$2 + local disk_type=$3 + + # HDDs don't have TBW + if [[ "$disk_type" == "HDD" ]]; then + echo "N/A" + return + fi + + local capacity_tier=$(get_capacity_tier "$capacity_gb") + + if [[ "$is_enterprise" == "true" ]]; then + echo 
"${ENTERPRISE_TBW[$capacity_tier]}" + else + echo "${CONSUMER_TBW[$capacity_tier]}" + fi +} + +# Function to estimate SSD lifespan with TBW remaining +estimate_ssd_lifespan() { + local power_on_hours=$1 + local tbw_used=$2 + local estimated_endurance=$3 + local disk_type=$4 + + if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then + echo "Unknown||Unknown|New" + return + fi + + if [[ "$estimated_endurance" == "N/A" ]]; then + echo "N/A|N/A|N/A|HDD" + return + fi + + # Handle the case where tbw_used might have formatting issues + local clean_tbw_used=$(echo "$tbw_used" | sed 's/[^0-9.]//g') + if [[ -z "$clean_tbw_used" ]]; then + clean_tbw_used=0 + fi + + local tbw_remaining=$(echo "scale=2; $estimated_endurance - $clean_tbw_used" | bc -l 2>/dev/null || echo "$estimated_endurance") + + if [[ $(echo "$clean_tbw_used > 0" | bc -l 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc -l 2>/dev/null || echo "0") + local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc -l 2>/dev/null || echo "100") + + if [[ $(echo "$lifespan_used >= 80" | bc -l) -eq 1 ]]; then + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type" + elif [[ $(echo "$lifespan_used >= 50" | bc -l) -eq 1 ]]; then + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type" + else + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type" + fi + else + echo "Unknown|${estimated_endurance} TB|New|$disk_type" + fi +} + +# Function to estimate HDD lifespan +estimate_hdd_lifespan() { + local power_on_hours=$1 + local reallocated_sectors=$2 + local pending_sectors=$3 + local start_stop_count=$4 + local load_cycle_count=$5 + local disk_type=$6 + + # Extract numeric hours only + local numeric_hours=$(extract_numeric_hours "$power_on_hours") + + if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then + 
echo "Unknown" + return + fi + + local severity=0 + + # Critical issues + if [[ "$pending_sectors" -gt 0 ]]; then + echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + return + elif [[ "$reallocated_sectors" -gt 100 ]]; then + severity=$((severity + 3)) + elif [[ "$reallocated_sectors" -gt 10 ]]; then + severity=$((severity + 2)) + elif [[ "$reallocated_sectors" -gt 0 ]]; then + severity=$((severity + 1)) + fi + + # Usage-based assessment + if [[ "$numeric_hours" -gt 50000 ]]; then + severity=$((severity + 3)) + elif [[ "$numeric_hours" -gt 30000 ]]; then + severity=$((severity + 2)) + elif [[ "$numeric_hours" -gt 15000 ]]; then + severity=$((severity + 1)) + fi + + # Mechanical wear (for HDDs) + if [[ "$disk_type" == "HDD" ]]; then + if [[ "$start_stop_count" -gt 50000 ]]; then + severity=$((severity + 2)) + elif [[ "$start_stop_count" -gt 20000 ]]; then + severity=$((severity + 1)) + fi + + if [[ "$load_cycle_count" -gt 500000 ]]; then + severity=$((severity + 2)) + elif [[ "$load_cycle_count" -gt 200000 ]]; then + severity=$((severity + 1)) + fi + fi + + if [[ $severity -ge 5 ]]; then + echo "${RED}< 6 months${NC} (Multiple risk factors)" + elif [[ $severity -ge 3 ]]; then + echo "${YELLOW}6-18 months${NC} (Moderate wear)" + elif [[ $severity -ge 1 ]]; then + echo "${YELLOW}1-3 years${NC} (Light wear)" + else + echo "${GREEN}> 3 years${NC} (Healthy)" + fi +} + +# Function to check soft-raid (MDRAID) +check_mdraid() { + local md_devices=() + + if [[ -f /proc/mdstat ]]; then + while IFS= read -r line; do + if [[ $line =~ ^md[0-9]+ ]]; then + md_devices+=("/dev/${line%% *}") + fi + done < /proc/mdstat + fi + + for md in "${md_devices[@]}"; do + if [[ -b "$md" ]]; then + print_color $MAGENTA "Found software RAID: $md" + if command_exists mdadm; then + local md_info=$(mdadm --detail "$md" 2>/dev/null) + if [[ -n "$md_info" ]]; then + echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "State: $(echo 
"$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "" + fi + fi + fi + done +} + +# Function to extract capacity in GB from various formats - IMPROVED FOR HDD/SAS +extract_capacity_gb() { + local capacity=$1 + local capacity_gb=0 + + # Try different patterns to extract capacity + if [[ $capacity =~ \[([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: [1.82 TB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc -l 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ \[([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: [500.1 GB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc -l 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,]+)\s+bytes ]]; then + # Pattern: 500,107,862,016 bytes + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc -l 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: 1.82TB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc -l 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: 500.1GB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc -l 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,]+)\s*[Bb] ]]; then + # Pattern: 2000398934016B + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc -l 2>/dev/null | cut -d. -f1) + fi + + # Ensure we have a valid number + if [[ -z "$capacity_gb" || ! 
"$capacity_gb" =~ ^[0-9]+$ || "$capacity_gb" -le 0 ]]; then + echo "0" + else + echo "$capacity_gb" + fi +} + +# Function to get NVMe capacity using smartctl +get_nvme_capacity() { + local disk=$1 + local controller=$2 + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + + local nvme_info=$($smart_cmd -i "$disk" 2>/dev/null) + local capacity="" + + # Try to get capacity from different fields + capacity=$(echo "$nvme_info" | grep -i "Total NVM Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "Namespace 1 Size/Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + fi + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "User Capacity" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + fi + + echo "$capacity" +} + +# Function to get human readable capacity +get_human_capacity() { + local capacity_gb=$1 + + if [[ $capacity_gb -ge 1000 ]]; then + echo "$(echo "scale=2; $capacity_gb / 1000" | bc -l) TB" + else + echo "${capacity_gb} GB" + fi +} + +# Function to extract capacity from model name for HDD/SAS drives +extract_capacity_from_model() { + local model=$1 + local disk_type=$2 + + # Common HDD/SAS capacity patterns in model names + if [[ "$model" =~ 2[Tt][Bb] ]] || [[ "$model" =~ 2000[Gg] ]]; then + echo "2000" + elif [[ "$model" =~ 1[Tt][Bb] ]] || [[ "$model" =~ 1000[Gg] ]]; then + echo "1000" + elif [[ "$model" =~ 4[Tt][Bb] ]] || [[ "$model" =~ 4000[Gg] ]]; then + echo "4000" + elif [[ "$model" =~ 8[Tt][Bb] ]] || [[ "$model" =~ 8000[Gg] ]]; then + echo "8000" + elif [[ "$model" =~ 500[Gg] ]]; then + echo "500" + elif [[ "$model" =~ 250[Gg] ]]; then + echo "250" + else + # Try to extract numbers that look like capacities + local capacity_match=$(echo "$model" | grep -oE '[0-9]+[GT]B' | head -1) + if [[ -n "$capacity_match" ]]; then + if [[ "$capacity_match" =~ ([0-9]+)TB ]]; then + echo "$((${BASH_REMATCH[1]} * 1000))" + elif [[ "$capacity_match" =~ 
([0-9]+)GB ]]; then + echo "${BASH_REMATCH[1]}" + fi + else + echo "0" + fi + fi +} + +# Function to check a single disk +check_disk() { + local disk=$1 + local controller=$2 + + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + + print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})" + echo "==================================================" + + # Get disk information + local disk_info=$(get_disk_info "$disk" "$controller") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$($smart_cmd -i "$disk" 2>/dev/null) + local health=$($smart_cmd -H "$disk" 2>/dev/null) + local attributes=$($smart_cmd -A "$disk" 2>/dev/null) + + # Check if we can read the disk + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need controller specification." 
+ echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep -i "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep -i "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep -i "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep -i "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # For NVMe disks, try to get capacity from different fields + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_capacity=$(get_nvme_capacity "$disk" "$controller") + if [[ -n "$nvme_capacity" ]]; then + capacity="$nvme_capacity" + fi + fi + + # Extract capacity in GB and human readable format + local capacity_gb=$(extract_capacity_gb "$capacity") + local capacity_human="" + + # If capacity extraction failed, try alternative methods + if [[ "$capacity_gb" -eq 0 ]]; then + # Try to get capacity from model name (especially for HDD/SAS) + local model_capacity=$(extract_capacity_from_model "$model" "$disk_type") + if [[ "$model_capacity" -gt 0 ]]; then + capacity_gb="$model_capacity" + capacity_human=$(get_human_capacity "$capacity_gb") + else + # Final fallback based on disk type and common sizes + if [[ "$disk_type" == "HDD" ]]; then + # Common HDD sizes + if [[ "$model" =~ ST2000 ]]; then + capacity_gb=2000 + elif [[ "$model" =~ ST1000 ]]; then + capacity_gb=1000 + elif [[ "$model" =~ ST4000 ]]; then + capacity_gb=4000 + elif [[ "$model" =~ ST3000 ]]; then + capacity_gb=3000 + else + capacity_gb=0 + fi + elif [[ "$disk_type" == "SSD" ]]; then + # Common SSD sizes + if [[ "$model" =~ 960[Gg] ]]; then + capacity_gb=960 + elif [[ "$model" =~ 480[Gg] ]]; then + capacity_gb=480 + elif [[ "$model" =~ 240[Gg] ]]; then + capacity_gb=240 + elif [[ "$model" =~ 120[Gg] ]]; then + capacity_gb=120 + else + capacity_gb=0 + fi + elif [[ 
"$disk_type" == "NVMe" ]]; then + # Common NVMe sizes + if [[ "$model" =~ 500[Gg] ]]; then + capacity_gb=500 + elif [[ "$model" =~ 1000[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then + capacity_gb=1000 + elif [[ "$model" =~ 2000[Gg] ]] || [[ "$model" =~ 2[Tt] ]]; then + capacity_gb=2000 + else + capacity_gb=0 + fi + fi + capacity_human=$(get_human_capacity "$capacity_gb") + fi + else + capacity_human=$(get_human_capacity "$capacity_gb") + fi + + # If we still don't have capacity, show unknown + if [[ "$capacity_gb" -eq 0 ]]; then + capacity_human="Unknown" + fi + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract SMART attributes + local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}') + local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw") + local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}') + local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}') + local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}') + local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}') + local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}') + local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}') + + # For NVMe disks using smartctl extended attributes + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_attributes=$($smart_cmd -x "$disk" 2>/dev/null) + # Extract data units written for NVMe + local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $4}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + # Convert data units to sectors (1 data unit = 1000 sectors for NVMe) + 
total_written=$(echo "$data_units_written * 1000" | bc -l 2>/dev/null) + else + # Try alternative field + data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + total_written=$(echo "$data_units_written * 1000" | bc -l 2>/dev/null) + fi + fi + # Get power on hours for NVMe + local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') + if [[ -n "$nvme_power_hours" ]]; then + power_on_hours="$nvme_power_hours" + fi + fi + + # Display basic information + echo "Model: ${model:-Unknown}" + echo "Serial: ${serial:-Unknown}" + echo "Type: $disk_type" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: $capacity_human" + echo "Firmware: ${firmware:-Unknown}" + echo "Health: ${health_status:-Unknown}" + echo "Power On Hours: ${power_on_hours:-Unknown}" + + # Disk type specific analysis + if [[ "$disk_type" == "HDD" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" + [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") + echo "Lifespan: $lifespan" + + elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then + local tbw_used=0 + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") + elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") + fi + + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type") + + echo "TBW 
Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)" + + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") + local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) + local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) + local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) + + if [[ "$estimated_endurance" != "N/A" ]]; then + echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" + fi + + # Show mechanical attributes for SAS drives that might be SSDs + if [[ "$disk_type" == "SAS" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + fi + else + print_color $YELLOW "Unknown disk type - limited information available" + fi + + echo "" +} + +# Function to detect RAID controllers and disks +detect_raid_disks() { + local controllers=("megaraid" "cciss" "areca" "3ware" "hpt" "aacraid" "auto") + local disks=() + + # Check for direct disks first (SATA/SAS/NVMe) + for disk in /dev/sd[a-z] /dev/sd[a-z][a-z] /dev/nvme[0-9]n[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk:direct") + fi + done + + # Check for RAID controllers + for controller in "${controllers[@]}"; do + for i in {0..31}; do + # Try different device patterns + for base_disk in "/dev/sda" "/dev/sg$i" "/dev/sr$i"; do + if smartctl -d "$controller,$i" -i "$base_disk" >/dev/null 2>&1; then + disks+=("$base_disk:$controller,$i") + break + fi + done + done + done + + # Check for JBOD/passthrough disks on MegaRAID + if command_exists storcli; then + local jbod_disks=$(storcli /c0/eALL/sALL show all 2>/dev/null | grep -i "jbod\|unconfigured" | awk '{print $2}') + for disk in $jbod_disks; do + disks+=("$disk:megaraid,$disk") + done + fi + + echo "${disks[@]}" +} + +# Main function +main() { + print_color $BLUE "Disk Health Check Script v$VERSION for Harvester OS" + print_color $BLUE 
"====================================================" + echo "" + + local disks=() + + # Check for soft-raid first + check_mdraid + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; do + if [[ -b "$disk" ]]; then + disks+=("$disk:direct") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks + print_color $CYAN "Auto-detecting disks..." + read -ra disks <<< "$(detect_raid_disks)" + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $YELLOW "No disks found via auto-detection" + print_color $CYAN "Trying direct disk access..." + # Try direct access to common disks + for disk in /dev/sda /dev/sdb /dev/sdc /dev/nvme0n1; do + if [[ -b "$disk" ]]; then + disks+=("$disk:direct") + fi + done + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check each disk + for disk_info in "${disks[@]}"; do + IFS=':' read -r disk controller <<< "$disk_info" + check_disk "$disk" "$controller" + done + + print_color $BLUE "Check completed!" + echo "" + print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers." + print_color $YELLOW " Actual endurance for your specific drive model may be higher." 
+} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks and RAID arrays" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" + echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" +} + +# Parse command line arguments +case "${1:-}" in + -h|--help) + usage + exit 0 + ;; + -v|--version) + echo "$SCRIPT_NAME version $VERSION" + exit 0 + ;; + *) + main "$@" + ;; +esac diff --git a/ubuntu-v2.6.sh b/ubuntu-v2.6.sh new file mode 100755 index 0000000..b75c9d3 --- /dev/null +++ b/ubuntu-v2.6.sh @@ -0,0 +1,759 @@ +#!/bin/bash + +# Disk Health Check Script for Ubuntu +# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid +# Supports consumer and enterprise disk classification + +SCRIPT_NAME=$(basename "$0") +VERSION="2.6" + +# Color codes +RED=$(tput setaf 1) +GREEN=$(tput setaf 2) +YELLOW=$(tput setaf 3) +BLUE=$(tput setaf 4) +CYAN=$(tput setaf 6) +MAGENTA=$(tput setaf 5) +NC=$(tput sgr0) + +# Function to print colored output +print_color() { + local color=$1 + local message=$2 + echo -e "${color}${message}${NC}" +} + +# Check if command exists +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +# Check dependencies +check_dependencies() { + local missing=() + + if ! command_exists smartctl; then + missing+=("smartmontools") + fi + + if ! 
command_exists bc; then + missing+=("bc") + fi + + if [[ ${#missing[@]} -gt 0 ]]; then + print_color $RED "Error: Missing required packages: ${missing[*]}" + echo "Install with: sudo apt update && sudo apt install ${missing[*]}" + exit 1 + fi +} + +# TBW endurance standards (using lowest numbers) +declare -A CONSUMER_TBW=( + ["250"]=150 + ["500"]=300 + ["1000"]=600 + ["2000"]=1200 + ["4000"]=2400 + ["8000"]=4800 +) + +declare -A ENTERPRISE_TBW=( + ["250"]=450 + ["500"]=900 + ["1000"]=1800 + ["2000"]=3600 + ["4000"]=7200 + ["8000"]=14400 +) + +# Function to get closest capacity tier +get_capacity_tier() { + local capacity_gb=$1 + local tiers=("250" "500" "1000" "2000" "4000" "8000") + + for tier in "${tiers[@]}"; do + if [[ $capacity_gb -le $tier ]]; then + echo $tier + return + fi + done + # For larger than 8TB, use proportional scaling from 4TB + echo "8000" +} + +# Function to convert bytes to human readable +bytes_to_human() { + local bytes=$1 + if [[ $bytes -ge 1099511627776 ]]; then + echo "$(echo "scale=2; $bytes / 1099511627776" | bc 2>/dev/null || echo "0") TB" + elif [[ $bytes -ge 1073741824 ]]; then + echo "$(echo "scale=2; $bytes / 1073741824" | bc 2>/dev/null || echo "0") GB" + elif [[ $bytes -ge 1048576 ]]; then + echo "$(echo "scale=2; $bytes / 1048576" | bc 2>/dev/null || echo "0") MB" + else + echo "$bytes bytes" + fi +} + +# Function to extract numeric hours from power_on_hours field +extract_numeric_hours() { + local power_on_hours=$1 + # Remove everything after non-numeric characters + local numeric_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*$//') + if [[ -n "$numeric_hours" && "$numeric_hours" =~ ^[0-9]+$ ]]; then + echo "$numeric_hours" + else + echo "0" + fi +} + +# Function to get disk type and interface +get_disk_info() { + local disk=$1 + + local info=$(smartctl -i "$disk" 2>/dev/null) + local transport="" + local disk_type="UNKNOWN" + local is_enterprise=false + + # Check if it's NVMe + if [[ "$disk" == /dev/nvme* ]] || echo "$info" | 
grep -qi "NVMe"; then + disk_type="NVMe" + transport="NVMe" + # Check for SAS + elif echo "$info" | grep -qi "SAS"; then + disk_type="SAS" + transport="SAS" + is_enterprise=true + # Check for SATA SSD + elif echo "$info" | grep -qi "Solid State Device"; then + disk_type="SSD" + transport="SATA" + # Check for SATA HDD + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="HDD" + transport="SATA" + fi + + # Check for enterprise features + if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then + is_enterprise=true + fi + + # Check device type by model name + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then + is_enterprise=true + fi + fi + + echo "$disk_type|$transport|$is_enterprise" +} + +# Function to calculate TBW for SSD/NVMe +calculate_tbw() { + local disk_type=$1 + local raw_value=$2 + local sectors=$3 + + local tbw=0 + + if [[ -n "$sectors" && "$sectors" != "0" ]]; then + # Calculate from sectors (most common for SATA SSDs) + local bytes=$((sectors * 512)) + tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + if [[ "$disk_type" == "NVMe" ]]; then + # NVMe: raw value is in 32MB units + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") + else + # SATA SSD: various manufacturers + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") + fi + fi + + echo "$tbw" +} + +# Function to get estimated endurance +get_estimated_endurance() { + local capacity_gb=$1 + local is_enterprise=$2 + local disk_type=$3 + + # HDDs don't have TBW + if [[ "$disk_type" == "HDD" ]]; then + echo "N/A" + return + fi + + local capacity_tier=$(get_capacity_tier 
"$capacity_gb") + + if [[ "$is_enterprise" == "true" ]]; then + echo "${ENTERPRISE_TBW[$capacity_tier]}" + else + echo "${CONSUMER_TBW[$capacity_tier]}" + fi +} + +# Function to estimate SSD lifespan with TBW remaining +estimate_ssd_lifespan() { + local power_on_hours=$1 + local tbw_used=$2 + local estimated_endurance=$3 + local disk_type=$4 + + if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then + echo "Unknown||Unknown|New" + return + fi + + if [[ "$estimated_endurance" == "N/A" ]]; then + echo "N/A|N/A|N/A|HDD" + return + fi + + # Handle the case where tbw_used might have formatting issues + local clean_tbw_used=$(echo "$tbw_used" | sed 's/[^0-9.]//g') + if [[ -z "$clean_tbw_used" ]]; then + clean_tbw_used=0 + fi + + local tbw_remaining=$(echo "scale=2; $estimated_endurance - $clean_tbw_used" | bc 2>/dev/null || echo "$estimated_endurance") + + if [[ $(echo "$clean_tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") + local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100") + + if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type" + elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type" + else + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type" + fi + else + echo "Unknown|${estimated_endurance} TB|New|$disk_type" + fi +} + +# Function to estimate HDD lifespan +estimate_hdd_lifespan() { + local power_on_hours=$1 + local reallocated_sectors=$2 + local pending_sectors=$3 + local start_stop_count=$4 + local load_cycle_count=$5 + local disk_type=$6 + + # Extract numeric hours only + local numeric_hours=$(extract_numeric_hours 
"$power_on_hours") + + if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then + echo "Unknown" + return + fi + + local severity=0 + + # Critical issues + if [[ "$pending_sectors" -gt 0 ]]; then + echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + return + elif [[ "$reallocated_sectors" -gt 100 ]]; then + severity=$((severity + 3)) + elif [[ "$reallocated_sectors" -gt 10 ]]; then + severity=$((severity + 2)) + elif [[ "$reallocated_sectors" -gt 0 ]]; then + severity=$((severity + 1)) + fi + + # Usage-based assessment + if [[ "$numeric_hours" -gt 50000 ]]; then + severity=$((severity + 3)) + elif [[ "$numeric_hours" -gt 30000 ]]; then + severity=$((severity + 2)) + elif [[ "$numeric_hours" -gt 15000 ]]; then + severity=$((severity + 1)) + fi + + # Mechanical wear (for HDDs) + if [[ "$disk_type" == "HDD" ]]; then + if [[ "$start_stop_count" -gt 50000 ]]; then + severity=$((severity + 2)) + elif [[ "$start_stop_count" -gt 20000 ]]; then + severity=$((severity + 1)) + fi + + if [[ "$load_cycle_count" -gt 500000 ]]; then + severity=$((severity + 2)) + elif [[ "$load_cycle_count" -gt 200000 ]]; then + severity=$((severity + 1)) + fi + fi + + if [[ $severity -ge 5 ]]; then + echo "${RED}< 6 months${NC} (Multiple risk factors)" + elif [[ $severity -ge 3 ]]; then + echo "${YELLOW}6-18 months${NC} (Moderate wear)" + elif [[ $severity -ge 1 ]]; then + echo "${YELLOW}1-3 years${NC} (Light wear)" + else + echo "${GREEN}> 3 years${NC} (Healthy)" + fi +} + +# Function to check soft-raid (MDRAID) +check_mdraid() { + local md_devices=() + + if [[ -f /proc/mdstat ]]; then + while IFS= read -r line; do + if [[ $line =~ ^md[0-9]+ ]]; then + md_devices+=("/dev/${line%% *}") + fi + done < /proc/mdstat + fi + + for md in "${md_devices[@]}"; do + if [[ -b "$md" ]]; then + print_color $MAGENTA "Found software RAID: $md" + if command_exists mdadm; then + local md_info=$(mdadm --detail "$md" 2>/dev/null) + if [[ -n "$md_info" ]]; then + echo "RAID Level: $(echo "$md_info" 
| grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "" + fi + fi + fi + done +} + +# Function to extract capacity in GB from various formats - IMPROVED FOR HDD/SAS +extract_capacity_gb() { + local capacity=$1 + local capacity_gb=0 + + # Debug: Show what we're trying to parse + # echo "DEBUG: Parsing capacity: '$capacity'" >&2 + + # Try different patterns to extract capacity + if [[ $capacity =~ \[([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: [1.82 TB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + # echo "DEBUG: Pattern 1 - TB size: $size, GB: $capacity_gb" >&2 + elif [[ $capacity =~ \[([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: [500.1 GB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) + # echo "DEBUG: Pattern 2 - GB size: $size, GB: $capacity_gb" >&2 + elif [[ $capacity =~ ([0-9,]+)\s+bytes ]]; then + # Pattern: 500,107,862,016 bytes + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) + # echo "DEBUG: Pattern 3 - bytes: $bytes, GB: $capacity_gb" >&2 + elif [[ $capacity =~ ([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: 1.82TB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + # echo "DEBUG: Pattern 4 - TB size: $size, GB: $capacity_gb" >&2 + elif [[ $capacity =~ ([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: 500.1GB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. 
-f1) + # echo "DEBUG: Pattern 5 - GB size: $size, GB: $capacity_gb" >&2 + elif [[ $capacity =~ ([0-9,]+)\s*[Bb] ]]; then + # Pattern: 2000398934016B + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) + # echo "DEBUG: Pattern 6 - bytes: $bytes, GB: $capacity_gb" >&2 + fi + + # Ensure we have a valid number + if [[ -z "$capacity_gb" || ! "$capacity_gb" =~ ^[0-9]+$ || "$capacity_gb" -le 0 ]]; then + echo "0" + else + echo "$capacity_gb" + fi +} + +# Function to get NVMe capacity using smartctl +get_nvme_capacity() { + local disk=$1 + local nvme_info=$(smartctl -i "$disk" 2>/dev/null) + local capacity="" + + # Try to get capacity from different fields + capacity=$(echo "$nvme_info" | grep -i "Total NVM Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "Namespace 1 Size/Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + fi + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "User Capacity" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + fi + + echo "$capacity" +} + +# Function to get human readable capacity +get_human_capacity() { + local capacity_gb=$1 + + if [[ $capacity_gb -ge 1000 ]]; then + echo "$(echo "scale=2; $capacity_gb / 1000" | bc) TB" + else + echo "${capacity_gb} GB" + fi +} + +# Function to extract capacity from model name for HDD/SAS drives +extract_capacity_from_model() { + local model=$1 + local disk_type=$2 + + # Common HDD/SAS capacity patterns in model names + if [[ "$model" =~ 2[Tt][Bb] ]] || [[ "$model" =~ 2000[Gg] ]]; then + echo "2000" + elif [[ "$model" =~ 1[Tt][Bb] ]] || [[ "$model" =~ 1000[Gg] ]]; then + echo "1000" + elif [[ "$model" =~ 4[Tt][Bb] ]] || [[ "$model" =~ 4000[Gg] ]]; then + echo "4000" + elif [[ "$model" =~ 8[Tt][Bb] ]] || [[ "$model" =~ 8000[Gg] ]]; then + echo "8000" + elif [[ "$model" =~ 500[Gg] ]]; then + echo "500" + elif [[ "$model" =~ 
250[Gg] ]]; then + echo "250" + else + # Try to extract numbers that look like capacities + local capacity_match=$(echo "$model" | grep -oE '[0-9]+[GT]B' | head -1) + if [[ -n "$capacity_match" ]]; then + if [[ "$capacity_match" =~ ([0-9]+)TB ]]; then + echo "$((${BASH_REMATCH[1]} * 1000))" + elif [[ "$capacity_match" =~ ([0-9]+)GB ]]; then + echo "${BASH_REMATCH[1]}" + fi + else + echo "0" + fi + fi +} + +# Function to check a single disk +check_disk() { + local disk=$1 + + print_color $CYAN "Checking disk: $disk" + echo "==================================================" + + # Check if disk exists and is accessible + if [[ ! -b "$disk" ]]; then + print_color $RED "Error: $disk is not a valid block device" + echo "" + return + fi + + # Get disk information + local disk_info=$(get_disk_info "$disk") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$(smartctl -i "$disk" 2>/dev/null) + local health=$(smartctl -H "$disk" 2>/dev/null) + local attributes=$(smartctl -A "$disk" 2>/dev/null) + + # Check if smartctl command succeeded + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access." 
+ echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep -i "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep -i "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep -i "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep -i "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # For NVMe disks, try to get capacity from different fields + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_capacity=$(get_nvme_capacity "$disk") + if [[ -n "$nvme_capacity" ]]; then + capacity="$nvme_capacity" + fi + fi + + # Extract capacity in GB and human readable format + local capacity_gb=$(extract_capacity_gb "$capacity") + local capacity_human="" + + # If capacity extraction failed, try alternative methods + if [[ "$capacity_gb" -eq 0 ]]; then + # Try to get capacity from model name (especially for HDD/SAS) + local model_capacity=$(extract_capacity_from_model "$model" "$disk_type") + if [[ "$model_capacity" -gt 0 ]]; then + capacity_gb="$model_capacity" + capacity_human=$(get_human_capacity "$capacity_gb") + else + # Final fallback based on disk type and common sizes + if [[ "$disk_type" == "HDD" ]]; then + # Common HDD sizes + if [[ "$model" =~ ST2000 ]]; then + capacity_gb=2000 + elif [[ "$model" =~ ST1000 ]]; then + capacity_gb=1000 + elif [[ "$model" =~ ST4000 ]]; then + capacity_gb=4000 + elif [[ "$model" =~ ST3000 ]]; then + capacity_gb=3000 + else + capacity_gb=0 + fi + elif [[ "$disk_type" == "SSD" ]]; then + # Common SSD sizes + if [[ "$model" =~ 960[Gg] ]]; then + capacity_gb=960 + elif [[ "$model" =~ 480[Gg] ]]; then + capacity_gb=480 + elif [[ "$model" =~ 240[Gg] ]]; then + capacity_gb=240 + elif [[ "$model" =~ 120[Gg] ]]; then + capacity_gb=120 + else + capacity_gb=0 + fi + elif [[ "$disk_type" 
== "NVMe" ]]; then + # Common NVMe sizes + if [[ "$model" =~ 500[Gg] ]]; then + capacity_gb=500 + elif [[ "$model" =~ 1000[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then + capacity_gb=1000 + elif [[ "$model" =~ 2000[Gg] ]] || [[ "$model" =~ 2[Tt] ]]; then + capacity_gb=2000 + else + capacity_gb=0 + fi + fi + capacity_human=$(get_human_capacity "$capacity_gb") + fi + else + capacity_human=$(get_human_capacity "$capacity_gb") + fi + + # If we still don't have capacity, show unknown + if [[ "$capacity_gb" -eq 0 ]]; then + capacity_human="Unknown" + fi + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract SMART attributes + local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1) + local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw") + local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) + local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1) + local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}' | head -1) + local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1) + local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1) + local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}' | head -1) + + # For NVMe disks using smartctl extended attributes + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null) + # Extract data units written for NVMe + local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $4}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + # Convert data units 
to sectors (1 data unit = 1000 sectors for NVMe) + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + else + # Try alternative field + data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + fi + fi + # Get power on hours for NVMe + local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') + if [[ -n "$nvme_power_hours" ]]; then + power_on_hours="$nvme_power_hours" + fi + fi + + # Display basic information + echo "Model: ${model:-Unknown}" + echo "Serial: ${serial:-Unknown}" + echo "Type: $disk_type" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: $capacity_human" + echo "Firmware: ${firmware:-Unknown}" + echo "Health: ${health_status:-Unknown}" + echo "Power On Hours: ${power_on_hours:-Unknown}" + + # Disk type specific analysis + if [[ "$disk_type" == "HDD" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" + [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") + echo "Lifespan: $lifespan" + + elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then + local tbw_used=0 + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") + elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") + fi + + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" 
"$is_enterprise" "$disk_type") + + echo "TBW Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)" + + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") + local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) + local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) + local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) + + if [[ "$estimated_endurance" != "N/A" ]]; then + echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" + fi + + # Show mechanical attributes for SAS drives that might be SSDs + if [[ "$disk_type" == "SAS" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + fi + else + print_color $YELLOW "Unknown disk type - limited information available" + fi + + echo "" +} + +# Function to detect all disks +detect_disks() { + local disks=() + + # Check for SATA/SAS disks + for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for NVMe disks (base devices only, no partitions) + for disk in /dev/nvme[0-9]n[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for other disk types + for disk in /dev/vd[a-z] /dev/xvd[a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + echo "${disks[@]}" +} + +# Main function +main() { + print_color $BLUE "Disk Health Check Script v$VERSION for Ubuntu" + print_color $BLUE "==============================================" + echo "" + + check_dependencies + + local disks=() + + # Check for soft-raid first + check_mdraid + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks + print_color 
$CYAN "Auto-detecting disks..." + read -ra disks <<< "$(detect_disks)" + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check if running as root, warn if not + if [[ $EUID -ne 0 ]]; then + print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible." + echo "For complete results, run as: sudo $0" + echo "" + fi + + # Check each disk + for disk in "${disks[@]}"; do + check_disk "$disk" + done + + print_color $BLUE "Check completed!" + echo "" + print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers." + print_color $YELLOW " Actual endurance for your specific drive model may be higher." +} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " sudo $SCRIPT_NAME # Check all disks (as root)" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" + echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" +} + +# Parse command line arguments +case "${1:-}" in + -h|--help) + usage + exit 0 + ;; + -v|--version) + echo "$SCRIPT_NAME version $VERSION" + exit 0 + ;; + *) + main "$@" + ;; +esac