From 5f8ae02d714a9b15d0090ee9fdcfa903c02e0557 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 22 Oct 2025 04:35:22 +0800 Subject: [PATCH] added v2.5 --- alma-v2.3.sh | 531 +++++++++++++ alma-v2.5.sh | 677 +++++++++++++++++ harvester-v2.3.sh | 530 +++++++++++++ harvester-v2.5.sh | 679 +++++++++++++++++ README.md => old/README.md | 0 alma-v2.4.sh => old/alma-v2.4.sh | 0 harvester-v2.4.sh => old/harvester-v2.4.sh | 0 old/ubuntu-v2.5.sh | 615 +++++++++++++++ ubuntu-v2.6.sh => old/ubuntu-v2.6.sh | 0 ubuntu-v2.7.sh => old/ubuntu-v2.7.sh | 0 ubuntu-v2.8.sh => old/ubuntu-v2.8.sh | 0 ubuntu-v3.0.sh => old/ubuntu-v3.0.sh | 0 ubuntu-v2.3.sh | 531 +++++++++++++ ubuntu-v2.4.sh | 638 ++++++++++++++++ ubuntu-v2.5.sh | 826 +++++++++++---------- 15 files changed, 4645 insertions(+), 382 deletions(-) create mode 100755 alma-v2.3.sh create mode 100755 alma-v2.5.sh create mode 100755 harvester-v2.3.sh create mode 100755 harvester-v2.5.sh rename README.md => old/README.md (100%) rename alma-v2.4.sh => old/alma-v2.4.sh (100%) rename harvester-v2.4.sh => old/harvester-v2.4.sh (100%) create mode 100755 old/ubuntu-v2.5.sh rename ubuntu-v2.6.sh => old/ubuntu-v2.6.sh (100%) rename ubuntu-v2.7.sh => old/ubuntu-v2.7.sh (100%) rename ubuntu-v2.8.sh => old/ubuntu-v2.8.sh (100%) rename ubuntu-v3.0.sh => old/ubuntu-v3.0.sh (100%) create mode 100755 ubuntu-v2.3.sh create mode 100755 ubuntu-v2.4.sh diff --git a/alma-v2.3.sh b/alma-v2.3.sh new file mode 100755 index 0000000..9eb6722 --- /dev/null +++ b/alma-v2.3.sh @@ -0,0 +1,531 @@ +#!/bin/bash + +# Disk Health Check Script for Alma Linux 9 +# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid +# Supports consumer and enterprise disk classification + +SCRIPT_NAME=$(basename "$0") +VERSION="2.3" + +# Color codes +RED=$(tput setaf 1) +GREEN=$(tput setaf 2) +YELLOW=$(tput setaf 3) +BLUE=$(tput setaf 4) +CYAN=$(tput setaf 6) +MAGENTA=$(tput setaf 5) +NC=$(tput sgr0) + +# Function to print colored output +print_color() { + local 
color=$1 + local message=$2 + echo -e "${color}${message}${NC}" +} + +# Check if command exists +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +# Check dependencies +check_dependencies() { + local missing=() + + if ! command_exists smartctl; then + missing+=("smartmontools") + fi + + if ! command_exists bc; then + missing+=("bc") + fi + + if [[ ${#missing[@]} -gt 0 ]]; then + print_color $RED "Error: Missing required packages: ${missing[*]}" + echo "Install with: sudo dnf install ${missing[*]}" + exit 1 + fi +} + +# TBW endurance standards (using lowest numbers) +declare -A CONSUMER_TBW=( + ["250"]=150 + ["500"]=300 + ["1000"]=600 + ["2000"]=1200 + ["4000"]=2400 +) + +declare -A ENTERPRISE_TBW=( + ["250"]=450 + ["500"]=900 + ["1000"]=1800 + ["2000"]=3600 + ["4000"]=7200 +) + +# Function to get closest capacity tier +get_capacity_tier() { + local capacity_gb=$1 + local tiers=("250" "500" "1000" "2000" "4000") + + for tier in "${tiers[@]}"; do + if [[ $capacity_gb -le $tier ]]; then + echo $tier + return + fi + done + # For larger than 4TB, use 4TB tier with scaling + echo "4000" +} + +# Function to get disk type and interface +get_disk_info() { + local disk=$1 + + local info=$(smartctl -i "$disk" 2>/dev/null) + local transport="" + local disk_type="UNKNOWN" + local is_enterprise=false + + # Check if it's NVMe + if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then + disk_type="NVMe" + transport="NVMe" + # Check for SAS + elif echo "$info" | grep -qi "SAS"; then + disk_type="SAS" + transport="SAS" + is_enterprise=true + # Check for SATA SSD + elif echo "$info" | grep -qi "Solid State Device"; then + disk_type="SSD" + transport="SATA" + # Check for SATA HDD + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="HDD" + transport="SATA" + fi + + # Check for enterprise features + if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then + is_enterprise=true + fi + + # Check device type by model 
name + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then + is_enterprise=true + fi + fi + + echo "$disk_type|$transport|$is_enterprise" +} + +# Function to calculate TBW for SSD/NVMe +calculate_tbw() { + local disk_type=$1 + local raw_value=$2 + local sectors=$3 + + local tbw=0 + + if [[ -n "$sectors" && "$sectors" != "0" ]]; then + # Calculate from sectors (most common for SATA SSDs) + local bytes=$((sectors * 512)) + tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + if [[ "$disk_type" == "NVMe" ]]; then + # NVMe: raw value is in 32MB units + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") + else + # SATA SSD: various manufacturers + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") + fi + fi + + echo "$tbw" +} + +# Function to get estimated endurance +get_estimated_endurance() { + local capacity_gb=$1 + local is_enterprise=$2 + local disk_type=$3 + + # HDDs don't have TBW + if [[ "$disk_type" == "HDD" ]]; then + echo "N/A" + return + fi + + local capacity_tier=$(get_capacity_tier "$capacity_gb") + + if [[ "$is_enterprise" == "true" ]]; then + echo "${ENTERPRISE_TBW[$capacity_tier]}" + else + echo "${CONSUMER_TBW[$capacity_tier]}" + fi +} + +# Function to estimate SSD lifespan with TBW remaining +estimate_ssd_lifespan() { + local power_on_hours=$1 + local tbw_used=$2 + local estimated_endurance=$3 + local disk_type=$4 + + if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then + echo "Unknown||Unknown|New" + return + fi + + if [[ "$estimated_endurance" == "N/A" ]]; then + echo "N/A|N/A|N/A|HDD" + return + fi + + local tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" 
| bc 2>/dev/null || echo "0") + + if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") + local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100") + + if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type" + elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type" + else + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type" + fi + else + echo "Unknown|${estimated_endurance} TB|New|$disk_type" + fi +} + +# Function to estimate HDD lifespan +estimate_hdd_lifespan() { + local power_on_hours=$1 + local reallocated_sectors=$2 + local pending_sectors=$3 + local start_stop_count=$4 + local load_cycle_count=$5 + local disk_type=$6 + + if [[ -z "$power_on_hours" ]]; then + echo "Unknown" + return + fi + + local severity=0 + + # Critical issues + if [[ "$pending_sectors" -gt 0 ]]; then + echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + return + elif [[ "$reallocated_sectors" -gt 100 ]]; then + severity=$((severity + 3)) + elif [[ "$reallocated_sectors" -gt 10 ]]; then + severity=$((severity + 2)) + elif [[ "$reallocated_sectors" -gt 0 ]]; then + severity=$((severity + 1)) + fi + + # Usage-based assessment + if [[ "$power_on_hours" -gt 50000 ]]; then + severity=$((severity + 3)) + elif [[ "$power_on_hours" -gt 30000 ]]; then + severity=$((severity + 2)) + elif [[ "$power_on_hours" -gt 15000 ]]; then + severity=$((severity + 1)) + fi + + # Mechanical wear (for HDDs) + if [[ "$disk_type" == "HDD" ]]; then + if [[ "$start_stop_count" -gt 50000 ]]; then + severity=$((severity + 2)) + elif [[ "$start_stop_count" -gt 20000 ]]; then + 
severity=$((severity + 1)) + fi + + if [[ "$load_cycle_count" -gt 500000 ]]; then + severity=$((severity + 2)) + elif [[ "$load_cycle_count" -gt 200000 ]]; then + severity=$((severity + 1)) + fi + fi + + if [[ $severity -ge 5 ]]; then + echo "${RED}< 6 months${NC} (Multiple risk factors)" + elif [[ $severity -ge 3 ]]; then + echo "${YELLOW}6-18 months${NC} (Moderate wear)" + elif [[ $severity -ge 1 ]]; then + echo "${YELLOW}1-3 years${NC} (Light wear)" + else + echo "${GREEN}> 3 years${NC} (Healthy)" + fi +} + +# Function to check soft-raid (MDRAID) +check_mdraid() { + local md_devices=() + + if [[ -f /proc/mdstat ]]; then + while IFS= read -r line; do + if [[ $line =~ ^md[0-9]+ ]]; then + md_devices+=("/dev/${line%% *}") + fi + done < /proc/mdstat + fi + + for md in "${md_devices[@]}"; do + if [[ -b "$md" ]]; then + print_color $MAGENTA "Found software RAID: $md" + if command_exists mdadm; then + local md_info=$(mdadm --detail "$md" 2>/dev/null) + if [[ -n "$md_info" ]]; then + echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "" + fi + fi + fi + done +} + +# Function to check a single disk +check_disk() { + local disk=$1 + + print_color $CYAN "Checking disk: $disk" + echo "==================================================" + + # Check if disk exists and is accessible + if [[ ! 
-b "$disk" ]]; then + print_color $RED "Error: $disk is not a valid block device" + echo "" + return + fi + + # Get disk information + local disk_info=$(get_disk_info "$disk") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$(smartctl -i "$disk" 2>/dev/null) + local health=$(smartctl -H "$disk" 2>/dev/null) + local attributes=$(smartctl -A "$disk" 2>/dev/null) + + # Check if smartctl command succeeded + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access." + echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract capacity in GB + local capacity_gb=0 + if echo "$capacity" | grep -qi "TB"; then + capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. -f1) + else + capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. 
-f1) + fi + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract SMART attributes + local power_on_hours=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1) + local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) + local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1) + local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}' | head -1) + local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1) + local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1) + local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}' | head -1) + + # For NVMe disks using smartctl + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null) + power_on_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') + total_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') + [[ -z "$total_written" ]] && total_written=$(echo "$nvme_attributes" | grep "Host_Writes_32MiB" | awk '{print $10}') + fi + + # Display basic information + echo "Model: ${model:-Unknown}" + echo "Serial: ${serial:-Unknown}" + echo "Type: $disk_type" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: ${capacity:-Unknown}" + echo "Firmware: ${firmware:-Unknown}" + echo "Health: ${health_status:-Unknown}" + echo "Power On Hours: ${power_on_hours:-Unknown}" + + # Disk type specific analysis + if [[ "$disk_type" == "HDD" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + 
echo "Pending Sectors: ${pending_sectors:-0}" + [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" + [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") + echo "Lifespan: $lifespan" + + elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then + local tbw_used=0 + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") + elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") + fi + + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type") + + echo "TBW Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB" + + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") + local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) + local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) + local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) + + if [[ "$estimated_endurance" != "N/A" ]]; then + echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" + fi + + # Show mechanical attributes for SAS drives that might be SSDs + if [[ "$disk_type" == "SAS" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + fi + else + print_color $YELLOW "Unknown disk type - limited information available" + fi + + echo "" +} + +# Function to detect all disks +detect_disks() { + local disks=() + + # Check for SATA/SAS disks + for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for NVMe disks (base devices 
only, no partitions) + for disk in /dev/nvme[0-9]n[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for other disk types + for disk in /dev/vd[a-z] /dev/xvd[a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + echo "${disks[@]}" +} + +# Main function +main() { + print_color $BLUE "Disk Health Check Script v$VERSION for Alma Linux 9" + print_color $BLUE "====================================================" + echo "" + + check_dependencies + + local disks=() + + # Check for soft-raid first + check_mdraid + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks + print_color $CYAN "Auto-detecting disks..." + read -ra disks <<< "$(detect_disks)" + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check if running as root, warn if not + if [[ $EUID -ne 0 ]]; then + print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible." + echo "For complete results, run as: sudo $0" + echo "" + fi + + # Check each disk + for disk in "${disks[@]}"; do + check_disk "$disk" + done + + print_color $BLUE "Check completed!" 
+} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " sudo $SCRIPT_NAME # Check all disks (as root)" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" + echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" +} + +# Parse command line arguments +case "${1:-}" in + -h|--help) + usage + exit 0 + ;; + -v|--version) + echo "$SCRIPT_NAME version $VERSION" + exit 0 + ;; + *) + main "$@" + ;; +esac diff --git a/alma-v2.5.sh b/alma-v2.5.sh new file mode 100755 index 0000000..8970d8c --- /dev/null +++ b/alma-v2.5.sh @@ -0,0 +1,677 @@ +#!/bin/bash + +# Disk Health Check Script for Alma Linux 9 +# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid +# Supports consumer and enterprise disk classification + +SCRIPT_NAME=$(basename "$0") +VERSION="2.5" + +# Color codes +RED=$(tput setaf 1) +GREEN=$(tput setaf 2) +YELLOW=$(tput setaf 3) +BLUE=$(tput setaf 4) +CYAN=$(tput setaf 6) +MAGENTA=$(tput setaf 5) +NC=$(tput sgr0) + +# Function to print colored output +print_color() { + local color=$1 + local message=$2 + echo -e "${color}${message}${NC}" +} + +# Check if command exists +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +# Check dependencies +check_dependencies() { + local missing=() + + if ! command_exists smartctl; then + missing+=("smartmontools") + fi + + if ! 
command_exists bc; then + missing+=("bc") + fi + + if [[ ${#missing[@]} -gt 0 ]]; then + print_color $RED "Error: Missing required packages: ${missing[*]}" + echo "Install with: sudo dnf install ${missing[*]}" + exit 1 + fi +} + +# TBW endurance standards (using lowest numbers) +declare -A CONSUMER_TBW=( + ["250"]=150 + ["500"]=300 + ["1000"]=600 + ["2000"]=1200 + ["4000"]=2400 + ["8000"]=4800 +) + +declare -A ENTERPRISE_TBW=( + ["250"]=450 + ["500"]=900 + ["1000"]=1800 + ["2000"]=3600 + ["4000"]=7200 + ["8000"]=14400 +) + +# Function to get closest capacity tier +get_capacity_tier() { + local capacity_gb=$1 + local tiers=("250" "500" "1000" "2000" "4000" "8000") + + for tier in "${tiers[@]}"; do + if [[ $capacity_gb -le $tier ]]; then + echo $tier + return + fi + done + # For larger than 8TB, use proportional scaling from 4TB + echo "8000" +} + +# Function to convert bytes to human readable +bytes_to_human() { + local bytes=$1 + if [[ $bytes -ge 1099511627776 ]]; then + echo "$(echo "scale=2; $bytes / 1099511627776" | bc 2>/dev/null || echo "0") TB" + elif [[ $bytes -ge 1073741824 ]]; then + echo "$(echo "scale=2; $bytes / 1073741824" | bc 2>/dev/null || echo "0") GB" + elif [[ $bytes -ge 1048576 ]]; then + echo "$(echo "scale=2; $bytes / 1048576" | bc 2>/dev/null || echo "0") MB" + else + echo "$bytes bytes" + fi +} + +# Function to extract numeric hours from power_on_hours field +extract_numeric_hours() { + local power_on_hours=$1 + # Remove everything after non-numeric characters + local numeric_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*$//') + if [[ -n "$numeric_hours" && "$numeric_hours" =~ ^[0-9]+$ ]]; then + echo "$numeric_hours" + else + echo "0" + fi +} + +# Function to get disk type and interface +get_disk_info() { + local disk=$1 + + local info=$(smartctl -i "$disk" 2>/dev/null) + local transport="" + local disk_type="UNKNOWN" + local is_enterprise=false + + # Check if it's NVMe + if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; 
then + disk_type="NVMe" + transport="NVMe" + # Check for SAS + elif echo "$info" | grep -qi "SAS"; then + disk_type="SAS" + transport="SAS" + is_enterprise=true + # Check for SATA SSD + elif echo "$info" | grep -qi "Solid State Device"; then + disk_type="SSD" + transport="SATA" + # Check for SATA HDD + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="HDD" + transport="SATA" + fi + + # Check for enterprise features + if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then + is_enterprise=true + fi + + # Check device type by model name + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then + is_enterprise=true + fi + fi + + echo "$disk_type|$transport|$is_enterprise" +} + +# Function to calculate TBW for SSD/NVMe +calculate_tbw() { + local disk_type=$1 + local raw_value=$2 + local sectors=$3 + + local tbw=0 + + if [[ -n "$sectors" && "$sectors" != "0" ]]; then + # Calculate from sectors (most common for SATA SSDs) + local bytes=$((sectors * 512)) + tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + if [[ "$disk_type" == "NVMe" ]]; then + # NVMe: raw value is in 32MB units + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") + else + # SATA SSD: various manufacturers + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") + fi + fi + + echo "$tbw" +} + +# Function to get estimated endurance +get_estimated_endurance() { + local capacity_gb=$1 + local is_enterprise=$2 + local disk_type=$3 + + # HDDs don't have TBW + if [[ "$disk_type" == "HDD" ]]; then + echo "N/A" + return + fi + + local capacity_tier=$(get_capacity_tier "$capacity_gb") + + if [[ 
"$is_enterprise" == "true" ]]; then + echo "${ENTERPRISE_TBW[$capacity_tier]}" + else + echo "${CONSUMER_TBW[$capacity_tier]}" + fi +} + +# Function to estimate SSD lifespan with TBW remaining +estimate_ssd_lifespan() { + local power_on_hours=$1 + local tbw_used=$2 + local estimated_endurance=$3 + local disk_type=$4 + + if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then + echo "Unknown||Unknown|New" + return + fi + + if [[ "$estimated_endurance" == "N/A" ]]; then + echo "N/A|N/A|N/A|HDD" + return + fi + + # Handle the case where tbw_used might have formatting issues + local clean_tbw_used=$(echo "$tbw_used" | sed 's/[^0-9.]//g') + if [[ -z "$clean_tbw_used" ]]; then + clean_tbw_used=0 + fi + + local tbw_remaining=$(echo "scale=2; $estimated_endurance - $clean_tbw_used" | bc 2>/dev/null || echo "$estimated_endurance") + + if [[ $(echo "$clean_tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") + local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100") + + if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type" + elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type" + else + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type" + fi + else + echo "Unknown|${estimated_endurance} TB|New|$disk_type" + fi +} + +# Function to estimate HDD lifespan +estimate_hdd_lifespan() { + local power_on_hours=$1 + local reallocated_sectors=$2 + local pending_sectors=$3 + local start_stop_count=$4 + local load_cycle_count=$5 + local disk_type=$6 + + # Extract numeric hours only + local numeric_hours=$(extract_numeric_hours "$power_on_hours") + + if [[ -z 
"$numeric_hours" || "$numeric_hours" -eq 0 ]]; then + echo "Unknown" + return + fi + + local severity=0 + + # Critical issues + if [[ "$pending_sectors" -gt 0 ]]; then + echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + return + elif [[ "$reallocated_sectors" -gt 100 ]]; then + severity=$((severity + 3)) + elif [[ "$reallocated_sectors" -gt 10 ]]; then + severity=$((severity + 2)) + elif [[ "$reallocated_sectors" -gt 0 ]]; then + severity=$((severity + 1)) + fi + + # Usage-based assessment + if [[ "$numeric_hours" -gt 50000 ]]; then + severity=$((severity + 3)) + elif [[ "$numeric_hours" -gt 30000 ]]; then + severity=$((severity + 2)) + elif [[ "$numeric_hours" -gt 15000 ]]; then + severity=$((severity + 1)) + fi + + # Mechanical wear (for HDDs) + if [[ "$disk_type" == "HDD" ]]; then + if [[ "$start_stop_count" -gt 50000 ]]; then + severity=$((severity + 2)) + elif [[ "$start_stop_count" -gt 20000 ]]; then + severity=$((severity + 1)) + fi + + if [[ "$load_cycle_count" -gt 500000 ]]; then + severity=$((severity + 2)) + elif [[ "$load_cycle_count" -gt 200000 ]]; then + severity=$((severity + 1)) + fi + fi + + if [[ $severity -ge 5 ]]; then + echo "${RED}< 6 months${NC} (Multiple risk factors)" + elif [[ $severity -ge 3 ]]; then + echo "${YELLOW}6-18 months${NC} (Moderate wear)" + elif [[ $severity -ge 1 ]]; then + echo "${YELLOW}1-3 years${NC} (Light wear)" + else + echo "${GREEN}> 3 years${NC} (Healthy)" + fi +} + +# Function to check soft-raid (MDRAID) +check_mdraid() { + local md_devices=() + + if [[ -f /proc/mdstat ]]; then + while IFS= read -r line; do + if [[ $line =~ ^md[0-9]+ ]]; then + md_devices+=("/dev/${line%% *}") + fi + done < /proc/mdstat + fi + + for md in "${md_devices[@]}"; do + if [[ -b "$md" ]]; then + print_color $MAGENTA "Found software RAID: $md" + if command_exists mdadm; then + local md_info=$(mdadm --detail "$md" 2>/dev/null) + if [[ -n "$md_info" ]]; then + echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: 
-f2 | sed 's/^[ \t]*//')" + echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "" + fi + fi + fi + done +} + +# Function to extract capacity in GB from various formats +extract_capacity_gb() { + local capacity=$1 + local capacity_gb=0 + + # Try different patterns to extract capacity + if [[ $capacity =~ \[([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: [1.82 TB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ \[([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: [500.1 GB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,]+)\s+bytes ]]; then + # Pattern: 500,107,862,016 bytes + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: 1.82TB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: 500.1GB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) + fi + + # Ensure we have a valid number + if [[ -z "$capacity_gb" || ! 
"$capacity_gb" =~ ^[0-9]+$ || "$capacity_gb" -le 0 ]]; then + echo "0" + else + echo "$capacity_gb" + fi +} + +# Function to get NVMe capacity using smartctl +get_nvme_capacity() { + local disk=$1 + local nvme_info=$(smartctl -i "$disk" 2>/dev/null) + local capacity="" + + # Try to get capacity from different fields + capacity=$(echo "$nvme_info" | grep -i "Total NVM Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "Namespace 1 Size/Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + fi + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "User Capacity" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + fi + + echo "$capacity" +} + +# Function to get human readable capacity +get_human_capacity() { + local capacity=$1 + local capacity_gb=$2 + + if [[ $capacity_gb -ge 1000 ]]; then + echo "$(echo "scale=2; $capacity_gb / 1000" | bc) TB" + else + echo "${capacity_gb} GB" + fi +} + +# Function to check a single disk +check_disk() { + local disk=$1 + + print_color $CYAN "Checking disk: $disk" + echo "==================================================" + + # Check if disk exists and is accessible + if [[ ! -b "$disk" ]]; then + print_color $RED "Error: $disk is not a valid block device" + echo "" + return + fi + + # Get disk information + local disk_info=$(get_disk_info "$disk") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$(smartctl -i "$disk" 2>/dev/null) + local health=$(smartctl -H "$disk" 2>/dev/null) + local attributes=$(smartctl -A "$disk" 2>/dev/null) + + # Check if smartctl command succeeded + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access." 
+ echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # For NVMe disks, try to get capacity from different fields + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_capacity=$(get_nvme_capacity "$disk") + if [[ -n "$nvme_capacity" ]]; then + capacity="$nvme_capacity" + fi + fi + + # Extract capacity in GB and human readable format + local capacity_gb=$(extract_capacity_gb "$capacity") + local capacity_human=$(get_human_capacity "$capacity" "$capacity_gb") + + # If capacity extraction failed, try alternative method + if [[ "$capacity_gb" -eq 0 ]]; then + # Try to get capacity from model name or other methods + if [[ "$disk_type" == "NVMe" && "$model" =~ 500[Gg] ]]; then + capacity_gb=500 + capacity_human="500 GB" + elif [[ "$model" =~ 960[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then + capacity_gb=1000 + capacity_human="1 TB" + elif [[ "$model" =~ 2[Tt] ]]; then + capacity_gb=2000 + capacity_human="2 TB" + elif [[ "$model" =~ 500[Gg] ]]; then + capacity_gb=500 + capacity_human="500 GB" + elif [[ "$model" =~ 250[Gg] ]]; then + capacity_gb=250 + capacity_human="250 GB" + fi + fi + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract SMART attributes + local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1) + local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw") 
+ local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) + local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1) + local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}' | head -1) + local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1) + local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1) + local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}' | head -1) + + # For NVMe disks using smartctl extended attributes + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null) + # Extract data units written for NVMe + local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $4}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + # Convert data units to sectors (1 data unit = 1000 sectors for NVMe) + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + else + # Try alternative field + data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + fi + fi + # Get power on hours for NVMe + local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') + if [[ -n "$nvme_power_hours" ]]; then + power_on_hours="$nvme_power_hours" + fi + fi + + # Display basic information + echo "Model: ${model:-Unknown}" + echo "Serial: ${serial:-Unknown}" + echo "Type: $disk_type" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: $capacity_human" + echo "Firmware: ${firmware:-Unknown}" + echo "Health: ${health_status:-Unknown}" + echo "Power On Hours: 
${power_on_hours:-Unknown}" + + # Disk type specific analysis + if [[ "$disk_type" == "HDD" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" + [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") + echo "Lifespan: $lifespan" + + elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then + local tbw_used=0 + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") + elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") + fi + + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type") + + echo "TBW Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)" + + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") + local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) + local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) + local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) + + if [[ "$estimated_endurance" != "N/A" ]]; then + echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" + fi + + # Show mechanical attributes for SAS drives that might be SSDs + if [[ "$disk_type" == "SAS" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + fi + else + print_color $YELLOW "Unknown disk type - limited information available" + fi + + echo "" +} + +# Function to detect all disks 
+detect_disks() { + local disks=() + + # Check for SATA/SAS disks + for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for NVMe disks (base devices only, no partitions) + for disk in /dev/nvme[0-9]n[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for other disk types + for disk in /dev/vd[a-z] /dev/xvd[a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + echo "${disks[@]}" +} + +# Main function +main() { + print_color $BLUE "Disk Health Check Script v$VERSION for Alma Linux 9" + print_color $BLUE "====================================================" + echo "" + + check_dependencies + + local disks=() + + # Check for soft-raid first + check_mdraid + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks + print_color $CYAN "Auto-detecting disks..." + read -ra disks <<< "$(detect_disks)" + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check if running as root, warn if not + if [[ $EUID -ne 0 ]]; then + print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible." + echo "For complete results, run as: sudo $0" + echo "" + fi + + # Check each disk + for disk in "${disks[@]}"; do + check_disk "$disk" + done + + print_color $BLUE "Check completed!" + echo "" + print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers." + print_color $YELLOW " Actual endurance for your specific drive model may be higher." 
+} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " sudo $SCRIPT_NAME # Check all disks (as root)" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" + echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" +} + +# Parse command line arguments +case "${1:-}" in + -h|--help) + usage + exit 0 + ;; + -v|--version) + echo "$SCRIPT_NAME version $VERSION" + exit 0 + ;; + *) + main "$@" + ;; +esac diff --git a/harvester-v2.3.sh b/harvester-v2.3.sh new file mode 100755 index 0000000..88acf68 --- /dev/null +++ b/harvester-v2.3.sh @@ -0,0 +1,530 @@ +#!/bin/bash + +# Disk Health Check Script for Harvester OS +# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid +# Supports consumer and enterprise disk classification + +SCRIPT_NAME=$(basename "$0") +VERSION="2.3" + +# Color codes +RED=$(tput setaf 1) +GREEN=$(tput setaf 2) +YELLOW=$(tput setaf 3) +BLUE=$(tput setaf 4) +CYAN=$(tput setaf 6) +MAGENTA=$(tput setaf 5) +NC=$(tput sgr0) + +# Function to print colored output +print_color() { + local color=$1 + local message=$2 + echo -e "${color}${message}${NC}" +} + +# Check if smartctl is installed +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +if ! command_exists smartctl; then + print_color $RED "Error: smartctl is not installed. Please install smartmontools package." 
+ exit 1 +fi + +# TBW endurance standards (using lowest numbers) +declare -A CONSUMER_TBW=( + ["250"]=150 + ["500"]=300 + ["1000"]=600 + ["2000"]=1200 + ["4000"]=2400 +) + +declare -A ENTERPRISE_TBW=( + ["250"]=450 + ["500"]=900 + ["1000"]=1800 + ["2000"]=3600 + ["4000"]=7200 +) + +# Function to get closest capacity tier +get_capacity_tier() { + local capacity_gb=$1 + local tiers=("250" "500" "1000" "2000" "4000") + + for tier in "${tiers[@]}"; do + if [[ $capacity_gb -le $tier ]]; then + echo $tier + return + fi + done + # For larger than 4TB, use 4TB tier with scaling + echo "4000" +} + +# Function to get disk type and interface +get_disk_info() { + local disk=$1 + local controller=$2 + + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + smart_cmd+=" -i $disk" + + local info=$($smart_cmd 2>/dev/null) + local transport="" + local disk_type="UNKNOWN" + local is_enterprise=false + + # Check if it's NVMe + if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then + disk_type="NVMe" + transport="NVMe" + # Check for SAS + elif echo "$info" | grep -qi "SAS"; then + disk_type="SAS" + transport="SAS" + is_enterprise=true + # Check for SATA SSD + elif echo "$info" | grep -qi "Solid State Device"; then + disk_type="SSD" + transport="SATA" + # Check for SATA HDD + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="HDD" + transport="SATA" + fi + + # Check for enterprise features + if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then + is_enterprise=true + fi + + # Check device type by model name + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then + is_enterprise=true + fi + fi + + echo "$disk_type|$transport|$is_enterprise" +} + +# Function to calculate TBW 
for SSD/NVMe +calculate_tbw() { + local disk_type=$1 + local raw_value=$2 + local sectors=$3 + + local tbw=0 + + if [[ -n "$sectors" && "$sectors" != "0" ]]; then + # Calculate from sectors (most common for SATA SSDs) + local bytes=$((sectors * 512)) + tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc -l 2>/dev/null || echo "0") + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + if [[ "$disk_type" == "NVMe" ]]; then + # NVMe: raw value is in 32MB units + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc -l 2>/dev/null || echo "0") + else + # SATA SSD: various manufacturers + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc -l 2>/dev/null || echo "0") + fi + fi + + echo "$tbw" +} + +# Function to get estimated endurance +get_estimated_endurance() { + local capacity_gb=$1 + local is_enterprise=$2 + local disk_type=$3 + + # HDDs don't have TBW + if [[ "$disk_type" == "HDD" ]]; then + echo "N/A" + return + fi + + local capacity_tier=$(get_capacity_tier "$capacity_gb") + + if [[ "$is_enterprise" == "true" ]]; then + echo "${ENTERPRISE_TBW[$capacity_tier]}" + else + echo "${CONSUMER_TBW[$capacity_tier]}" + fi +} + +# Function to estimate SSD lifespan with TBW remaining +estimate_ssd_lifespan() { + local power_on_hours=$1 + local tbw_used=$2 + local estimated_endurance=$3 + local disk_type=$4 + + if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then + echo "Unknown||Unknown|New" + return + fi + + if [[ "$estimated_endurance" == "N/A" ]]; then + echo "N/A|N/A|N/A|HDD" + return + fi + + local tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc -l 2>/dev/null || echo "0") + + if [[ $(echo "$tbw_used > 0" | bc -l 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc -l 2>/dev/null || echo "0") + local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc -l 2>/dev/null || echo "100") + + if [[ $(echo "$lifespan_used >= 80" | bc -l) -eq 1 ]]; then + 
echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type" + elif [[ $(echo "$lifespan_used >= 50" | bc -l) -eq 1 ]]; then + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type" + else + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type" + fi + else + echo "Unknown|${estimated_endurance} TB|New|$disk_type" + fi +} + +# Function to estimate HDD lifespan +estimate_hdd_lifespan() { + local power_on_hours=$1 + local reallocated_sectors=$2 + local pending_sectors=$3 + local start_stop_count=$4 + local load_cycle_count=$5 + local disk_type=$6 + + if [[ -z "$power_on_hours" ]]; then + echo "Unknown" + return + fi + + local severity=0 + + # Critical issues + if [[ "$pending_sectors" -gt 0 ]]; then + echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + return + elif [[ "$reallocated_sectors" -gt 100 ]]; then + severity=$((severity + 3)) + elif [[ "$reallocated_sectors" -gt 10 ]]; then + severity=$((severity + 2)) + elif [[ "$reallocated_sectors" -gt 0 ]]; then + severity=$((severity + 1)) + fi + + # Usage-based assessment + if [[ "$power_on_hours" -gt 50000 ]]; then + severity=$((severity + 3)) + elif [[ "$power_on_hours" -gt 30000 ]]; then + severity=$((severity + 2)) + elif [[ "$power_on_hours" -gt 15000 ]]; then + severity=$((severity + 1)) + fi + + # Mechanical wear (for HDDs) + if [[ "$disk_type" == "HDD" ]]; then + if [[ "$start_stop_count" -gt 50000 ]]; then + severity=$((severity + 2)) + elif [[ "$start_stop_count" -gt 20000 ]]; then + severity=$((severity + 1)) + fi + + if [[ "$load_cycle_count" -gt 500000 ]]; then + severity=$((severity + 2)) + elif [[ "$load_cycle_count" -gt 200000 ]]; then + severity=$((severity + 1)) + fi + fi + + if [[ $severity -ge 5 ]]; then + echo "${RED}< 6 months${NC} (Multiple risk factors)" + elif [[ $severity -ge 3 ]]; then + echo "${YELLOW}6-18 months${NC} (Moderate wear)" + elif [[ $severity -ge 
1 ]]; then + echo "${YELLOW}1-3 years${NC} (Light wear)" + else + echo "${GREEN}> 3 years${NC} (Healthy)" + fi +} + +# Function to check soft-raid (MDRAID) +check_mdraid() { + local md_devices=() + + if [[ -f /proc/mdstat ]]; then + while IFS= read -r line; do + if [[ $line =~ ^md[0-9]+ ]]; then + md_devices+=("/dev/${line%% *}") + fi + done < /proc/mdstat + fi + + for md in "${md_devices[@]}"; do + if [[ -b "$md" ]]; then + print_color $MAGENTA "Found software RAID: $md" + local md_info=$(mdadm --detail "$md" 2>/dev/null) + if [[ -n "$md_info" ]]; then + echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "" + fi + fi + done +} + +# Function to check a single disk +check_disk() { + local disk=$1 + local controller=$2 + + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + + print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})" + echo "==================================================" + + # Get disk information + local disk_info=$(get_disk_info "$disk" "$controller") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$($smart_cmd -i "$disk" 2>/dev/null) + local health=$($smart_cmd -H "$disk" 2>/dev/null) + local attributes=$($smart_cmd -A "$disk" 2>/dev/null) + + # Check if we can read the disk + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need controller specification." 
+ echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract capacity in GB + local capacity_gb=0 + if [[ $capacity =~ \[([0-9.]+)\s+GB\] ]]; then + capacity_gb=${BASH_REMATCH[1]%.*} + elif [[ $capacity =~ \[([0-9.]+)\s+TB\] ]]; then + capacity_gb=$(echo "${BASH_REMATCH[1]} * 1000" | bc -l 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,]+)\s+bytes ]]; then + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc -l 2>/dev/null | cut -d. 
-f1) + fi + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract SMART attributes + local power_on_hours=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}') + local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}') + local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}') + local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}') + local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}') + local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}') + local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}') + + # For NVMe disks using smartctl + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_attributes=$($smart_cmd -x "$disk" 2>/dev/null) + power_on_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') + total_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') + [[ -z "$total_written" ]] && total_written=$(echo "$nvme_attributes" | grep "Host_Writes_32MiB" | awk '{print $10}') + fi + + # Display basic information + echo "Model: ${model:-Unknown}" + echo "Serial: ${serial:-Unknown}" + echo "Type: $disk_type" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: ${capacity:-Unknown}" + echo "Firmware: ${firmware:-Unknown}" + echo "Health: ${health_status:-Unknown}" + echo "Power On Hours: ${power_on_hours:-Unknown}" + + # Disk type specific analysis + if [[ "$disk_type" == "HDD" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + [[ -n 
"$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" + [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") + echo "Lifespan: $lifespan" + + elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then + local tbw_used=0 + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") + elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") + fi + + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type") + + echo "TBW Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB" + + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") + local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) + local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) + local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) + + if [[ "$estimated_endurance" != "N/A" ]]; then + echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" + fi + + # Show mechanical attributes for SAS drives that might be SSDs + if [[ "$disk_type" == "SAS" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + fi + else + print_color $YELLOW "Unknown disk type - limited information available" + fi + + echo "" +} + +# Function to detect RAID controllers and disks +detect_raid_disks() { + local controllers=("megaraid" "cciss" "areca" "3ware" "hpt" "aacraid" "auto") + local disks=() + + # Check for direct disks first (SATA/SAS/NVMe) + for disk in /dev/sd[a-z] /dev/sd[a-z][a-z] /dev/nvme[0-9]n[0-9]; do + if [[ -b 
"$disk" ]]; then + disks+=("$disk:direct") + fi + done + + # Check for RAID controllers + for controller in "${controllers[@]}"; do + for i in {0..31}; do + # Try different device patterns + for base_disk in "/dev/sda" "/dev/sg$i" "/dev/sr$i"; do + if smartctl -d "$controller,$i" -i "$base_disk" >/dev/null 2>&1; then + disks+=("$base_disk:$controller,$i") + break + fi + done + done + done + + # Check for JBOD/passthrough disks on MegaRAID + if command_exists storcli; then + local jbod_disks=$(storcli /c0/eALL/sALL show all 2>/dev/null | grep -i "jbod\|unconfigured" | awk '{print $2}') + for disk in $jbod_disks; do + disks+=("$disk:megaraid,$disk") + done + fi + + echo "${disks[@]}" +} + +# Main function +main() { + print_color $BLUE "Disk Health Check Script v$VERSION for Harvester OS" + print_color $BLUE "====================================================" + echo "" + + local disks=() + + # Check for soft-raid first + check_mdraid + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; do + if [[ -b "$disk" ]]; then + disks+=("$disk:direct") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks + print_color $CYAN "Auto-detecting disks..." + read -ra disks <<< "$(detect_raid_disks)" + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $YELLOW "No disks found via auto-detection" + print_color $CYAN "Trying direct disk access..." 
+ # Try direct access to common disks + for disk in /dev/sda /dev/sdb /dev/sdc /dev/nvme0n1; do + if [[ -b "$disk" ]]; then + disks+=("$disk:direct") + fi + done + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check each disk + for disk_info in "${disks[@]}"; do + IFS=':' read -r disk controller <<< "$disk_info" + check_disk "$disk" "$controller" + done + + print_color $BLUE "Check completed!" +} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks and RAID arrays" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" + echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" +} + +# Parse command line arguments +case "${1:-}" in + -h|--help) + usage + exit 0 + ;; + -v|--version) + echo "$SCRIPT_NAME version $VERSION" + exit 0 + ;; + *) + main "$@" + ;; +esac diff --git a/harvester-v2.5.sh b/harvester-v2.5.sh new file mode 100755 index 0000000..f940938 --- /dev/null +++ b/harvester-v2.5.sh @@ -0,0 +1,679 @@ +#!/bin/bash + +# Disk Health Check Script for Harvester OS +# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid +# Supports consumer and enterprise disk classification + +SCRIPT_NAME=$(basename "$0") +VERSION="2.5" + +# Color codes +RED=$(tput setaf 1) +GREEN=$(tput setaf 2) +YELLOW=$(tput setaf 3) +BLUE=$(tput setaf 4) +CYAN=$(tput setaf 6) +MAGENTA=$(tput setaf 5) +NC=$(tput sgr0) + +# Function to print colored output +print_color() { + local color=$1 + local message=$2 + echo -e 
"${color}${message}${NC}" +} + +# Check if smartctl is installed +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +if ! command_exists smartctl; then + print_color $RED "Error: smartctl is not installed. Please install smartmontools package." + exit 1 +fi + +# TBW endurance standards (using lowest numbers) +declare -A CONSUMER_TBW=( + ["250"]=150 + ["500"]=300 + ["1000"]=600 + ["2000"]=1200 + ["4000"]=2400 + ["8000"]=4800 +) + +declare -A ENTERPRISE_TBW=( + ["250"]=450 + ["500"]=900 + ["1000"]=1800 + ["2000"]=3600 + ["4000"]=7200 + ["8000"]=14400 +) + +# Function to get closest capacity tier +get_capacity_tier() { + local capacity_gb=$1 + local tiers=("250" "500" "1000" "2000" "4000" "8000") + + for tier in "${tiers[@]}"; do + if [[ $capacity_gb -le $tier ]]; then + echo $tier + return + fi + done + # For larger than 8TB, use proportional scaling from 4TB + echo "8000" +} + +# Function to convert bytes to human readable +bytes_to_human() { + local bytes=$1 + if [[ $bytes -ge 1099511627776 ]]; then + echo "$(echo "scale=2; $bytes / 1099511627776" | bc -l 2>/dev/null || echo "0") TB" + elif [[ $bytes -ge 1073741824 ]]; then + echo "$(echo "scale=2; $bytes / 1073741824" | bc -l 2>/dev/null || echo "0") GB" + elif [[ $bytes -ge 1048576 ]]; then + echo "$(echo "scale=2; $bytes / 1048576" | bc -l 2>/dev/null || echo "0") MB" + else + echo "$bytes bytes" + fi +} + +# Function to extract numeric hours from power_on_hours field +extract_numeric_hours() { + local power_on_hours=$1 + # Remove everything after non-numeric characters + local numeric_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*$//') + if [[ -n "$numeric_hours" && "$numeric_hours" =~ ^[0-9]+$ ]]; then + echo "$numeric_hours" + else + echo "0" + fi +} + +# Function to get disk type and interface +get_disk_info() { + local disk=$1 + local controller=$2 + + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + smart_cmd+=" -i $disk" + + local info=$($smart_cmd 
2>/dev/null) + local transport="" + local disk_type="UNKNOWN" + local is_enterprise=false + + # Check if it's NVMe + if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then + disk_type="NVMe" + transport="NVMe" + # Check for SAS + elif echo "$info" | grep -qi "SAS"; then + disk_type="SAS" + transport="SAS" + is_enterprise=true + # Check for SATA SSD + elif echo "$info" | grep -qi "Solid State Device"; then + disk_type="SSD" + transport="SATA" + # Check for SATA HDD + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="HDD" + transport="SATA" + fi + + # Check for enterprise features + if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then + is_enterprise=true + fi + + # Check device type by model name + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then + is_enterprise=true + fi + fi + + echo "$disk_type|$transport|$is_enterprise" +} + +# Function to calculate TBW for SSD/NVMe +calculate_tbw() { + local disk_type=$1 + local raw_value=$2 + local sectors=$3 + + local tbw=0 + + if [[ -n "$sectors" && "$sectors" != "0" ]]; then + # Calculate from sectors (most common for SATA SSDs) + local bytes=$((sectors * 512)) + tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc -l 2>/dev/null || echo "0") + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + if [[ "$disk_type" == "NVMe" ]]; then + # NVMe: raw value is in 32MB units + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc -l 2>/dev/null || echo "0") + else + # SATA SSD: various manufacturers + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc -l 2>/dev/null || echo "0") + fi + fi + + echo "$tbw" +} + +# Function to get estimated endurance +get_estimated_endurance() { + local capacity_gb=$1 + local 
is_enterprise=$2 + local disk_type=$3 + + # HDDs don't have TBW + if [[ "$disk_type" == "HDD" ]]; then + echo "N/A" + return + fi + + local capacity_tier=$(get_capacity_tier "$capacity_gb") + + if [[ "$is_enterprise" == "true" ]]; then + echo "${ENTERPRISE_TBW[$capacity_tier]}" + else + echo "${CONSUMER_TBW[$capacity_tier]}" + fi +} + +# Function to estimate SSD lifespan with TBW remaining +estimate_ssd_lifespan() { + local power_on_hours=$1 + local tbw_used=$2 + local estimated_endurance=$3 + local disk_type=$4 + + if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then + echo "Unknown||Unknown|New" + return + fi + + if [[ "$estimated_endurance" == "N/A" ]]; then + echo "N/A|N/A|N/A|HDD" + return + fi + + # Handle the case where tbw_used might have formatting issues + local clean_tbw_used=$(echo "$tbw_used" | sed 's/[^0-9.]//g') + if [[ -z "$clean_tbw_used" ]]; then + clean_tbw_used=0 + fi + + local tbw_remaining=$(echo "scale=2; $estimated_endurance - $clean_tbw_used" | bc -l 2>/dev/null || echo "$estimated_endurance") + + if [[ $(echo "$clean_tbw_used > 0" | bc -l 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc -l 2>/dev/null || echo "0") + local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc -l 2>/dev/null || echo "100") + + if [[ $(echo "$lifespan_used >= 80" | bc -l) -eq 1 ]]; then + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type" + elif [[ $(echo "$lifespan_used >= 50" | bc -l) -eq 1 ]]; then + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type" + else + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type" + fi + else + echo "Unknown|${estimated_endurance} TB|New|$disk_type" + fi +} + +# Function to estimate HDD lifespan +estimate_hdd_lifespan() { + local power_on_hours=$1 + local reallocated_sectors=$2 + local pending_sectors=$3 + 
local start_stop_count=$4 + local load_cycle_count=$5 + local disk_type=$6 + + # Extract numeric hours only + local numeric_hours=$(extract_numeric_hours "$power_on_hours") + + if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then + echo "Unknown" + return + fi + + local severity=0 + + # Critical issues + if [[ "$pending_sectors" -gt 0 ]]; then + echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + return + elif [[ "$reallocated_sectors" -gt 100 ]]; then + severity=$((severity + 3)) + elif [[ "$reallocated_sectors" -gt 10 ]]; then + severity=$((severity + 2)) + elif [[ "$reallocated_sectors" -gt 0 ]]; then + severity=$((severity + 1)) + fi + + # Usage-based assessment + if [[ "$numeric_hours" -gt 50000 ]]; then + severity=$((severity + 3)) + elif [[ "$numeric_hours" -gt 30000 ]]; then + severity=$((severity + 2)) + elif [[ "$numeric_hours" -gt 15000 ]]; then + severity=$((severity + 1)) + fi + + # Mechanical wear (for HDDs) + if [[ "$disk_type" == "HDD" ]]; then + if [[ "$start_stop_count" -gt 50000 ]]; then + severity=$((severity + 2)) + elif [[ "$start_stop_count" -gt 20000 ]]; then + severity=$((severity + 1)) + fi + + if [[ "$load_cycle_count" -gt 500000 ]]; then + severity=$((severity + 2)) + elif [[ "$load_cycle_count" -gt 200000 ]]; then + severity=$((severity + 1)) + fi + fi + + if [[ $severity -ge 5 ]]; then + echo "${RED}< 6 months${NC} (Multiple risk factors)" + elif [[ $severity -ge 3 ]]; then + echo "${YELLOW}6-18 months${NC} (Moderate wear)" + elif [[ $severity -ge 1 ]]; then + echo "${YELLOW}1-3 years${NC} (Light wear)" + else + echo "${GREEN}> 3 years${NC} (Healthy)" + fi +} + +# Function to check soft-raid (MDRAID) +check_mdraid() { + local md_devices=() + + if [[ -f /proc/mdstat ]]; then + while IFS= read -r line; do + if [[ $line =~ ^md[0-9]+ ]]; then + md_devices+=("/dev/${line%% *}") + fi + done < /proc/mdstat + fi + + for md in "${md_devices[@]}"; do + if [[ -b "$md" ]]; then + print_color $MAGENTA "Found software RAID: $md" 
+ if command_exists mdadm; then + local md_info=$(mdadm --detail "$md" 2>/dev/null) + if [[ -n "$md_info" ]]; then + echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "" + fi + fi + fi + done +} + +# Function to extract capacity in GB from various formats +extract_capacity_gb() { + local capacity=$1 + local capacity_gb=0 + + # Try different patterns to extract capacity + if [[ $capacity =~ \[([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: [1.82 TB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc -l 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ \[([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: [500.1 GB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc -l 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,]+)\s+bytes ]]; then + # Pattern: 500,107,862,016 bytes + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc -l 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: 1.82TB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc -l 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: 500.1GB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc -l 2>/dev/null | cut -d. -f1) + fi + + # Ensure we have a valid number + if [[ -z "$capacity_gb" || ! 
"$capacity_gb" =~ ^[0-9]+$ || "$capacity_gb" -le 0 ]]; then + echo "0" + else + echo "$capacity_gb" + fi +} + +# Function to get NVMe capacity using smartctl +get_nvme_capacity() { + local disk=$1 + local controller=$2 + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + + local nvme_info=$($smart_cmd -i "$disk" 2>/dev/null) + local capacity="" + + # Try to get capacity from different fields + capacity=$(echo "$nvme_info" | grep -i "Total NVM Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "Namespace 1 Size/Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + fi + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "User Capacity" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + fi + + echo "$capacity" +} + +# Function to get human readable capacity +get_human_capacity() { + local capacity=$1 + local capacity_gb=$2 + + if [[ $capacity_gb -ge 1000 ]]; then + echo "$(echo "scale=2; $capacity_gb / 1000" | bc -l) TB" + else + echo "${capacity_gb} GB" + fi +} + +# Function to check a single disk +check_disk() { + local disk=$1 + local controller=$2 + + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + + print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})" + echo "==================================================" + + # Get disk information + local disk_info=$(get_disk_info "$disk" "$controller") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$($smart_cmd -i "$disk" 2>/dev/null) + local health=$($smart_cmd -H "$disk" 2>/dev/null) + local attributes=$($smart_cmd -A "$disk" 2>/dev/null) + + # Check if we can read the disk + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. 
It may be offline, unsupported, or need controller specification." + echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # For NVMe disks, try to get capacity from different fields + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_capacity=$(get_nvme_capacity "$disk" "$controller") + if [[ -n "$nvme_capacity" ]]; then + capacity="$nvme_capacity" + fi + fi + + # Extract capacity in GB and human readable format + local capacity_gb=$(extract_capacity_gb "$capacity") + local capacity_human=$(get_human_capacity "$capacity" "$capacity_gb") + + # If capacity extraction failed, try alternative method + if [[ "$capacity_gb" -eq 0 ]]; then + # Try to get capacity from model name or other methods + if [[ "$disk_type" == "NVMe" && "$model" =~ 500[Gg] ]]; then + capacity_gb=500 + capacity_human="500 GB" + elif [[ "$model" =~ 960[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then + capacity_gb=1000 + capacity_human="1 TB" + elif [[ "$model" =~ 2[Tt] ]]; then + capacity_gb=2000 + capacity_human="2 TB" + elif [[ "$model" =~ 500[Gg] ]]; then + capacity_gb=500 + capacity_human="500 GB" + elif [[ "$model" =~ 250[Gg] ]]; then + capacity_gb=250 + capacity_human="250 GB" + fi + fi + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract SMART attributes + local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}') 
+ local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw") + local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}') + local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}') + local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}') + local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}') + local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}') + local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}') + + # For NVMe disks using smartctl extended attributes + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_attributes=$($smart_cmd -x "$disk" 2>/dev/null) + # Extract data units written for NVMe + local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $4}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + # Convert data units to sectors (1 data unit = 1000 sectors for NVMe) + total_written=$(echo "$data_units_written * 1000" | bc -l 2>/dev/null) + else + # Try alternative field + data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + total_written=$(echo "$data_units_written * 1000" | bc -l 2>/dev/null) + fi + fi + # Get power on hours for NVMe + local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') + if [[ -n "$nvme_power_hours" ]]; then + power_on_hours="$nvme_power_hours" + fi + fi + + # Display basic information + echo "Model: ${model:-Unknown}" + echo "Serial: ${serial:-Unknown}" + echo "Type: $disk_type" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: $capacity_human" + echo "Firmware: ${firmware:-Unknown}" + echo "Health: ${health_status:-Unknown}" + echo 
"Power On Hours: ${power_on_hours:-Unknown}" + + # Disk type specific analysis + if [[ "$disk_type" == "HDD" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" + [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") + echo "Lifespan: $lifespan" + + elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then + local tbw_used=0 + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") + elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") + fi + + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type") + + echo "TBW Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)" + + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") + local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) + local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) + local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) + + if [[ "$estimated_endurance" != "N/A" ]]; then + echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" + fi + + # Show mechanical attributes for SAS drives that might be SSDs + if [[ "$disk_type" == "SAS" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + fi + else + print_color $YELLOW "Unknown disk type - limited information available" + fi + + echo "" +} + +# Function to detect RAID 
controllers and disks +detect_raid_disks() { + local controllers=("megaraid" "cciss" "areca" "3ware" "hpt" "aacraid" "auto") + local disks=() + + # Check for direct disks first (SATA/SAS/NVMe) + for disk in /dev/sd[a-z] /dev/sd[a-z][a-z] /dev/nvme[0-9]n[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk:direct") + fi + done + + # Check for RAID controllers + for controller in "${controllers[@]}"; do + for i in {0..31}; do + # Try different device patterns + for base_disk in "/dev/sda" "/dev/sg$i" "/dev/sr$i"; do + if smartctl -d "$controller,$i" -i "$base_disk" >/dev/null 2>&1; then + disks+=("$base_disk:$controller,$i") + break + fi + done + done + done + + # Check for JBOD/passthrough disks on MegaRAID + if command_exists storcli; then + local jbod_disks=$(storcli /c0/eALL/sALL show all 2>/dev/null | grep -i "jbod\|unconfigured" | awk '{print $2}') + for disk in $jbod_disks; do + disks+=("$disk:megaraid,$disk") + done + fi + + echo "${disks[@]}" +} + +# Main function +main() { + print_color $BLUE "Disk Health Check Script v$VERSION for Harvester OS" + print_color $BLUE "====================================================" + echo "" + + local disks=() + + # Check for soft-raid first + check_mdraid + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; do + if [[ -b "$disk" ]]; then + disks+=("$disk:direct") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks + print_color $CYAN "Auto-detecting disks..." + read -ra disks <<< "$(detect_raid_disks)" + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $YELLOW "No disks found via auto-detection" + print_color $CYAN "Trying direct disk access..." 
+ # Try direct access to common disks + for disk in /dev/sda /dev/sdb /dev/sdc /dev/nvme0n1; do + if [[ -b "$disk" ]]; then + disks+=("$disk:direct") + fi + done + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check each disk + for disk_info in "${disks[@]}"; do + IFS=':' read -r disk controller <<< "$disk_info" + check_disk "$disk" "$controller" + done + + print_color $BLUE "Check completed!" + echo "" + print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers." + print_color $YELLOW " Actual endurance for your specific drive model may be higher." +} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks and RAID arrays" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" + echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" +} + +# Parse command line arguments +case "${1:-}" in + -h|--help) + usage + exit 0 + ;; + -v|--version) + echo "$SCRIPT_NAME version $VERSION" + exit 0 + ;; + *) + main "$@" + ;; +esac diff --git a/README.md b/old/README.md similarity index 100% rename from README.md rename to old/README.md diff --git a/alma-v2.4.sh b/old/alma-v2.4.sh similarity index 100% rename from alma-v2.4.sh rename to old/alma-v2.4.sh diff --git a/harvester-v2.4.sh b/old/harvester-v2.4.sh similarity index 100% rename from harvester-v2.4.sh rename to old/harvester-v2.4.sh diff --git a/old/ubuntu-v2.5.sh b/old/ubuntu-v2.5.sh new file mode 100755 index 0000000..bb64ff9 --- /dev/null +++ 
b/old/ubuntu-v2.5.sh @@ -0,0 +1,615 @@ +#!/bin/bash + +# Disk Health Check Script for Ubuntu 24.04 +# Enhanced with SAS/PERC H730P controller support +# Checks SSD TBW/lifespan and HDD health status + +SCRIPT_NAME=$(basename "$0") +VERSION="2.5" + +# Color codes +RED=$(tput setaf 1) +GREEN=$(tput setaf 2) +YELLOW=$(tput setaf 3) +BLUE=$(tput setaf 4) +CYAN=$(tput setaf 6) +NC=$(tput sgr0) + +# Function to print colored output +print_color() { + local color=$1 + local message=$2 + echo -e "${color}${message}${NC}" +} + +# Check if command exists +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +# Check dependencies +check_dependencies() { + local missing=() + + if ! command_exists smartctl; then + missing+=("smartmontools") + fi + + if ! command_exists bc; then + missing+=("bc") + fi + + if [[ ${#missing[@]} -gt 0 ]]; then + print_color $RED "Error: Missing required packages: ${missing[*]}" + echo "Install with: sudo apt update && sudo apt install ${missing[*]}" + exit 1 + fi +} + +# Function to test SMART access and get available data - FIXED VERSION +test_smart_access() { + local disk=$1 + local controller=$2 + + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + + # Test basic SMART access + if ! $smart_cmd -i "$disk" &>/dev/null; then + echo "no_access" + return + fi + + # Get SMART information + local smart_info=$($smart_cmd -i "$disk" 2>/dev/null) + + # Check if SMART is available - FIXED PARSING + if ! 
echo "$smart_info" | grep -q "SMART support is:"; then + echo "not_available" + return + fi + + # Extract SMART status - FIXED LOGIC + local smart_support_line=$(echo "$smart_info" | grep "SMART support is:") + local smart_available=$(echo "$smart_support_line" | grep -q "Available" && echo "Available" || echo "") + local smart_enabled=$(echo "$smart_support_line" | grep -q "Enabled" && echo "Enabled" || echo "") + + if [[ -z "$smart_available" ]]; then + echo "not_available" + return + fi + + if [[ -z "$smart_enabled" ]]; then + echo "disabled" + return + fi + + # Test attribute reading + local attributes=$($smart_cmd -A "$disk" 2>/dev/null) + if [[ -z "$attributes" ]] || ! echo "$attributes" | grep -q "ATTRIBUTE_NAME"; then + echo "no_attributes" + return + fi + + echo "full_access" +} + +# Function to get disk information with enhanced SAS support +get_disk_info() { + local disk=$1 + local controller=$2 + + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + + local info=$($smart_cmd -i "$disk" 2>/dev/null) + local attributes=$($smart_cmd -A "$disk" 2>/dev/null) + local health=$($smart_cmd -H "$disk" 2>/dev/null) + + # Extract information with multiple fallbacks for SAS drives + local model=$(echo "$info" | grep -i "Device Model:\|Product:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) + local vendor=$(echo "$info" | grep -i "Vendor:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) + [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model" + + local serial=$(echo "$info" | grep -i "Serial Number:\|Serial number:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) + + local capacity=$(echo "$info" | grep -i "User Capacity:\|Total NVM Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1 | head -1) + + local firmware=$(echo "$info" | grep -i "Firmware Version:\|Firmware revision:\|Revision:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) + + local health_status=$(echo "$health" | grep -i "result:\|SMART overall-health" | cut -d: -f2 
| sed 's/^[ \t]*//' | head -1) + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep -i "SMART overall-health" | awk -F'[' '{print $2}' | cut -d']' -f1) + + # Get disk type with SAS support + local disk_type="UNKNOWN" + if echo "$info" | grep -qi "Solid State Device"; then + disk_type="SSD" + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="HDD" + elif echo "$info" | grep -qi "SCSI\|SAS"; then + # SAS drives often don't specify, check rotation rate + if echo "$info" | grep -qi "15000\|10000\|7200"; then + disk_type="HDD" + else + disk_type="SSD" + fi + fi + + # Extract SMART attributes with multiple field attempts + local power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours" | awk '{print $10}' | head -1) + + local reallocated_sectors=$(echo "$attributes" | grep -i "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) + + local pending_sectors=$(echo "$attributes" | grep -i "Current_Pending_Sector" | awk '{print $10}' | head -1) + + # For Kingston and other SSDs with different attribute names + local total_written=$(echo "$attributes" | grep -i "Total_LBAs_Written\|Lifetime_Writes_GiB\|Host_Writes_32MiB\|Flash_Writes_GiB" | awk '{print $10}' | head -1) + local host_writes_32mib=$(echo "$attributes" | grep -i "Host_Writes_32MiB" | awk '{print $10}' | head -1) + + # For wear leveling indicators + local media_wearout=$(echo "$attributes" | grep -i "Media_Wearout_Indicator\|Wear_Leveling_Count\|SSD_Life_Left" | awk '{print $10}' | head -1) + + echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout" +} + +# Function to calculate TBW for SSD - ENHANCED FOR KINGSTON +calculate_tbw() { + local raw_value=$1 + local sectors=$2 + local disk_model=$3 + + # Kingston SSDs use Lifetime_Writes_GiB and Flash_Writes_GiB + if echo "$disk_model" | grep -qi "KINGSTON"; then + if [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + # 
Convert from GiB to TB + local tbw=$(echo "scale=2; $raw_value / 1000" | bc 2>/dev/null || echo "0") + echo "$tbw" + return + fi + fi + + if [[ -n "$sectors" && "$sectors" != "0" ]]; then + local bytes=$((sectors * 512)) + local tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") + echo "$tbw" + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + local tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") + echo "$tbw" + else + echo "0" + fi +} + +# Function to estimate SSD endurance based on model and capacity +estimate_ssd_endurance() { + local disk_model=$1 + local capacity_gb=$2 + + # Kingston consumer SSDs + if echo "$disk_model" | grep -qi "KINGSTON.*SA400"; then + if [[ $capacity_gb -ge 960 ]]; then + echo "300" # 300TB for 960GB Kingston SA400 + elif [[ $capacity_gb -ge 480 ]]; then + echo "150" # 150TB for 480GB Kingston + else + echo "80" # 80TB for smaller Kingston + fi + # SAS SSDs typically have very high endurance + elif echo "$disk_model" | grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD"; then + # Enterprise SAS SSDs - very high endurance + if [[ $capacity_gb -ge 1000 ]]; then + echo "10000" # 10PB for 1TB+ enterprise SAS SSD + elif [[ $capacity_gb -ge 600 ]]; then + echo "6000" # 6PB for 600GB enterprise SAS SSD + elif [[ $capacity_gb -ge 400 ]]; then + echo "4000" # 4PB for 400GB enterprise SAS SSD + else + echo "2000" # 2PB for smaller enterprise SAS SSD + fi + elif echo "$disk_model" | grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL"; then + # Enterprise SATA/NVMe SSDs + if [[ $capacity_gb -ge 1000 ]]; then + echo "1200" # 1.2PB for 1TB enterprise + elif [[ $capacity_gb -ge 480 ]]; then + echo "600" # 600TB for 480GB enterprise + elif [[ $capacity_gb -ge 240 ]]; then + echo "300" # 300TB for 240GB enterprise + else + echo "150" # 150TB for smaller enterprise + fi + else + # Consumer SSDs + if [[ $capacity_gb -ge 1000 ]]; then + echo "600" # 600TB for 1TB 
consumer + elif [[ $capacity_gb -ge 480 ]]; then + echo "300" # 300TB for 480GB consumer + elif [[ $capacity_gb -ge 240 ]]; then + echo "150" # 150TB for 240GB consumer + elif [[ $capacity_gb -ge 120 ]]; then + echo "80" # 80TB for 120GB consumer + else + echo "40" # 40TB for smaller drives + fi + fi +} + +# Function to estimate SSD lifespan with TBW remaining +estimate_ssd_lifespan() { + local power_on_hours=$1 + local tbw_used=$2 + local disk_model=$3 + local capacity_gb=$4 + local media_wearout=$5 + + if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then + echo "Unknown||Unknown||Unknown" + return + fi + + local estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb") + local tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0") + + # If we have media wearout indicator, use it for more accurate estimation + if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then + # For Kingston, SSD_Life_Left is already a percentage + if echo "$disk_model" | grep -qi "KINGSTON"; then + if [[ $media_wearout -le 10 ]]; then + echo "${RED}${media_wearout}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" + elif [[ $media_wearout -le 30 ]]; then + echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout" + elif [[ $media_wearout -le 70 ]]; then + echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" + else + echo "${GREEN}${media_wearout}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" + fi + else + # For other drives, media_wearout might be countdown from 100 + local wear_percent=$media_wearout + if [[ $media_wearout -le 10 ]]; then + echo "${RED}${wear_percent}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" + elif [[ $media_wearout -le 30 ]]; then + echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} 
TB${NC}|${YELLOW}High wear${NC}|media_wearout" + elif [[ $media_wearout -le 70 ]]; then + echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" + else + echo "${GREEN}${wear_percent}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" + fi + fi + return + fi + + if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") + local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100") + + if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}High wear${NC}|tbw" + elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|tbw" + else + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|tbw" + fi + else + echo "Unknown|${estimated_endurance} TB|New|estimated" + fi +} + +# Function to estimate HDD lifespan +estimate_hdd_lifespan() { + local power_on_hours=$1 + local reallocated_sectors=$2 + local pending_sectors=$3 + + if [[ -z "$power_on_hours" ]]; then + echo "Unknown" + return + fi + + power_on_hours=${power_on_hours:-0} + reallocated_sectors=${reallocated_sectors:-0} + pending_sectors=${pending_sectors:-0} + + if [[ "$pending_sectors" -gt 0 ]]; then + echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + elif [[ "$reallocated_sectors" -gt 100 ]]; then + echo "${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)" + elif [[ "$reallocated_sectors" -gt 10 ]]; then + echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)" + elif [[ "$power_on_hours" -gt 40000 ]]; then + echo "${YELLOW}1-2 years${NC} (High usage: $power_on_hours hours)" + elif 
[[ "$power_on_hours" -gt 25000 ]]; then + echo "${GREEN}2-3 years${NC} (Moderate usage: $power_on_hours hours)" + else + echo "${GREEN}> 3 years${NC} (Low usage: $power_on_hours hours)" + fi +} + +# Function to check a single disk with enhanced error handling +check_disk() { + local disk=$1 + local controller=$2 + + print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})" + echo "==================================================" + + # Test SMART access level + local access_level=$(test_smart_access "$disk" "$controller") + + case $access_level in + "no_access") + print_color $RED "ERROR: Cannot access disk through controller" + echo "Possible reasons:" + echo " - Controller doesn't support SMART passthrough" + echo " - Disk is part of a hardware RAID array" + echo " - Insufficient permissions (try running as root)" + echo " - Controller busy or offline" + echo "" + return + ;; + "not_available") + print_color $YELLOW "SMART not available on this disk" + echo "This disk does not support SMART monitoring" + echo "" + return + ;; + "disabled") + print_color $YELLOW "SMART is disabled on this disk" + echo "SMART is available but currently disabled" + echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk" + echo "" + return + ;; + "no_attributes") + print_color $YELLOW "WARNING: Cannot read SMART attributes" + echo "This is common with hardware RAID controllers like PERC H730P" + echo "Try checking through the RAID management interface" + echo "" + return + ;; + "limited_attributes") + print_color $YELLOW "NOTE: Limited SMART data available" + echo "Controller is filtering some SMART attributes" + ;; + esac + + # Get disk information + local disk_info=$(get_disk_info "$disk" "$controller") + IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info" + + # Display basic information + echo "Model: 
${model:-Unknown}" + echo "Serial: ${serial:-Unknown}" + echo "Type: $disk_type" + echo "Capacity: ${capacity:-Unknown}" + echo "Firmware: ${firmware:-Unknown}" + echo "Health: ${health_status:-Unknown}" + + # Only show power on hours if available + if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then + echo "Power On Hours: $power_on_hours" + else + echo "Power On Hours: Unknown" + fi + + # Disk type specific analysis + if [[ "$disk_type" == "SSD" ]]; then + local tbw_used=0 + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + tbw_used=$(calculate_tbw "" "$total_written" "$model") + elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then + tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model") + fi + + if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + echo "TBW Used: ${tbw_used} TB" + fi + + # Estimate capacity for endurance calculation + local capacity_gb=0 + if echo "$capacity" | grep -qi "GB"; then + capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. -f1) + elif echo "$capacity" | grep -qi "TB"; then + capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. 
-f1) + fi + + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout") + local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) + local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) + local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) + local wear_source=$(echo "$lifespan_info" | cut -d'|' -f4) + + if [[ "$wear_source" != "media_wearout" && $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + echo "TBW Remaining: $tbw_remaining" + fi + + echo "Lifespan: $lifespan_percent ($wear_status)" + + # Show wear source if available + if [[ "$wear_source" == "media_wearout" ]]; then + echo "Wear Source: Media Wearout Indicator" + elif [[ "$wear_source" == "tbw" ]]; then + echo "Wear Source: TBW Calculation" + elif [[ "$wear_source" == "estimated" ]]; then + echo "Wear Source: Estimated Endurance" + fi + + elif [[ "$disk_type" == "HDD" ]]; then + if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then + echo "Realloc Sectors: $reallocated_sectors" + fi + if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then + echo "Pending Sectors: $pending_sectors" + fi + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}") + echo "Lifespan: $lifespan" + else + print_color $YELLOW "Limited information available for this disk type" + echo "This is normal for hardware RAID configurations like PERC H730P" + echo "For detailed SAS drive information, use controller management tools" + fi + + echo "" +} + +# Function to detect all disks with enhanced SAS support (no partitions) - FIXED +detect_disks() { + local disks=() + + # Check for SATA/SAS disks - only main devices, no partitions + for disk in /dev/sd[a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for NVMe disks - only main devices, no partitions + for disk in /dev/nvme[0-9]n[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi 
+ done + + # Check for SAS disks via SCSI generic - only main devices + for disk in /dev/sg[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for other disk types - only main devices + for disk in /dev/vd[a-z] /dev/xvd[a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + echo "${disks[@]}" +} + +# Function to detect RAID controllers (Ubuntu specific) - FIXED +detect_raid_controllers() { + local controllers=("megaraid" "cciss" "areca" "3ware" "hpt") + local raid_disks=() + + # Check for RAID controllers + for controller in "${controllers[@]}"; do + for i in {0..31}; do + # Try different disk devices for each controller + for base_disk in "/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1"; do + if [[ -b "$base_disk" ]]; then + if smartctl -d "$controller,$i" -i "$base_disk" &>/dev/null; then + raid_disks+=("$base_disk:$controller,$i") + break + fi + fi + done + done + done + + echo "${raid_disks[@]}" +} + +# Main function - FIXED +main() { + print_color $BLUE "Ubuntu 24.04 Disk Health Check Script v$VERSION" + print_color $BLUE "Enhanced with PERC H730P and SAS Support" + print_color $BLUE "============================================" + echo "" + + check_dependencies + + local disks=() + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks - FIXED: don't mix output with disk detection + print_color $CYAN "Auto-detecting disks (excluding partitions)..." + local direct_disks=() + read -ra direct_disks <<< "$(detect_disks)" + + print_color $CYAN "Scanning for RAID controllers..." 
+ local raid_disks=() + read -ra raid_disks <<< "$(detect_raid_controllers)" + + # Combine both lists + disks=("${direct_disks[@]}" "${raid_disks[@]}") + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check if running as root, warn if not + if [[ $EUID -ne 0 ]]; then + print_color $YELLOW "Warning: Not running as root." + print_color $YELLOW "Some disks/controllers may show limited information." + echo "For complete results, run as: sudo $0" + echo "" + fi + + # Check each disk + for disk_info in "${disks[@]}"; do + # Check if this is a RAID disk (has controller specified) + if [[ "$disk_info" == *":"* ]]; then + IFS=':' read -r disk controller <<< "$disk_info" + check_disk "$disk" "$controller" + else + check_disk "$disk_info" + fi + done + + print_color $BLUE "Check completed!" + echo "" + print_color $CYAN "Note: For PERC H730P controllers with SAS drives:" + print_color $CYAN " - Install 'storcli' for detailed controller information" + print_color $CYAN " - Use 'smartctl -d sat /dev/sgX' to try direct access" + print_color $CYAN " - Hardware RAID controllers often limit SMART data access" + echo "" + print_color $CYAN "Ubuntu-specific tips:" + print_color $CYAN " - Use 'lsblk' to see all available block devices" + print_color $CYAN " - Use 'lshw -class disk' for detailed disk information" +} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " sudo $SCRIPT_NAME # Check all disks (as root)" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" + echo " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" + echo " $SCRIPT_NAME 
/dev/sda /dev/nvme0n1 # Check multiple disks" +} + +# Parse command line arguments +case "${1:-}" in + -h|--help) + usage + exit 0 + ;; + -v|--version) + echo "$SCRIPT_NAME version $VERSION" + exit 0 + ;; + *) + main "$@" + ;; +esac diff --git a/ubuntu-v2.6.sh b/old/ubuntu-v2.6.sh similarity index 100% rename from ubuntu-v2.6.sh rename to old/ubuntu-v2.6.sh diff --git a/ubuntu-v2.7.sh b/old/ubuntu-v2.7.sh similarity index 100% rename from ubuntu-v2.7.sh rename to old/ubuntu-v2.7.sh diff --git a/ubuntu-v2.8.sh b/old/ubuntu-v2.8.sh similarity index 100% rename from ubuntu-v2.8.sh rename to old/ubuntu-v2.8.sh diff --git a/ubuntu-v3.0.sh b/old/ubuntu-v3.0.sh similarity index 100% rename from ubuntu-v3.0.sh rename to old/ubuntu-v3.0.sh diff --git a/ubuntu-v2.3.sh b/ubuntu-v2.3.sh new file mode 100755 index 0000000..b4df67f --- /dev/null +++ b/ubuntu-v2.3.sh @@ -0,0 +1,531 @@ +#!/bin/bash + +# Disk Health Check Script for Ubuntu +# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid +# Supports consumer and enterprise disk classification + +SCRIPT_NAME=$(basename "$0") +VERSION="2.3" + +# Color codes +RED=$(tput setaf 1) +GREEN=$(tput setaf 2) +YELLOW=$(tput setaf 3) +BLUE=$(tput setaf 4) +CYAN=$(tput setaf 6) +MAGENTA=$(tput setaf 5) +NC=$(tput sgr0) + +# Function to print colored output +print_color() { + local color=$1 + local message=$2 + echo -e "${color}${message}${NC}" +} + +# Check if command exists +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +# Check dependencies +check_dependencies() { + local missing=() + + if ! command_exists smartctl; then + missing+=("smartmontools") + fi + + if ! 
command_exists bc; then + missing+=("bc") + fi + + if [[ ${#missing[@]} -gt 0 ]]; then + print_color $RED "Error: Missing required packages: ${missing[*]}" + echo "Install with: sudo apt update && sudo apt install ${missing[*]}" + exit 1 + fi +} + +# TBW endurance standards (using lowest numbers) +declare -A CONSUMER_TBW=( + ["250"]=150 + ["500"]=300 + ["1000"]=600 + ["2000"]=1200 + ["4000"]=2400 +) + +declare -A ENTERPRISE_TBW=( + ["250"]=450 + ["500"]=900 + ["1000"]=1800 + ["2000"]=3600 + ["4000"]=7200 +) + +# Function to get closest capacity tier +get_capacity_tier() { + local capacity_gb=$1 + local tiers=("250" "500" "1000" "2000" "4000") + + for tier in "${tiers[@]}"; do + if [[ $capacity_gb -le $tier ]]; then + echo $tier + return + fi + done + # For larger than 4TB, use 4TB tier with scaling + echo "4000" +} + +# Function to get disk type and interface +get_disk_info() { + local disk=$1 + + local info=$(smartctl -i "$disk" 2>/dev/null) + local transport="" + local disk_type="UNKNOWN" + local is_enterprise=false + + # Check if it's NVMe + if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then + disk_type="NVMe" + transport="NVMe" + # Check for SAS + elif echo "$info" | grep -qi "SAS"; then + disk_type="SAS" + transport="SAS" + is_enterprise=true + # Check for SATA SSD + elif echo "$info" | grep -qi "Solid State Device"; then + disk_type="SSD" + transport="SATA" + # Check for SATA HDD + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="HDD" + transport="SATA" + fi + + # Check for enterprise features + if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then + is_enterprise=true + fi + + # Check device type by model name + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then + 
is_enterprise=true + fi + fi + + echo "$disk_type|$transport|$is_enterprise" +} + +# Function to calculate TBW for SSD/NVMe +calculate_tbw() { + local disk_type=$1 + local raw_value=$2 + local sectors=$3 + + local tbw=0 + + if [[ -n "$sectors" && "$sectors" != "0" ]]; then + # Calculate from sectors (most common for SATA SSDs) + local bytes=$((sectors * 512)) + tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + if [[ "$disk_type" == "NVMe" ]]; then + # NVMe: raw value is in 32MB units + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") + else + # SATA SSD: various manufacturers + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") + fi + fi + + echo "$tbw" +} + +# Function to get estimated endurance +get_estimated_endurance() { + local capacity_gb=$1 + local is_enterprise=$2 + local disk_type=$3 + + # HDDs don't have TBW + if [[ "$disk_type" == "HDD" ]]; then + echo "N/A" + return + fi + + local capacity_tier=$(get_capacity_tier "$capacity_gb") + + if [[ "$is_enterprise" == "true" ]]; then + echo "${ENTERPRISE_TBW[$capacity_tier]}" + else + echo "${CONSUMER_TBW[$capacity_tier]}" + fi +} + +# Function to estimate SSD lifespan with TBW remaining +estimate_ssd_lifespan() { + local power_on_hours=$1 + local tbw_used=$2 + local estimated_endurance=$3 + local disk_type=$4 + + if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then + echo "Unknown||Unknown|New" + return + fi + + if [[ "$estimated_endurance" == "N/A" ]]; then + echo "N/A|N/A|N/A|HDD" + return + fi + + local tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0") + + if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") + local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 
2>/dev/null || echo "100") + + if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type" + elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type" + else + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type" + fi + else + echo "Unknown|${estimated_endurance} TB|New|$disk_type" + fi +} + +# Function to estimate HDD lifespan +estimate_hdd_lifespan() { + local power_on_hours=$1 + local reallocated_sectors=$2 + local pending_sectors=$3 + local start_stop_count=$4 + local load_cycle_count=$5 + local disk_type=$6 + + if [[ -z "$power_on_hours" ]]; then + echo "Unknown" + return + fi + + local severity=0 + + # Critical issues + if [[ "$pending_sectors" -gt 0 ]]; then + echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + return + elif [[ "$reallocated_sectors" -gt 100 ]]; then + severity=$((severity + 3)) + elif [[ "$reallocated_sectors" -gt 10 ]]; then + severity=$((severity + 2)) + elif [[ "$reallocated_sectors" -gt 0 ]]; then + severity=$((severity + 1)) + fi + + # Usage-based assessment + if [[ "$power_on_hours" -gt 50000 ]]; then + severity=$((severity + 3)) + elif [[ "$power_on_hours" -gt 30000 ]]; then + severity=$((severity + 2)) + elif [[ "$power_on_hours" -gt 15000 ]]; then + severity=$((severity + 1)) + fi + + # Mechanical wear (for HDDs) + if [[ "$disk_type" == "HDD" ]]; then + if [[ "$start_stop_count" -gt 50000 ]]; then + severity=$((severity + 2)) + elif [[ "$start_stop_count" -gt 20000 ]]; then + severity=$((severity + 1)) + fi + + if [[ "$load_cycle_count" -gt 500000 ]]; then + severity=$((severity + 2)) + elif [[ "$load_cycle_count" -gt 200000 ]]; then + severity=$((severity + 1)) + fi + fi + + if [[ $severity -ge 5 ]]; then + echo "${RED}< 6 months${NC} (Multiple risk factors)" 
+ elif [[ $severity -ge 3 ]]; then + echo "${YELLOW}6-18 months${NC} (Moderate wear)" + elif [[ $severity -ge 1 ]]; then + echo "${YELLOW}1-3 years${NC} (Light wear)" + else + echo "${GREEN}> 3 years${NC} (Healthy)" + fi +} + +# Function to check soft-raid (MDRAID) +check_mdraid() { + local md_devices=() + + if [[ -f /proc/mdstat ]]; then + while IFS= read -r line; do + if [[ $line =~ ^md[0-9]+ ]]; then + md_devices+=("/dev/${line%% *}") + fi + done < /proc/mdstat + fi + + for md in "${md_devices[@]}"; do + if [[ -b "$md" ]]; then + print_color $MAGENTA "Found software RAID: $md" + if command_exists mdadm; then + local md_info=$(mdadm --detail "$md" 2>/dev/null) + if [[ -n "$md_info" ]]; then + echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "" + fi + fi + fi + done +} + +# Function to check a single disk +check_disk() { + local disk=$1 + + print_color $CYAN "Checking disk: $disk" + echo "==================================================" + + # Check if disk exists and is accessible + if [[ ! -b "$disk" ]]; then + print_color $RED "Error: $disk is not a valid block device" + echo "" + return + fi + + # Get disk information + local disk_info=$(get_disk_info "$disk") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$(smartctl -i "$disk" 2>/dev/null) + local health=$(smartctl -H "$disk" 2>/dev/null) + local attributes=$(smartctl -A "$disk" 2>/dev/null) + + # Check if smartctl command succeeded + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access." 
+ echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract capacity in GB + local capacity_gb=0 + if echo "$capacity" | grep -qi "TB"; then + capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. -f1) + else + capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. -f1) + fi + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract SMART attributes + local power_on_hours=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1) + local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) + local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1) + local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}' | head -1) + local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1) + local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1) + local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}' | head -1) + + # For NVMe disks using smartctl + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null) + 
power_on_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') + total_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') + [[ -z "$total_written" ]] && total_written=$(echo "$nvme_attributes" | grep "Host_Writes_32MiB" | awk '{print $10}') + fi + + # Display basic information + echo "Model: ${model:-Unknown}" + echo "Serial: ${serial:-Unknown}" + echo "Type: $disk_type" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: ${capacity:-Unknown}" + echo "Firmware: ${firmware:-Unknown}" + echo "Health: ${health_status:-Unknown}" + echo "Power On Hours: ${power_on_hours:-Unknown}" + + # Disk type specific analysis + if [[ "$disk_type" == "HDD" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" + [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") + echo "Lifespan: $lifespan" + + elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then + local tbw_used=0 + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") + elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") + fi + + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type") + + echo "TBW Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB" + + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") + local lifespan_percent=$(echo "$lifespan_info" | 
cut -d'|' -f1) + local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) + local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) + + if [[ "$estimated_endurance" != "N/A" ]]; then + echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" + fi + + # Show mechanical attributes for SAS drives that might be SSDs + if [[ "$disk_type" == "SAS" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + fi + else + print_color $YELLOW "Unknown disk type - limited information available" + fi + + echo "" +} + +# Function to detect all disks +detect_disks() { + local disks=() + + # Check for SATA/SAS disks + for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for NVMe disks (base devices only, no partitions) + for disk in /dev/nvme[0-9]n[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for other disk types + for disk in /dev/vd[a-z] /dev/xvd[a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + echo "${disks[@]}" +} + +# Main function +main() { + print_color $BLUE "Disk Health Check Script v$VERSION for Ubuntu" + print_color $BLUE "==============================================" + echo "" + + check_dependencies + + local disks=() + + # Check for soft-raid first + check_mdraid + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks + print_color $CYAN "Auto-detecting disks..." 
+ read -ra disks <<< "$(detect_disks)" + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check if running as root, warn if not + if [[ $EUID -ne 0 ]]; then + print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible." + echo "For complete results, run as: sudo $0" + echo "" + fi + + # Check each disk + for disk in "${disks[@]}"; do + check_disk "$disk" + done + + print_color $BLUE "Check completed!" +} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " sudo $SCRIPT_NAME # Check all disks (as root)" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" + echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" +} + +# Parse command line arguments +case "${1:-}" in + -h|--help) + usage + exit 0 + ;; + -v|--version) + echo "$SCRIPT_NAME version $VERSION" + exit 0 + ;; + *) + main "$@" + ;; +esac diff --git a/ubuntu-v2.4.sh b/ubuntu-v2.4.sh new file mode 100755 index 0000000..35cd9a0 --- /dev/null +++ b/ubuntu-v2.4.sh @@ -0,0 +1,638 @@ +#!/bin/bash + +# Disk Health Check Script for Ubuntu +# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid +# Supports consumer and enterprise disk classification + +SCRIPT_NAME=$(basename "$0") +VERSION="2.4" + +# Color codes +RED=$(tput setaf 1) +GREEN=$(tput setaf 2) +YELLOW=$(tput setaf 3) +BLUE=$(tput setaf 4) +CYAN=$(tput setaf 6) +MAGENTA=$(tput setaf 5) +NC=$(tput sgr0) + +# Function to print colored output +print_color() { + local 
color=$1 + local message=$2 + echo -e "${color}${message}${NC}" +} + +# Check if command exists +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +# Check dependencies +check_dependencies() { + local missing=() + + if ! command_exists smartctl; then + missing+=("smartmontools") + fi + + if ! command_exists bc; then + missing+=("bc") + fi + + if [[ ${#missing[@]} -gt 0 ]]; then + print_color $RED "Error: Missing required packages: ${missing[*]}" + echo "Install with: sudo apt update && sudo apt install ${missing[*]}" + exit 1 + fi +} + +# TBW endurance standards (using lowest numbers) +declare -A CONSUMER_TBW=( + ["250"]=150 + ["500"]=300 + ["1000"]=600 + ["2000"]=1200 + ["4000"]=2400 + ["8000"]=4800 +) + +declare -A ENTERPRISE_TBW=( + ["250"]=450 + ["500"]=900 + ["1000"]=1800 + ["2000"]=3600 + ["4000"]=7200 + ["8000"]=14400 +) + +# Function to get closest capacity tier +get_capacity_tier() { + local capacity_gb=$1 + local tiers=("250" "500" "1000" "2000" "4000" "8000") + + for tier in "${tiers[@]}"; do + if [[ $capacity_gb -le $tier ]]; then + echo $tier + return + fi + done + # For larger than 8TB, use proportional scaling from 4TB + echo "8000" +} + +# Function to convert bytes to human readable +bytes_to_human() { + local bytes=$1 + if [[ $bytes -ge 1099511627776 ]]; then + echo "$(echo "scale=2; $bytes / 1099511627776" | bc) TB" + elif [[ $bytes -ge 1073741824 ]]; then + echo "$(echo "scale=2; $bytes / 1073741824" | bc) GB" + elif [[ $bytes -ge 1048576 ]]; then + echo "$(echo "scale=2; $bytes / 1048576" | bc) MB" + else + echo "$bytes bytes" + fi +} + +# Function to extract numeric hours from power_on_hours field +extract_numeric_hours() { + local power_on_hours=$1 + # Remove everything after non-numeric characters + local numeric_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*$//') + if [[ -n "$numeric_hours" && "$numeric_hours" =~ ^[0-9]+$ ]]; then + echo "$numeric_hours" + else + echo "0" + fi +} + +# Function to get disk type and interface 
# Function to get disk type and interface
# Arguments: $1 - block device path (e.g. /dev/sda, /dev/nvme0n1)
# Outputs:   "TYPE|TRANSPORT|IS_ENTERPRISE" on stdout, where TYPE is one of
#            NVMe, SAS, SSD, HDD or UNKNOWN, TRANSPORT is NVMe, SAS, SATA or
#            empty, and IS_ENTERPRISE is "true" or "false"
get_disk_info() {
    local disk=$1
    local info model
    local transport=""
    local disk_type="UNKNOWN"
    local is_enterprise=false

    # Only the text output is inspected; an empty result leaves the defaults.
    info=$(smartctl -i "$disk" 2>/dev/null)

    if [[ "$disk" == /dev/nvme* ]] || grep -qi "NVMe" <<<"$info"; then
        # NVMe: recognised by device name or by the identify output
        disk_type="NVMe"
        transport="NVMe"
    elif grep -qiw "SAS" <<<"$info"; then
        # SAS drives are always treated as enterprise class
        disk_type="SAS"
        transport="SAS"
        is_enterprise=true
    elif grep -qi "Solid State Device" <<<"$info"; then
        # SATA SSD (tested before the generic "Rotation Rate" match below)
        disk_type="SSD"
        transport="SATA"
    elif grep -qi "Rotation Rate" <<<"$info"; then
        # SATA HDD
        disk_type="HDD"
        transport="SATA"
    fi

    # Enterprise feature keywords. Whole-word matching (-w) is required:
    # a substring match lets "EP" hit "Sep" in the "Local Time is:" line.
    if grep -qiwE "ENTERPRISE|EP|SAS|Xeon|Xeons|DualPort|PowerLoss|PLP" <<<"$info"; then
        is_enterprise=true
    fi

    # Enterprise markers in the model name. Whole-word matching again, so
    # that "DC" does not flag every "WDC ..." (Western Digital) model.
    model=$(grep -i "Device Model:" <<<"$info" | cut -d: -f2- | sed 's/^[[:space:]]*//')
    if grep -qiwE "PRO|EP|DC|ENT|ENTERPRISE" <<<"$model"; then
        is_enterprise=true
    fi

    echo "$disk_type|$transport|$is_enterprise"
}

# Function to calculate TBW for SSD/NVMe
# Arguments: $1 - disk type (accepted for interface compatibility; the
#                 conversion does not depend on it)
#            $2 - raw write counter in 32 MiB units (used when $3 is absent)
#            $3 - write counter in 512-byte sectors (preferred when present)
# Outputs:   terabytes written (decimal TB, truncated to two places) on
#            stdout, or "0" when neither counter is a positive integer
calculate_tbw() {
    local raw_value=$2
    local sectors=$3
    local hundredths

    # Inputs are validated as plain digits before entering arithmetic, and
    # 10# forces base 10 so a leading zero is not parsed as octal.
    if [[ "$sectors" =~ ^[0-9]+$ ]] && (( 10#$sectors > 0 )); then
        # sectors * 512 bytes, expressed in hundredths of 10^12 bytes
        hundredths=$(( 10#$sectors * 512 / 10000000000 ))
    elif [[ "$raw_value" =~ ^[0-9]+$ ]] && (( 10#$raw_value > 0 )); then
        # 32 MiB = 33554432 bytes per unit
        hundredths=$(( 10#$raw_value * 33554432 / 10000000000 ))
    else
        echo "0"
        return
    fi

    # Always print a leading zero (0.51 rather than .51)
    printf '%d.%02d\n' $(( hundredths / 100 )) $(( hundredths % 100 ))
}

# Function to get estimated endurance
+get_estimated_endurance() { + local capacity_gb=$1 + local is_enterprise=$2 + local disk_type=$3 + + # HDDs don't have TBW + if [[ "$disk_type" == "HDD" ]]; then + echo "N/A" + return + fi + + local capacity_tier=$(get_capacity_tier "$capacity_gb") + + if [[ "$is_enterprise" == "true" ]]; then + echo "${ENTERPRISE_TBW[$capacity_tier]}" + else + echo "${CONSUMER_TBW[$capacity_tier]}" + fi +} + +# Function to estimate SSD lifespan with TBW remaining +estimate_ssd_lifespan() { + local power_on_hours=$1 + local tbw_used=$2 + local estimated_endurance=$3 + local disk_type=$4 + + if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then + echo "Unknown||Unknown|New" + return + fi + + if [[ "$estimated_endurance" == "N/A" ]]; then + echo "N/A|N/A|N/A|HDD" + return + fi + + # Handle the case where tbw_used might have formatting issues + local clean_tbw_used=$(echo "$tbw_used" | sed 's/[^0-9.]//g') + if [[ -z "$clean_tbw_used" ]]; then + clean_tbw_used=0 + fi + + local tbw_remaining=$(echo "scale=2; $estimated_endurance - $clean_tbw_used" | bc 2>/dev/null || echo "$estimated_endurance") + + if [[ $(echo "$clean_tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") + local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100") + + if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type" + elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type" + else + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type" + fi + else + echo "Unknown|${estimated_endurance} TB|New|$disk_type" + fi +} + +# Function to estimate HDD lifespan +estimate_hdd_lifespan() { + local 
power_on_hours=$1 + local reallocated_sectors=$2 + local pending_sectors=$3 + local start_stop_count=$4 + local load_cycle_count=$5 + local disk_type=$6 + + # Extract numeric hours only + local numeric_hours=$(extract_numeric_hours "$power_on_hours") + + if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then + echo "Unknown" + return + fi + + local severity=0 + + # Critical issues + if [[ "$pending_sectors" -gt 0 ]]; then + echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + return + elif [[ "$reallocated_sectors" -gt 100 ]]; then + severity=$((severity + 3)) + elif [[ "$reallocated_sectors" -gt 10 ]]; then + severity=$((severity + 2)) + elif [[ "$reallocated_sectors" -gt 0 ]]; then + severity=$((severity + 1)) + fi + + # Usage-based assessment + if [[ "$numeric_hours" -gt 50000 ]]; then + severity=$((severity + 3)) + elif [[ "$numeric_hours" -gt 30000 ]]; then + severity=$((severity + 2)) + elif [[ "$numeric_hours" -gt 15000 ]]; then + severity=$((severity + 1)) + fi + + # Mechanical wear (for HDDs) + if [[ "$disk_type" == "HDD" ]]; then + if [[ "$start_stop_count" -gt 50000 ]]; then + severity=$((severity + 2)) + elif [[ "$start_stop_count" -gt 20000 ]]; then + severity=$((severity + 1)) + fi + + if [[ "$load_cycle_count" -gt 500000 ]]; then + severity=$((severity + 2)) + elif [[ "$load_cycle_count" -gt 200000 ]]; then + severity=$((severity + 1)) + fi + fi + + if [[ $severity -ge 5 ]]; then + echo "${RED}< 6 months${NC} (Multiple risk factors)" + elif [[ $severity -ge 3 ]]; then + echo "${YELLOW}6-18 months${NC} (Moderate wear)" + elif [[ $severity -ge 1 ]]; then + echo "${YELLOW}1-3 years${NC} (Light wear)" + else + echo "${GREEN}> 3 years${NC} (Healthy)" + fi +} + +# Function to check soft-raid (MDRAID) +check_mdraid() { + local md_devices=() + + if [[ -f /proc/mdstat ]]; then + while IFS= read -r line; do + if [[ $line =~ ^md[0-9]+ ]]; then + md_devices+=("/dev/${line%% *}") + fi + done < /proc/mdstat + fi + + for md in "${md_devices[@]}"; 
do + if [[ -b "$md" ]]; then + print_color $MAGENTA "Found software RAID: $md" + if command_exists mdadm; then + local md_info=$(mdadm --detail "$md" 2>/dev/null) + if [[ -n "$md_info" ]]; then + echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "" + fi + fi + fi + done +} + +# Function to extract capacity in GB from various formats +extract_capacity_gb() { + local capacity=$1 + local capacity_gb=0 + + # Try different patterns to extract capacity + if [[ $capacity =~ \[([0-9,.]+)\s*[TtGg][Bb] ]]; then + # Pattern: [500.1 GB] or [1.0 TB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + if echo "$capacity" | grep -qi "TB"; then + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + else + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) + fi + elif [[ $capacity =~ ([0-9,]+)\s+bytes ]]; then + # Pattern: 500,107,862,016 bytes + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,.]+)\s*[TtGg][Bb] ]]; then + # Pattern: 500.1GB or 1.0TB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + if echo "$capacity" | grep -qi "TB"; then + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + else + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. 
-f1) + fi + fi + + # Ensure we have a valid number + if [[ -z "$capacity_gb" || "$capacity_gb" -le 0 ]]; then + echo "0" + else + echo "$capacity_gb" + fi +} + +# Function to get NVMe capacity using smartctl +get_nvme_capacity() { + local disk=$1 + local nvme_info=$(smartctl -i "$disk" 2>/dev/null) + local capacity="" + + # Try to get capacity from different fields + capacity=$(echo "$nvme_info" | grep -i "Total NVM Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "Namespace 1 Size/Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + fi + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "User Capacity" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + fi + + echo "$capacity" +} + +# Function to check a single disk +check_disk() { + local disk=$1 + + print_color $CYAN "Checking disk: $disk" + echo "==================================================" + + # Check if disk exists and is accessible + if [[ ! -b "$disk" ]]; then + print_color $RED "Error: $disk is not a valid block device" + echo "" + return + fi + + # Get disk information + local disk_info=$(get_disk_info "$disk") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$(smartctl -i "$disk" 2>/dev/null) + local health=$(smartctl -H "$disk" 2>/dev/null) + local attributes=$(smartctl -A "$disk" 2>/dev/null) + + # Check if smartctl command succeeded + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access." 
+ echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # For NVMe disks, try to get capacity from different fields + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_capacity=$(get_nvme_capacity "$disk") + if [[ -n "$nvme_capacity" ]]; then + capacity="$nvme_capacity" + fi + fi + + # Extract capacity in GB and human readable format + local capacity_gb=$(extract_capacity_gb "$capacity") + local capacity_human=$(bytes_to_human "$(echo "$capacity_gb * 1000 * 1000 * 1000" | bc 2>/dev/null)") + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract SMART attributes + local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1) + local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw") + local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) + local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1) + local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}' | head -1) + local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1) + local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1) + local load_cycle_count=$(echo "$attributes" 
| grep "Load_Cycle_Count" | awk '{print $10}' | head -1) + + # For NVMe disks using smartctl extended attributes + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null) + # Extract data units written for NVMe + local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + # Convert data units to sectors (1 data unit = 1000 sectors for NVMe) + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + fi + # Get power on hours for NVMe + local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') + if [[ -n "$nvme_power_hours" ]]; then + power_on_hours="$nvme_power_hours" + fi + fi + + # Display basic information + echo "Model: ${model:-Unknown}" + echo "Serial: ${serial:-Unknown}" + echo "Type: $disk_type" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: $capacity_human" + echo "Firmware: ${firmware:-Unknown}" + echo "Health: ${health_status:-Unknown}" + echo "Power On Hours: ${power_on_hours:-Unknown}" + + # Disk type specific analysis + if [[ "$disk_type" == "HDD" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" + [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") + echo "Lifespan: $lifespan" + + elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then + local tbw_used=0 + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") + elif [[ -n "$host_writes_32mib" && 
"$host_writes_32mib" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") + fi + + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type") + + echo "TBW Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)" + + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") + local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) + local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) + local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) + + if [[ "$estimated_endurance" != "N/A" ]]; then + echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" + fi + + # Show mechanical attributes for SAS drives that might be SSDs + if [[ "$disk_type" == "SAS" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + fi + else + print_color $YELLOW "Unknown disk type - limited information available" + fi + + echo "" +} + +# Function to detect all disks +detect_disks() { + local disks=() + + # Check for SATA/SAS disks + for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for NVMe disks (base devices only, no partitions) + for disk in /dev/nvme[0-9]n[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for other disk types + for disk in /dev/vd[a-z] /dev/xvd[a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + echo "${disks[@]}" +} + +# Main function +main() { + print_color $BLUE "Disk Health Check Script v$VERSION for Ubuntu" + print_color $BLUE "==============================================" + echo "" + + check_dependencies + + local disks=() + + # Check for soft-raid first + check_mdraid + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; 
do + if [[ -b "$disk" ]]; then + disks+=("$disk") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks + print_color $CYAN "Auto-detecting disks..." + read -ra disks <<< "$(detect_disks)" + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check if running as root, warn if not + if [[ $EUID -ne 0 ]]; then + print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible." + echo "For complete results, run as: sudo $0" + echo "" + fi + + # Check each disk + for disk in "${disks[@]}"; do + check_disk "$disk" + done + + print_color $BLUE "Check completed!" + echo "" + print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers." + print_color $YELLOW " Actual endurance for your specific drive model may be higher." 
+} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " sudo $SCRIPT_NAME # Check all disks (as root)" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" + echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" +} + +# Parse command line arguments +case "${1:-}" in + -h|--help) + usage + exit 0 + ;; + -v|--version) + echo "$SCRIPT_NAME version $VERSION" + exit 0 + ;; + *) + main "$@" + ;; +esac diff --git a/ubuntu-v2.5.sh b/ubuntu-v2.5.sh index bb64ff9..ea67d35 100755 --- a/ubuntu-v2.5.sh +++ b/ubuntu-v2.5.sh @@ -1,8 +1,8 @@ #!/bin/bash -# Disk Health Check Script for Ubuntu 24.04 -# Enhanced with SAS/PERC H730P controller support -# Checks SSD TBW/lifespan and HDD health status +# Disk Health Check Script for Ubuntu +# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid +# Supports consumer and enterprise disk classification SCRIPT_NAME=$(basename "$0") VERSION="2.5" @@ -13,6 +13,7 @@ GREEN=$(tput setaf 2) YELLOW=$(tput setaf 3) BLUE=$(tput setaf 4) CYAN=$(tput setaf 6) +MAGENTA=$(tput setaf 5) NC=$(tput sgr0) # Function to print colored output @@ -46,190 +47,153 @@ check_dependencies() { fi } -# Function to test SMART access and get available data - FIXED VERSION -test_smart_access() { - local disk=$1 - local controller=$2 - - local smart_cmd="smartctl" - [[ -n "$controller" ]] && smart_cmd+=" -d $controller" - - # Test basic SMART access - if ! $smart_cmd -i "$disk" &>/dev/null; then - echo "no_access" - return - fi - - # Get SMART information - local smart_info=$($smart_cmd -i "$disk" 2>/dev/null) - - # Check if SMART is available - FIXED PARSING - if ! 
echo "$smart_info" | grep -q "SMART support is:"; then - echo "not_available" - return - fi - - # Extract SMART status - FIXED LOGIC - local smart_support_line=$(echo "$smart_info" | grep "SMART support is:") - local smart_available=$(echo "$smart_support_line" | grep -q "Available" && echo "Available" || echo "") - local smart_enabled=$(echo "$smart_support_line" | grep -q "Enabled" && echo "Enabled" || echo "") - - if [[ -z "$smart_available" ]]; then - echo "not_available" - return - fi - - if [[ -z "$smart_enabled" ]]; then - echo "disabled" - return - fi - - # Test attribute reading - local attributes=$($smart_cmd -A "$disk" 2>/dev/null) - if [[ -z "$attributes" ]] || ! echo "$attributes" | grep -q "ATTRIBUTE_NAME"; then - echo "no_attributes" - return - fi - - echo "full_access" -} +# TBW endurance standards (using lowest numbers) +declare -A CONSUMER_TBW=( + ["250"]=150 + ["500"]=300 + ["1000"]=600 + ["2000"]=1200 + ["4000"]=2400 + ["8000"]=4800 +) -# Function to get disk information with enhanced SAS support -get_disk_info() { - local disk=$1 - local controller=$2 - - local smart_cmd="smartctl" - [[ -n "$controller" ]] && smart_cmd+=" -d $controller" - - local info=$($smart_cmd -i "$disk" 2>/dev/null) - local attributes=$($smart_cmd -A "$disk" 2>/dev/null) - local health=$($smart_cmd -H "$disk" 2>/dev/null) - - # Extract information with multiple fallbacks for SAS drives - local model=$(echo "$info" | grep -i "Device Model:\|Product:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - local vendor=$(echo "$info" | grep -i "Vendor:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model" - - local serial=$(echo "$info" | grep -i "Serial Number:\|Serial number:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - - local capacity=$(echo "$info" | grep -i "User Capacity:\|Total NVM Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1 | head -1) - - local firmware=$(echo "$info" | grep -i "Firmware 
Version:\|Firmware revision:\|Revision:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - - local health_status=$(echo "$health" | grep -i "result:\|SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep -i "SMART overall-health" | awk -F'[' '{print $2}' | cut -d']' -f1) - - # Get disk type with SAS support - local disk_type="UNKNOWN" - if echo "$info" | grep -qi "Solid State Device"; then - disk_type="SSD" - elif echo "$info" | grep -qi "Rotation Rate"; then - disk_type="HDD" - elif echo "$info" | grep -qi "SCSI\|SAS"; then - # SAS drives often don't specify, check rotation rate - if echo "$info" | grep -qi "15000\|10000\|7200"; then - disk_type="HDD" - else - disk_type="SSD" - fi - fi - - # Extract SMART attributes with multiple field attempts - local power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours" | awk '{print $10}' | head -1) - - local reallocated_sectors=$(echo "$attributes" | grep -i "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) - - local pending_sectors=$(echo "$attributes" | grep -i "Current_Pending_Sector" | awk '{print $10}' | head -1) - - # For Kingston and other SSDs with different attribute names - local total_written=$(echo "$attributes" | grep -i "Total_LBAs_Written\|Lifetime_Writes_GiB\|Host_Writes_32MiB\|Flash_Writes_GiB" | awk '{print $10}' | head -1) - local host_writes_32mib=$(echo "$attributes" | grep -i "Host_Writes_32MiB" | awk '{print $10}' | head -1) - - # For wear leveling indicators - local media_wearout=$(echo "$attributes" | grep -i "Media_Wearout_Indicator\|Wear_Leveling_Count\|SSD_Life_Left" | awk '{print $10}' | head -1) - - echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout" -} +declare -A ENTERPRISE_TBW=( + ["250"]=450 + ["500"]=900 + ["1000"]=1800 + ["2000"]=3600 + ["4000"]=7200 + ["8000"]=14400 +) -# Function 
to calculate TBW for SSD - ENHANCED FOR KINGSTON -calculate_tbw() { - local raw_value=$1 - local sectors=$2 - local disk_model=$3 +# Function to get closest capacity tier +get_capacity_tier() { + local capacity_gb=$1 + local tiers=("250" "500" "1000" "2000" "4000" "8000") - # Kingston SSDs use Lifetime_Writes_GiB and Flash_Writes_GiB - if echo "$disk_model" | grep -qi "KINGSTON"; then - if [[ -n "$raw_value" && "$raw_value" != "0" ]]; then - # Convert from GiB to TB - local tbw=$(echo "scale=2; $raw_value / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" + for tier in "${tiers[@]}"; do + if [[ $capacity_gb -le $tier ]]; then + echo $tier return fi + done + # For larger than 8TB, use proportional scaling from 4TB + echo "8000" +} + +# Function to convert bytes to human readable +bytes_to_human() { + local bytes=$1 + if [[ $bytes -ge 1099511627776 ]]; then + echo "$(echo "scale=2; $bytes / 1099511627776" | bc 2>/dev/null || echo "0") TB" + elif [[ $bytes -ge 1073741824 ]]; then + echo "$(echo "scale=2; $bytes / 1073741824" | bc 2>/dev/null || echo "0") GB" + elif [[ $bytes -ge 1048576 ]]; then + echo "$(echo "scale=2; $bytes / 1048576" | bc 2>/dev/null || echo "0") MB" + else + echo "$bytes bytes" fi - - if [[ -n "$sectors" && "$sectors" != "0" ]]; then - local bytes=$((sectors * 512)) - local tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" - elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then - local tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" +} + +# Function to extract numeric hours from power_on_hours field +extract_numeric_hours() { + local power_on_hours=$1 + # Remove everything after non-numeric characters + local numeric_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*$//') + if [[ -n "$numeric_hours" && "$numeric_hours" =~ ^[0-9]+$ ]]; then + echo "$numeric_hours" else echo "0" fi } -# Function to estimate SSD endurance based on model and capacity 
-estimate_ssd_endurance() { - local disk_model=$1 - local capacity_gb=$2 +# Function to get disk type and interface +get_disk_info() { + local disk=$1 - # Kingston consumer SSDs - if echo "$disk_model" | grep -qi "KINGSTON.*SA400"; then - if [[ $capacity_gb -ge 960 ]]; then - echo "300" # 300TB for 960GB Kingston SA400 - elif [[ $capacity_gb -ge 480 ]]; then - echo "150" # 150TB for 480GB Kingston - else - echo "80" # 80TB for smaller Kingston + local info=$(smartctl -i "$disk" 2>/dev/null) + local transport="" + local disk_type="UNKNOWN" + local is_enterprise=false + + # Check if it's NVMe + if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then + disk_type="NVMe" + transport="NVMe" + # Check for SAS + elif echo "$info" | grep -qi "SAS"; then + disk_type="SAS" + transport="SAS" + is_enterprise=true + # Check for SATA SSD + elif echo "$info" | grep -qi "Solid State Device"; then + disk_type="SSD" + transport="SATA" + # Check for SATA HDD + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="HDD" + transport="SATA" + fi + + # Check for enterprise features + if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then + is_enterprise=true + fi + + # Check device type by model name + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then + is_enterprise=true fi - # SAS SSDs typically have very high endurance - elif echo "$disk_model" | grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD"; then - # Enterprise SAS SSDs - very high endurance - if [[ $capacity_gb -ge 1000 ]]; then - echo "10000" # 10PB for 1TB+ enterprise SAS SSD - elif [[ $capacity_gb -ge 600 ]]; then - echo "6000" # 6PB for 600GB enterprise SAS SSD - elif [[ $capacity_gb -ge 400 ]]; then - echo "4000" # 4PB for 400GB enterprise SAS SSD + fi + 
+ echo "$disk_type|$transport|$is_enterprise" +} + +# Function to calculate TBW for SSD/NVMe +calculate_tbw() { + local disk_type=$1 + local raw_value=$2 + local sectors=$3 + + local tbw=0 + + if [[ -n "$sectors" && "$sectors" != "0" ]]; then + # Calculate from sectors (most common for SATA SSDs) + local bytes=$((sectors * 512)) + tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + if [[ "$disk_type" == "NVMe" ]]; then + # NVMe: raw value is in 32MB units + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") else - echo "2000" # 2PB for smaller enterprise SAS SSD - fi - elif echo "$disk_model" | grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL"; then - # Enterprise SATA/NVMe SSDs - if [[ $capacity_gb -ge 1000 ]]; then - echo "1200" # 1.2PB for 1TB enterprise - elif [[ $capacity_gb -ge 480 ]]; then - echo "600" # 600TB for 480GB enterprise - elif [[ $capacity_gb -ge 240 ]]; then - echo "300" # 300TB for 240GB enterprise - else - echo "150" # 150TB for smaller enterprise + # SATA SSD: various manufacturers + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") fi + fi + + echo "$tbw" +} + +# Function to get estimated endurance +get_estimated_endurance() { + local capacity_gb=$1 + local is_enterprise=$2 + local disk_type=$3 + + # HDDs don't have TBW + if [[ "$disk_type" == "HDD" ]]; then + echo "N/A" + return + fi + + local capacity_tier=$(get_capacity_tier "$capacity_gb") + + if [[ "$is_enterprise" == "true" ]]; then + echo "${ENTERPRISE_TBW[$capacity_tier]}" else - # Consumer SSDs - if [[ $capacity_gb -ge 1000 ]]; then - echo "600" # 600TB for 1TB consumer - elif [[ $capacity_gb -ge 480 ]]; then - echo "300" # 300TB for 480GB consumer - elif [[ $capacity_gb -ge 240 ]]; then - echo "150" # 150TB for 240GB consumer - elif [[ $capacity_gb -ge 120 ]]; then - echo "80" # 80TB for 120GB consumer - 
else - echo "40" # 40TB for smaller drives - fi + echo "${CONSUMER_TBW[$capacity_tier]}" fi } @@ -237,60 +201,40 @@ estimate_ssd_endurance() { estimate_ssd_lifespan() { local power_on_hours=$1 local tbw_used=$2 - local disk_model=$3 - local capacity_gb=$4 - local media_wearout=$5 + local estimated_endurance=$3 + local disk_type=$4 if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then - echo "Unknown||Unknown||Unknown" + echo "Unknown||Unknown|New" return fi - local estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb") - local tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0") - - # If we have media wearout indicator, use it for more accurate estimation - if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then - # For Kingston, SSD_Life_Left is already a percentage - if echo "$disk_model" | grep -qi "KINGSTON"; then - if [[ $media_wearout -le 10 ]]; then - echo "${RED}${media_wearout}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" - elif [[ $media_wearout -le 30 ]]; then - echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout" - elif [[ $media_wearout -le 70 ]]; then - echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" - else - echo "${GREEN}${media_wearout}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" - fi - else - # For other drives, media_wearout might be countdown from 100 - local wear_percent=$media_wearout - if [[ $media_wearout -le 10 ]]; then - echo "${RED}${wear_percent}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" - elif [[ $media_wearout -le 30 ]]; then - echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout" - elif [[ $media_wearout -le 70 ]]; then - echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} 
TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" - else - echo "${GREEN}${wear_percent}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" - fi - fi + if [[ "$estimated_endurance" == "N/A" ]]; then + echo "N/A|N/A|N/A|HDD" return fi - if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then - local lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") + # Handle the case where tbw_used might have formatting issues + local clean_tbw_used=$(echo "$tbw_used" | sed 's/[^0-9.]//g') + if [[ -z "$clean_tbw_used" ]]; then + clean_tbw_used=0 + fi + + local tbw_remaining=$(echo "scale=2; $estimated_endurance - $clean_tbw_used" | bc 2>/dev/null || echo "$estimated_endurance") + + if [[ $(echo "$clean_tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100") if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then - echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}High wear${NC}|tbw" + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type" elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then - echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|tbw" + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type" else - echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|tbw" + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type" fi else - echo "Unknown|${estimated_endurance} TB|New|estimated" + echo "Unknown|${estimated_endurance} TB|New|$disk_type" fi } @@ -299,185 +243,344 @@ estimate_hdd_lifespan() { local power_on_hours=$1 local 
reallocated_sectors=$2 local pending_sectors=$3 + local start_stop_count=$4 + local load_cycle_count=$5 + local disk_type=$6 - if [[ -z "$power_on_hours" ]]; then + # Extract numeric hours only + local numeric_hours=$(extract_numeric_hours "$power_on_hours") + + if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then echo "Unknown" return fi - power_on_hours=${power_on_hours:-0} - reallocated_sectors=${reallocated_sectors:-0} - pending_sectors=${pending_sectors:-0} + local severity=0 + # Critical issues if [[ "$pending_sectors" -gt 0 ]]; then echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + return elif [[ "$reallocated_sectors" -gt 100 ]]; then - echo "${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)" + severity=$((severity + 3)) elif [[ "$reallocated_sectors" -gt 10 ]]; then - echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)" - elif [[ "$power_on_hours" -gt 40000 ]]; then - echo "${YELLOW}1-2 years${NC} (High usage: $power_on_hours hours)" - elif [[ "$power_on_hours" -gt 25000 ]]; then - echo "${GREEN}2-3 years${NC} (Moderate usage: $power_on_hours hours)" + severity=$((severity + 2)) + elif [[ "$reallocated_sectors" -gt 0 ]]; then + severity=$((severity + 1)) + fi + + # Usage-based assessment + if [[ "$numeric_hours" -gt 50000 ]]; then + severity=$((severity + 3)) + elif [[ "$numeric_hours" -gt 30000 ]]; then + severity=$((severity + 2)) + elif [[ "$numeric_hours" -gt 15000 ]]; then + severity=$((severity + 1)) + fi + + # Mechanical wear (for HDDs) + if [[ "$disk_type" == "HDD" ]]; then + if [[ "$start_stop_count" -gt 50000 ]]; then + severity=$((severity + 2)) + elif [[ "$start_stop_count" -gt 20000 ]]; then + severity=$((severity + 1)) + fi + + if [[ "$load_cycle_count" -gt 500000 ]]; then + severity=$((severity + 2)) + elif [[ "$load_cycle_count" -gt 200000 ]]; then + severity=$((severity + 1)) + fi + fi + + if [[ $severity -ge 5 ]]; then + echo "${RED}< 6 months${NC} (Multiple risk 
factors)" + elif [[ $severity -ge 3 ]]; then + echo "${YELLOW}6-18 months${NC} (Moderate wear)" + elif [[ $severity -ge 1 ]]; then + echo "${YELLOW}1-3 years${NC} (Light wear)" else - echo "${GREEN}> 3 years${NC} (Low usage: $power_on_hours hours)" + echo "${GREEN}> 3 years${NC} (Healthy)" fi } -# Function to check a single disk with enhanced error handling +# Function to check soft-raid (MDRAID) +check_mdraid() { + local md_devices=() + + if [[ -f /proc/mdstat ]]; then + while IFS= read -r line; do + if [[ $line =~ ^md[0-9]+ ]]; then + md_devices+=("/dev/${line%% *}") + fi + done < /proc/mdstat + fi + + for md in "${md_devices[@]}"; do + if [[ -b "$md" ]]; then + print_color $MAGENTA "Found software RAID: $md" + if command_exists mdadm; then + local md_info=$(mdadm --detail "$md" 2>/dev/null) + if [[ -n "$md_info" ]]; then + echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "" + fi + fi + fi + done +} + +# Function to extract capacity in GB from various formats +extract_capacity_gb() { + local capacity=$1 + local capacity_gb=0 + + # Try different patterns to extract capacity + if [[ $capacity =~ \[([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: [1.82 TB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ \[([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: [500.1 GB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,]+)\s+bytes ]]; then + # Pattern: 500,107,862,016 bytes + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. 
-f1) + elif [[ $capacity =~ ([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: 1.82TB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: 500.1GB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) + fi + + # Ensure we have a valid number + if [[ -z "$capacity_gb" || ! "$capacity_gb" =~ ^[0-9]+$ || "$capacity_gb" -le 0 ]]; then + echo "0" + else + echo "$capacity_gb" + fi +} + +# Function to get NVMe capacity using smartctl +get_nvme_capacity() { + local disk=$1 + local nvme_info=$(smartctl -i "$disk" 2>/dev/null) + local capacity="" + + # Try to get capacity from different fields + capacity=$(echo "$nvme_info" | grep -i "Total NVM Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "Namespace 1 Size/Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + fi + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "User Capacity" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + fi + + echo "$capacity" +} + +# Function to get human readable capacity +get_human_capacity() { + local capacity=$1 + local capacity_gb=$2 + + if [[ $capacity_gb -ge 1000 ]]; then + echo "$(echo "scale=2; $capacity_gb / 1000" | bc) TB" + else + echo "${capacity_gb} GB" + fi +} + +# Function to check a single disk check_disk() { local disk=$1 - local controller=$2 - print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})" + print_color $CYAN "Checking disk: $disk" echo "==================================================" - # Test SMART access level - local access_level=$(test_smart_access "$disk" "$controller") - - case $access_level in - "no_access") - print_color $RED "ERROR: Cannot access disk through controller" - echo "Possible reasons:" - echo " - Controller doesn't support SMART 
passthrough" - echo " - Disk is part of a hardware RAID array" - echo " - Insufficient permissions (try running as root)" - echo " - Controller busy or offline" - echo "" - return - ;; - "not_available") - print_color $YELLOW "SMART not available on this disk" - echo "This disk does not support SMART monitoring" - echo "" - return - ;; - "disabled") - print_color $YELLOW "SMART is disabled on this disk" - echo "SMART is available but currently disabled" - echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk" - echo "" - return - ;; - "no_attributes") - print_color $YELLOW "WARNING: Cannot read SMART attributes" - echo "This is common with hardware RAID controllers like PERC H730P" - echo "Try checking through the RAID management interface" - echo "" - return - ;; - "limited_attributes") - print_color $YELLOW "NOTE: Limited SMART data available" - echo "Controller is filtering some SMART attributes" - ;; - esac + # Check if disk exists and is accessible + if [[ ! -b "$disk" ]]; then + print_color $RED "Error: $disk is not a valid block device" + echo "" + return + fi # Get disk information - local disk_info=$(get_disk_info "$disk" "$controller") - IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info" + local disk_info=$(get_disk_info "$disk") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$(smartctl -i "$disk" 2>/dev/null) + local health=$(smartctl -H "$disk" 2>/dev/null) + local attributes=$(smartctl -A "$disk" 2>/dev/null) + + # Check if smartctl command succeeded + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access." 
+ echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # For NVMe disks, try to get capacity from different fields + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_capacity=$(get_nvme_capacity "$disk") + if [[ -n "$nvme_capacity" ]]; then + capacity="$nvme_capacity" + fi + fi + + # Extract capacity in GB and human readable format + local capacity_gb=$(extract_capacity_gb "$capacity") + local capacity_human=$(get_human_capacity "$capacity" "$capacity_gb") + + # If capacity extraction failed, try alternative method + if [[ "$capacity_gb" -eq 0 ]]; then + # Try to get capacity from model name or other methods + if [[ "$disk_type" == "NVMe" && "$model" =~ 500[Gg] ]]; then + capacity_gb=500 + capacity_human="500 GB" + elif [[ "$model" =~ 960[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then + capacity_gb=1000 + capacity_human="1 TB" + elif [[ "$model" =~ 2[Tt] ]]; then + capacity_gb=2000 + capacity_human="2 TB" + elif [[ "$model" =~ 500[Gg] ]]; then + capacity_gb=500 + capacity_human="500 GB" + elif [[ "$model" =~ 250[Gg] ]]; then + capacity_gb=250 + capacity_human="250 GB" + fi + fi + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract SMART attributes + local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1) + local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw") 
+ local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) + local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1) + local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}' | head -1) + local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1) + local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1) + local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}' | head -1) + + # For NVMe disks using smartctl extended attributes + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null) + # Extract data units written for NVMe + local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $4}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + # Convert data units to sectors (1 data unit = 1000 sectors for NVMe) + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + else + # Try alternative field + data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + fi + fi + # Get power on hours for NVMe + local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') + if [[ -n "$nvme_power_hours" ]]; then + power_on_hours="$nvme_power_hours" + fi + fi # Display basic information echo "Model: ${model:-Unknown}" echo "Serial: ${serial:-Unknown}" echo "Type: $disk_type" - echo "Capacity: ${capacity:-Unknown}" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: $capacity_human" echo "Firmware: ${firmware:-Unknown}" echo "Health: ${health_status:-Unknown}" - 
- # Only show power on hours if available - if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then - echo "Power On Hours: $power_on_hours" - else - echo "Power On Hours: Unknown" - fi + echo "Power On Hours: ${power_on_hours:-Unknown}" # Disk type specific analysis - if [[ "$disk_type" == "SSD" ]]; then + if [[ "$disk_type" == "HDD" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" + [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") + echo "Lifespan: $lifespan" + + elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then local tbw_used=0 if [[ -n "$total_written" && "$total_written" != "0" ]]; then - tbw_used=$(calculate_tbw "" "$total_written" "$model") + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then - tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model") + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") fi - if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then - echo "TBW Used: ${tbw_used} TB" - fi + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type") - # Estimate capacity for endurance calculation - local capacity_gb=0 - if echo "$capacity" | grep -qi "GB"; then - capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. -f1) - elif echo "$capacity" | grep -qi "TB"; then - capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. 
-f1) - fi + echo "TBW Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)" - local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout") + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) - local wear_source=$(echo "$lifespan_info" | cut -d'|' -f4) - if [[ "$wear_source" != "media_wearout" && $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + if [[ "$estimated_endurance" != "N/A" ]]; then echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" fi - echo "Lifespan: $lifespan_percent ($wear_status)" - - # Show wear source if available - if [[ "$wear_source" == "media_wearout" ]]; then - echo "Wear Source: Media Wearout Indicator" - elif [[ "$wear_source" == "tbw" ]]; then - echo "Wear Source: TBW Calculation" - elif [[ "$wear_source" == "estimated" ]]; then - echo "Wear Source: Estimated Endurance" + # Show mechanical attributes for SAS drives that might be SSDs + if [[ "$disk_type" == "SAS" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" fi - - elif [[ "$disk_type" == "HDD" ]]; then - if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then - echo "Realloc Sectors: $reallocated_sectors" - fi - if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then - echo "Pending Sectors: $pending_sectors" - fi - - local lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}") - echo "Lifespan: $lifespan" else - print_color $YELLOW "Limited information available for this disk type" - echo "This is normal for hardware RAID configurations like PERC 
H730P" - echo "For detailed SAS drive information, use controller management tools" + print_color $YELLOW "Unknown disk type - limited information available" fi echo "" } -# Function to detect all disks with enhanced SAS support (no partitions) - FIXED +# Function to detect all disks detect_disks() { local disks=() - # Check for SATA/SAS disks - only main devices, no partitions - for disk in /dev/sd[a-z]; do + # Check for SATA/SAS disks + for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do if [[ -b "$disk" ]]; then disks+=("$disk") fi done - # Check for NVMe disks - only main devices, no partitions + # Check for NVMe disks (base devices only, no partitions) for disk in /dev/nvme[0-9]n[0-9]; do if [[ -b "$disk" ]]; then disks+=("$disk") fi done - # Check for SAS disks via SCSI generic - only main devices - for disk in /dev/sg[0-9]; do - if [[ -b "$disk" ]]; then - disks+=("$disk") - fi - done - - # Check for other disk types - only main devices + # Check for other disk types for disk in /dev/vd[a-z] /dev/xvd[a-z]; do if [[ -b "$disk" ]]; then disks+=("$disk") @@ -487,40 +590,19 @@ detect_disks() { echo "${disks[@]}" } -# Function to detect RAID controllers (Ubuntu specific) - FIXED -detect_raid_controllers() { - local controllers=("megaraid" "cciss" "areca" "3ware" "hpt") - local raid_disks=() - - # Check for RAID controllers - for controller in "${controllers[@]}"; do - for i in {0..31}; do - # Try different disk devices for each controller - for base_disk in "/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1"; do - if [[ -b "$base_disk" ]]; then - if smartctl -d "$controller,$i" -i "$base_disk" &>/dev/null; then - raid_disks+=("$base_disk:$controller,$i") - break - fi - fi - done - done - done - - echo "${raid_disks[@]}" -} - -# Main function - FIXED +# Main function main() { - print_color $BLUE "Ubuntu 24.04 Disk Health Check Script v$VERSION" - print_color $BLUE "Enhanced with PERC H730P and SAS Support" - print_color $BLUE 
"============================================" + print_color $BLUE "Disk Health Check Script v$VERSION for Ubuntu" + print_color $BLUE "==============================================" echo "" check_dependencies local disks=() + # Check for soft-raid first + check_mdraid + # If specific disk provided, check only that disk if [[ $# -gt 0 ]]; then for disk in "$@"; do @@ -531,17 +613,9 @@ main() { fi done else - # Auto-detect disks - FIXED: don't mix output with disk detection - print_color $CYAN "Auto-detecting disks (excluding partitions)..." - local direct_disks=() - read -ra direct_disks <<< "$(detect_disks)" - - print_color $CYAN "Scanning for RAID controllers..." - local raid_disks=() - read -ra raid_disks <<< "$(detect_raid_controllers)" - - # Combine both lists - disks=("${direct_disks[@]}" "${raid_disks[@]}") + # Auto-detect disks + print_color $CYAN "Auto-detecting disks..." + read -ra disks <<< "$(detect_disks)" fi if [[ ${#disks[@]} -eq 0 ]]; then @@ -555,33 +629,20 @@ main() { # Check if running as root, warn if not if [[ $EUID -ne 0 ]]; then - print_color $YELLOW "Warning: Not running as root." - print_color $YELLOW "Some disks/controllers may show limited information." + print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible." echo "For complete results, run as: sudo $0" echo "" fi # Check each disk - for disk_info in "${disks[@]}"; do - # Check if this is a RAID disk (has controller specified) - if [[ "$disk_info" == *":"* ]]; then - IFS=':' read -r disk controller <<< "$disk_info" - check_disk "$disk" "$controller" - else - check_disk "$disk_info" - fi + for disk in "${disks[@]}"; do + check_disk "$disk" done print_color $BLUE "Check completed!" 
echo "" - print_color $CYAN "Note: For PERC H730P controllers with SAS drives:" - print_color $CYAN " - Install 'storcli' for detailed controller information" - print_color $CYAN " - Use 'smartctl -d sat /dev/sgX' to try direct access" - print_color $CYAN " - Hardware RAID controllers often limit SMART data access" - echo "" - print_color $CYAN "Ubuntu-specific tips:" - print_color $CYAN " - Use 'lsblk' to see all available block devices" - print_color $CYAN " - Use 'lshw -class disk' for detailed disk information" + print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers." + print_color $YELLOW " Actual endurance for your specific drive model may be higher." } # Usage information @@ -594,9 +655,10 @@ usage() { echo " $SCRIPT_NAME # Check all auto-detected disks" echo " sudo $SCRIPT_NAME # Check all disks (as root)" echo " $SCRIPT_NAME /dev/sda # Check specific disk" - echo " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" - echo " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" } # Parse command line arguments