From e7c4a83b73dbb248aab248021e58a5830ccc6976 Mon Sep 17 00:00:00 2001 From: "Adam.Tony" Date: Wed, 22 Oct 2025 15:02:09 +0800 Subject: [PATCH] moved old files --- alma-v2.1.sh => old/alma-v2.1.sh | 0 alma-v2.3.sh => old/alma-v2.3.sh | 0 alma-v2.5.sh => old/alma-v2.5.sh | 0 alma-v2.6.sh => old/alma-v2.6.sh | 0 ubuntu-v2.5.sh => old/alma-v2.7.sh | 128 ++- harvester-v2.1.sh => old/harvester-v2.1.sh | 0 harvester-v2.3.sh => old/harvester-v2.3.sh | 0 harvester-v2.5.sh => old/harvester-v2.5.sh | 0 harvester-v2.6.sh => old/harvester-v2.6.sh | 0 harvester-v2.7.sh => old/harvester-v2.7.sh | 0 harvester-v2.9.sh => old/harvester-v2.9.sh | 0 harvester-v3.0.sh => old/harvester-v3.0.sh | 0 harvester-v3.1.sh => old/harvester-v3.1.sh | 0 harvester-v3.2.sh => old/harvester-v3.2.sh | 0 harvester-v3.3.sh => old/harvester-v3.3.sh | 0 harvester-v3.4.sh => old/harvester-v3.4.sh | 0 harvester-v3.5.sh => old/harvester-v3.5.sh | 0 harvester-v3.6.sh => old/harvester-v3.6.sh | 0 harvester-v3.7.sh => old/harvester-v3.7.sh | 0 .../harvester-v3.8-old.sh | 0 old/harvester-v3.8.sh | 869 ++++++++++++++++ old/{ => obsolete}/alma-v2.4.sh | 0 old/{ => obsolete}/harvester-v2.4.sh | 0 old/obsolete/ubuntu-v2.5.sh | 615 ++++++++++++ old/obsolete/ubuntu-v2.6.sh | 650 ++++++++++++ old/obsolete/ubuntu-v2.7.sh | 690 +++++++++++++ old/{ => obsolete}/ubuntu-v2.8.sh | 0 old/{ => obsolete}/ubuntu-v3.0.sh | 0 ubuntu-v2.3.sh => old/ubuntu-v2.3.sh | 0 ubuntu-v2.4.sh => old/ubuntu-v2.4.sh | 0 old/ubuntu-v2.5.sh | 826 ++++++++------- old/ubuntu-v2.6.sh | 941 ++++++++++-------- old/ubuntu-v2.7.sh | 921 ++++++++--------- ubuntu-v2.6.sh | 759 -------------- 34 files changed, 4343 insertions(+), 2056 deletions(-) rename alma-v2.1.sh => old/alma-v2.1.sh (100%) mode change 100755 => 100644 rename alma-v2.3.sh => old/alma-v2.3.sh (100%) mode change 100755 => 100644 rename alma-v2.5.sh => old/alma-v2.5.sh (100%) mode change 100755 => 100644 rename alma-v2.6.sh => old/alma-v2.6.sh (100%) mode change 100755 => 100644 rename 
ubuntu-v2.5.sh => old/alma-v2.7.sh (85%) mode change 100755 => 100644 rename harvester-v2.1.sh => old/harvester-v2.1.sh (100%) mode change 100755 => 100644 rename harvester-v2.3.sh => old/harvester-v2.3.sh (100%) mode change 100755 => 100644 rename harvester-v2.5.sh => old/harvester-v2.5.sh (100%) mode change 100755 => 100644 rename harvester-v2.6.sh => old/harvester-v2.6.sh (100%) mode change 100755 => 100644 rename harvester-v2.7.sh => old/harvester-v2.7.sh (100%) mode change 100755 => 100644 rename harvester-v2.9.sh => old/harvester-v2.9.sh (100%) mode change 100755 => 100644 rename harvester-v3.0.sh => old/harvester-v3.0.sh (100%) mode change 100755 => 100644 rename harvester-v3.1.sh => old/harvester-v3.1.sh (100%) mode change 100755 => 100644 rename harvester-v3.2.sh => old/harvester-v3.2.sh (100%) mode change 100755 => 100644 rename harvester-v3.3.sh => old/harvester-v3.3.sh (100%) mode change 100755 => 100644 rename harvester-v3.4.sh => old/harvester-v3.4.sh (100%) mode change 100755 => 100644 rename harvester-v3.5.sh => old/harvester-v3.5.sh (100%) rename harvester-v3.6.sh => old/harvester-v3.6.sh (100%) rename harvester-v3.7.sh => old/harvester-v3.7.sh (100%) rename harvester-v3.8-old.sh => old/harvester-v3.8-old.sh (100%) create mode 100644 old/harvester-v3.8.sh rename old/{ => obsolete}/alma-v2.4.sh (100%) mode change 100755 => 100644 rename old/{ => obsolete}/harvester-v2.4.sh (100%) mode change 100755 => 100644 create mode 100644 old/obsolete/ubuntu-v2.5.sh create mode 100644 old/obsolete/ubuntu-v2.6.sh create mode 100644 old/obsolete/ubuntu-v2.7.sh rename old/{ => obsolete}/ubuntu-v2.8.sh (100%) mode change 100755 => 100644 rename old/{ => obsolete}/ubuntu-v3.0.sh (100%) mode change 100755 => 100644 rename ubuntu-v2.3.sh => old/ubuntu-v2.3.sh (100%) mode change 100755 => 100644 rename ubuntu-v2.4.sh => old/ubuntu-v2.4.sh (100%) mode change 100755 => 100644 delete mode 100755 ubuntu-v2.6.sh diff --git a/alma-v2.1.sh b/old/alma-v2.1.sh old mode 100755 
new mode 100644 similarity index 100% rename from alma-v2.1.sh rename to old/alma-v2.1.sh diff --git a/alma-v2.3.sh b/old/alma-v2.3.sh old mode 100755 new mode 100644 similarity index 100% rename from alma-v2.3.sh rename to old/alma-v2.3.sh diff --git a/alma-v2.5.sh b/old/alma-v2.5.sh old mode 100755 new mode 100644 similarity index 100% rename from alma-v2.5.sh rename to old/alma-v2.5.sh diff --git a/alma-v2.6.sh b/old/alma-v2.6.sh old mode 100755 new mode 100644 similarity index 100% rename from alma-v2.6.sh rename to old/alma-v2.6.sh diff --git a/ubuntu-v2.5.sh b/old/alma-v2.7.sh old mode 100755 new mode 100644 similarity index 85% rename from ubuntu-v2.5.sh rename to old/alma-v2.7.sh index ea67d35..c1e5ce3 --- a/ubuntu-v2.5.sh +++ b/old/alma-v2.7.sh @@ -1,11 +1,11 @@ #!/bin/bash -# Disk Health Check Script for Ubuntu +# Disk Health Check Script for Alma Linux 9 # Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid # Supports consumer and enterprise disk classification SCRIPT_NAME=$(basename "$0") -VERSION="2.5" +VERSION="2.7" # Color codes RED=$(tput setaf 1) @@ -42,7 +42,7 @@ check_dependencies() { if [[ ${#missing[@]} -gt 0 ]]; then print_color $RED "Error: Missing required packages: ${missing[*]}" - echo "Install with: sudo apt update && sudo apt install ${missing[*]}" + echo "Install with: sudo dnf install ${missing[*]}" exit 1 fi } @@ -332,34 +332,46 @@ check_mdraid() { done } -# Function to extract capacity in GB from various formats +# Function to extract capacity in GB from various formats - IMPROVED VERSION extract_capacity_gb() { local capacity=$1 local capacity_gb=0 - # Try different patterns to extract capacity - if [[ $capacity =~ \[([0-9,.]+)\s*[Tt][Bb] ]]; then - # Pattern: [1.82 TB] - local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') - capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. 
-f1) - elif [[ $capacity =~ \[([0-9,.]+)\s*[Gg][Bb] ]]; then - # Pattern: [500.1 GB] - local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') - capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) - elif [[ $capacity =~ ([0-9,]+)\s+bytes ]]; then - # Pattern: 500,107,862,016 bytes + # Remove extra spaces and normalize + capacity=$(echo "$capacity" | sed 's/ */ /g') + + # Method 1: Try to extract from User Capacity field with bytes + if [[ $capacity =~ ([0-9,]+)\s*bytes ]]; then local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) - elif [[ $capacity =~ ([0-9,.]+)\s*[Tt][Bb] ]]; then - # Pattern: 1.82TB + fi + + # Method 2: Try to extract from bracket format [XXX GB] or [X.XX TB] + if [[ $capacity_gb -eq 0 ]] && [[ $capacity =~ \[([0-9,.]+)\s*([GT])B?\] ]]; then local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + local unit="${BASH_REMATCH[2]}" + if [[ "$unit" == "T" ]]; then + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + else + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) + fi + fi + + # Method 3: Try direct TB/GB pattern matching + if [[ $capacity_gb -eq 0 ]] && [[ $capacity =~ ([0-9,.]+)\s*TB ]]; then + local size=$(echo "$capacity" | grep -oE '[0-9,.]+' | head -1 | tr -d ',') capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) - elif [[ $capacity =~ ([0-9,.]+)\s*[Gg][Bb] ]]; then - # Pattern: 500.1GB - local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + elif [[ $capacity_gb -eq 0 ]] && [[ $capacity =~ ([0-9,.]+)\s*GB ]]; then + local size=$(echo "$capacity" | grep -oE '[0-9,.]+' | head -1 | tr -d ',') capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. 
-f1) fi + # Method 4: For NVMe - try different field formats + if [[ $capacity_gb -eq 0 ]] && [[ $capacity =~ ([0-9,]+)\s*[Bb] ]]; then + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) + fi + # Ensure we have a valid number if [[ -z "$capacity_gb" || ! "$capacity_gb" =~ ^[0-9]+$ || "$capacity_gb" -le 0 ]]; then echo "0" @@ -388,8 +400,7 @@ get_nvme_capacity() { # Function to get human readable capacity get_human_capacity() { - local capacity=$1 - local capacity_gb=$2 + local capacity_gb=$1 if [[ $capacity_gb -ge 1000 ]]; then echo "$(echo "scale=2; $capacity_gb / 1000" | bc) TB" @@ -398,6 +409,36 @@ get_human_capacity() { fi } +# Function to get capacity using block device information as fallback +get_block_device_capacity() { + local disk=$1 + local capacity_gb=0 + + # Try to get capacity from block device using lsblk or fdisk + if command_exists lsblk; then + local block_size=$(lsblk -b "$disk" -o SIZE -n 2>/dev/null | head -1) + if [[ -n "$block_size" && "$block_size" =~ ^[0-9]+$ ]]; then + capacity_gb=$(echo "scale=0; $block_size / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) + fi + fi + + # Alternative method using fdisk + if [[ $capacity_gb -eq 0 ]] && command_exists fdisk; then + local fdisk_info=$(fdisk -l "$disk" 2>/dev/null | grep "Disk $disk") + if [[ $fdisk_info =~ ([0-9,.]+)\s*([GT])iB ]]; then + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + local unit="${BASH_REMATCH[2]}" + if [[ "$unit" == "T" ]]; then + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + else + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. 
-f1) + fi + fi + fi + + echo "$capacity_gb" +} + # Function to check a single disk check_disk() { local disk=$1 @@ -431,12 +472,12 @@ check_disk() { fi # Extract disk information - local model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') - [[ -z "$model" ]] && model=$(echo "$info" | grep "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep -i "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') - local serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') - local capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) - local firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + local serial=$(echo "$info" | grep -i "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep -i "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep -i "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') # For NVMe disks, try to get capacity from different fields if [[ "$disk_type" == "NVMe" ]]; then @@ -448,27 +489,18 @@ check_disk() { # Extract capacity in GB and human readable format local capacity_gb=$(extract_capacity_gb "$capacity") - local capacity_human=$(get_human_capacity "$capacity" "$capacity_gb") + local capacity_human="" - # If capacity extraction failed, try alternative method + # If capacity extraction failed, try block device method if [[ "$capacity_gb" -eq 0 ]]; then - # Try to get capacity from model name or other methods - if [[ "$disk_type" == "NVMe" && "$model" =~ 500[Gg] ]]; then - capacity_gb=500 - capacity_human="500 GB" - elif [[ "$model" =~ 960[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then - capacity_gb=1000 - capacity_human="1 TB" - elif [[ "$model" =~ 2[Tt] ]]; then - capacity_gb=2000 - capacity_human="2 TB" - 
elif [[ "$model" =~ 500[Gg] ]]; then - capacity_gb=500 - capacity_human="500 GB" - elif [[ "$model" =~ 250[Gg] ]]; then - capacity_gb=250 - capacity_human="250 GB" - fi + capacity_gb=$(get_block_device_capacity "$disk") + fi + + # Generate human readable capacity + if [[ "$capacity_gb" -gt 0 ]]; then + capacity_human=$(get_human_capacity "$capacity_gb") + else + capacity_human="Unknown" fi local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') @@ -592,8 +624,8 @@ detect_disks() { # Main function main() { - print_color $BLUE "Disk Health Check Script v$VERSION for Ubuntu" - print_color $BLUE "==============================================" + print_color $BLUE "Disk Health Check Script v$VERSION for Alma Linux 9" + print_color $BLUE "====================================================" echo "" check_dependencies diff --git a/harvester-v2.1.sh b/old/harvester-v2.1.sh old mode 100755 new mode 100644 similarity index 100% rename from harvester-v2.1.sh rename to old/harvester-v2.1.sh diff --git a/harvester-v2.3.sh b/old/harvester-v2.3.sh old mode 100755 new mode 100644 similarity index 100% rename from harvester-v2.3.sh rename to old/harvester-v2.3.sh diff --git a/harvester-v2.5.sh b/old/harvester-v2.5.sh old mode 100755 new mode 100644 similarity index 100% rename from harvester-v2.5.sh rename to old/harvester-v2.5.sh diff --git a/harvester-v2.6.sh b/old/harvester-v2.6.sh old mode 100755 new mode 100644 similarity index 100% rename from harvester-v2.6.sh rename to old/harvester-v2.6.sh diff --git a/harvester-v2.7.sh b/old/harvester-v2.7.sh old mode 100755 new mode 100644 similarity index 100% rename from harvester-v2.7.sh rename to old/harvester-v2.7.sh diff --git a/harvester-v2.9.sh b/old/harvester-v2.9.sh old mode 100755 new mode 100644 similarity index 100% rename from harvester-v2.9.sh rename to old/harvester-v2.9.sh diff --git a/harvester-v3.0.sh b/old/harvester-v3.0.sh old mode 100755 new mode 100644 similarity index 100% rename 
from harvester-v3.0.sh rename to old/harvester-v3.0.sh diff --git a/harvester-v3.1.sh b/old/harvester-v3.1.sh old mode 100755 new mode 100644 similarity index 100% rename from harvester-v3.1.sh rename to old/harvester-v3.1.sh diff --git a/harvester-v3.2.sh b/old/harvester-v3.2.sh old mode 100755 new mode 100644 similarity index 100% rename from harvester-v3.2.sh rename to old/harvester-v3.2.sh diff --git a/harvester-v3.3.sh b/old/harvester-v3.3.sh old mode 100755 new mode 100644 similarity index 100% rename from harvester-v3.3.sh rename to old/harvester-v3.3.sh diff --git a/harvester-v3.4.sh b/old/harvester-v3.4.sh old mode 100755 new mode 100644 similarity index 100% rename from harvester-v3.4.sh rename to old/harvester-v3.4.sh diff --git a/harvester-v3.5.sh b/old/harvester-v3.5.sh similarity index 100% rename from harvester-v3.5.sh rename to old/harvester-v3.5.sh diff --git a/harvester-v3.6.sh b/old/harvester-v3.6.sh similarity index 100% rename from harvester-v3.6.sh rename to old/harvester-v3.6.sh diff --git a/harvester-v3.7.sh b/old/harvester-v3.7.sh similarity index 100% rename from harvester-v3.7.sh rename to old/harvester-v3.7.sh diff --git a/harvester-v3.8-old.sh b/old/harvester-v3.8-old.sh similarity index 100% rename from harvester-v3.8-old.sh rename to old/harvester-v3.8-old.sh diff --git a/old/harvester-v3.8.sh b/old/harvester-v3.8.sh new file mode 100644 index 0000000..9c18f8b --- /dev/null +++ b/old/harvester-v3.8.sh @@ -0,0 +1,869 @@ +#!/bin/bash + +# Disk Health Check Script for Harvester OS +# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid +# Supports consumer and enterprise disk classification +# Created by Adam T. 
Lau + +SCRIPT_NAME=$(basename "$0") +VERSION="3.8" + +# Color codes +RED=$(tput setaf 1) +GREEN=$(tput setaf 2) +YELLOW=$(tput setaf 3) +BLUE=$(tput setaf 4) +CYAN=$(tput setaf 6) +MAGENTA=$(tput setaf 5) +NC=$(tput sgr0) + +# Function to print colored output +print_color() { + local color=$1 + local message=$2 + echo -e "${color}${message}${NC}" +} + +# Check if required commands are installed +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +if ! command_exists smartctl; then + print_color $RED "Error: smartctl is not installed. Please install smartmontools package." + exit 1 +fi + +# Known model capacities +declare -A MODEL_CAPACITIES=( + ["ST91000640NS"]="1000" + ["ST2000NM0033"]="2000" + ["ST4000NM0033"]="4000" + ["MB1000GCWCV"]="1000" + ["MB2000GCWDB"]="2000" + ["AL15SEB120N"]="1200" + ["AL15SEB600N"]="600" + ["HUC101212CSS600"]="1200" + ["HUC103012CSS600"]="3000" + ["HUC109090CSS600"]="900" + ["MAX3147RC"]="147" + ["ST3146356SS"]="146" + ["ST3146855SS"]="146" + ["ST33000650SS"]="3000" + ["ST3600057SS"]="600" + ["ST9146803SS"]="146" + ["ST973451SS"]="73" + ["AL13SXB300N"]="300" + ["KPM6XRUG960G"]="960" + ["MZILT3T8HBLS0D3"]="3840" + ["MZILT960HBHQ0D3"]="960" + # Add more models as encountered +) + +# TBW endurance standards (using lowest numbers) +declare -A CONSUMER_TBW=( + ["250"]=150 + ["500"]=300 + ["1000"]=600 + ["2000"]=1200 + ["4000"]=2400 + ["8000"]=4800 +) + +declare -A ENTERPRISE_TBW=( + ["250"]=450 + ["500"]=900 + ["1000"]=1800 + ["2000"]=3600 + ["4000"]=7200 + ["8000"]=14400 +) + +# Function to get closest capacity tier +get_capacity_tier() { + local capacity_gb=$1 + local tiers=("250" "500" "1000" "2000" "4000" "8000") + + for tier in "${tiers[@]}"; do + if [[ $capacity_gb -le $tier ]]; then + echo $tier + return + fi + done + echo "8000" +} + +# Function to extract numeric hours from power_on_hours field +extract_numeric_hours() { + local power_on_hours=$1 + local numeric_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*$//') + if [[ -n 
"$numeric_hours" && "$numeric_hours" =~ ^[0-9]+$ ]]; then + echo "$numeric_hours" + else + echo "0" + fi +} + +# Function to get disk type and interface +get_disk_info() { + local disk=$1 + local controller=$2 + + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + smart_cmd+=" -i $disk" + + local info=$($smart_cmd 2>/dev/null) + local transport="" + local disk_type="UNKNOWN" + local is_enterprise=false + + # Check if it's NVMe + if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then + disk_type="NVMe" + transport="NVMe" + # Check for SAS + elif echo "$info" | grep -qi "SAS"; then + transport="SAS" + is_enterprise=true + # Determine if SAS disk is HDD or SSD + if echo "$info" | grep -qi "Solid State Device\|SSD"; then + disk_type="SAS SSD" + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="SAS HDD" + else + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + if [[ -n "$model" ]]; then + if echo "$model" | grep -qi "SSD\|Solid State"; then + disk_type="SAS SSD" + else + disk_type="SAS HDD" + fi + else + disk_type="SAS HDD" + fi + fi + # Check for SATA SSD + elif echo "$info" | grep -qi "Solid State Device\|SSD"; then + disk_type="SATA SSD" + transport="SATA" + # Check for SATA HDD + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="SATA HDD" + transport="SATA" + fi + + # Check for enterprise features + if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then + is_enterprise=true + fi + + # Check device type by model name + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + if [[ -n "$model" ]]; then + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then + is_enterprise=true + fi + if echo "$model" | grep -qi "SSD\|Solid State" && [[ "$disk_type" == "UNKNOWN" ]]; then + disk_type="SSD" + [[ "$transport" == "" ]] && transport="SATA" + fi + fi + + if [[ "$disk_type" 
== "UNKNOWN" ]]; then + disk_type="Unknown" + fi + + echo "$disk_type|$transport|$is_enterprise" +} + +# Function to get SAS disk attributes +get_sas_attributes() { + local disk=$1 + local controller=$2 + local disk_type=$3 + + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + + local power_on_hours="" + local reallocated_sectors="" + local pending_sectors="" + local start_stop_count="" + local load_cycle_count="" + local total_written="" + local temperature="" + local model="" + local serial="" + local firmware="" + local media_wearout="" + local percent_lifetime_used="" + local has_write_data=false + + # Try extended information first for SAS disks + local attributes=$($smart_cmd -x "$disk" 2>/dev/null) + + # If extended fails, try standard attributes + if [[ -z "$attributes" ]]; then + attributes=$($smart_cmd -a "$disk" 2>/dev/null) + fi + + if [[ -n "$attributes" ]]; then + # Extract model information + model=$(echo "$attributes" | grep -i "Product:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$attributes" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + serial=$(echo "$attributes" | grep -i "Serial number:" | cut -d: -f2 | sed 's/^[ \t]*//') + firmware=$(echo "$attributes" | grep -i "Revision:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract power on hours - try multiple formats + power_on_hours=$(echo "$attributes" | grep -i "Accumulated power on time" | grep -oE "[0-9]+:[0-9]+" | head -1) + if [[ -n "$power_on_hours" ]]; then + local hours=$(echo "$power_on_hours" | cut -d: -f1) + power_on_hours="$hours" + else + # Try alternative format + power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours" | awk '{print $NF}' | head -1) + fi + + # Extract temperature + temperature=$(echo "$attributes" | grep -i "Current Drive Temperature" | grep -oE "[0-9]+" | head -1) + [[ -z "$temperature" ]] && temperature=$(echo "$attributes" | grep -i "Temperature_Celsius" | awk '{print $10}' | 
head -1) + + # Extract mechanical counters for SAS HDDs + if [[ "$disk_type" == "SAS HDD" ]]; then + start_stop_count=$(echo "$attributes" | grep -i "Accumulated start-stop cycles" | grep -oE "[0-9]+" | head -1) + load_cycle_count=$(echo "$attributes" | grep -i "Accumulated load-unload cycles" | grep -oE "[0-9]+" | head -1) + fi + + # Extract error counters + local error_count=$(echo "$attributes" | grep -i "Elements in grown defect list" | grep -oE "[0-9]+" | head -1) + if [[ -n "$error_count" ]]; then + reallocated_sectors="$error_count" + fi + + # For SAS SSDs, look for comprehensive wear indicators + if [[ "$disk_type" == "SAS SSD" ]]; then + # Try multiple patterns for write data + total_written=$(echo "$attributes" | grep -i "Total_LBAs_Written" | awk '{print $NF}' | head -1) + [[ -z "$total_written" ]] && total_written=$(echo "$attributes" | grep -i "Host_Writes_32MiB" | awk '{print $NF}' | head -1) + [[ -z "$total_written" ]] && total_written=$(echo "$attributes" | grep -i "Lifetime_Writes" | awk '{print $NF}' | head -1) + [[ -z "$total_written" ]] && total_written=$(echo "$attributes" | grep -i "NAND_Writes" | awk '{print $NF}' | head -1) + + # Check if we actually found write data + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + has_write_data=true + fi + + # Look for wear level indicators + media_wearout=$(echo "$attributes" | grep -i "Media_Wearout_Indicator" | awk '{print $NF}' | head -1) + percent_lifetime_used=$(echo "$attributes" | grep -i "Percent_Lifetime_Used" | awk '{print $NF}' | head -1) + [[ -z "$percent_lifetime_used" ]] && percent_lifetime_used=$(echo "$attributes" | grep -i "Wear_Leveling_Count" | awk '{print $NF}' | head -1) + fi + fi + + echo "$power_on_hours|$reallocated_sectors|$pending_sectors|$start_stop_count|$load_cycle_count|$temperature|$model|$serial|$firmware|$total_written|$media_wearout|$percent_lifetime_used|$has_write_data" +} + +# Function to calculate TBW for SSD/NVMe +calculate_tbw() { + local 
disk_type=$1 + local raw_value=$2 + local sectors=$3 + + local tbw=0 + + if [[ -n "$sectors" && "$sectors" != "0" ]]; then + # Calculate from sectors (most common for SATA SSDs) + local bytes=$((sectors * 512)) + tbw=$((bytes / 1000000000000)) + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + if [[ "$disk_type" == "NVMe" ]]; then + # NVMe: raw value is in 32MB units + tbw=$((raw_value * 32 / 1000000)) + else + # SATA/SAS SSD: various manufacturers + tbw=$((raw_value * 32 / 1000000)) + fi + fi + + echo "$tbw" +} + +# Function to get estimated endurance - SIMPLIFIED FOR SAS SSDs +get_estimated_endurance() { + local capacity_gb=$1 + local is_enterprise=$2 + local disk_type=$3 + local has_write_data=$4 + + # HDDs don't have TBW + if [[ "$disk_type" == "SATA HDD" || "$disk_type" == "SAS HDD" || "$disk_type" == "HDD" ]]; then + echo "N/A" + return + fi + + # For SAS SSDs without write data, don't provide unrealistic estimates + if [[ "$disk_type" == "SAS SSD" && "$has_write_data" == "false" ]]; then + echo "UNKNOWN" + return + fi + + local capacity_tier=$(get_capacity_tier "$capacity_gb") + + if [[ "$is_enterprise" == "true" ]]; then + echo "${ENTERPRISE_TBW[$capacity_tier]}" + else + echo "${CONSUMER_TBW[$capacity_tier]}" + fi +} + +# Function to estimate SSD lifespan with TBW remaining +estimate_ssd_lifespan() { + local power_on_hours=$1 + local tbw_used=$2 + local estimated_endurance=$3 + local disk_type=$4 + local percent_lifetime_used=$5 + local has_write_data=$6 + + # For SAS SSDs without write data, be honest about limitations + if [[ "$disk_type" == "SAS SSD" && "$has_write_data" == "false" ]]; then + echo "N/A|N/A|Cannot determine - SAS SSD does not expose write statistics" + return + fi + + if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then + echo "Unknown|Unknown|New drive" + return + fi + + if [[ "$estimated_endurance" == "N/A" ]]; then + echo "N/A|N/A|HDD - no endurance rating" + return + fi + + local clean_tbw_used=$(echo "$tbw_used" | 
sed 's/[^0-9.]//g') + if [[ -z "$clean_tbw_used" ]]; then + clean_tbw_used=0 + fi + + local tbw_remaining=$((estimated_endurance - clean_tbw_used)) + + if [[ $clean_tbw_used -gt 0 ]]; then + local lifespan_used=$((clean_tbw_used * 100 / estimated_endurance)) + local lifespan_remaining=$((100 - lifespan_used)) + + if [[ $lifespan_used -ge 80 ]]; then + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear" + elif [[ $lifespan_used -ge 50 ]]; then + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear" + else + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy" + fi + else + echo "Unknown|${estimated_endurance} TB|New" + fi +} + +# Function to estimate HDD lifespan +estimate_hdd_lifespan() { + local power_on_hours=$1 + local reallocated_sectors=$2 + local pending_sectors=$3 + local start_stop_count=$4 + local load_cycle_count=$5 + local disk_type=$6 + local temperature=$7 + + local numeric_hours=$(extract_numeric_hours "$power_on_hours") + + if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then + echo "Unknown" + return + fi + + local severity=0 + + # Critical issues + if [[ "$pending_sectors" -gt 0 ]]; then + echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + return + elif [[ "$reallocated_sectors" -gt 100 ]]; then + severity=$((severity + 3)) + elif [[ "$reallocated_sectors" -gt 10 ]]; then + severity=$((severity + 2)) + elif [[ "$reallocated_sectors" -gt 0 ]]; then + severity=$((severity + 1)) + fi + + # Temperature warning + if [[ -n "$temperature" && "$temperature" -gt 50 ]]; then + severity=$((severity + 2)) + elif [[ -n "$temperature" && "$temperature" -gt 40 ]]; then + severity=$((severity + 1)) + fi + + # Usage-based assessment + if [[ "$numeric_hours" -gt 50000 ]]; then + severity=$((severity + 3)) + elif [[ "$numeric_hours" -gt 30000 ]]; then + severity=$((severity + 2)) + elif [[ "$numeric_hours" -gt 15000 ]]; then + severity=$((severity 
+ 1)) + fi + + # Mechanical wear (for HDDs) + if [[ "$disk_type" == "SATA HDD" || "$disk_type" == "SAS HDD" ]]; then + if [[ "$start_stop_count" -gt 50000 ]]; then + severity=$((severity + 2)) + elif [[ "$start_stop_count" -gt 20000 ]]; then + severity=$((severity + 1)) + fi + + if [[ "$load_cycle_count" -gt 500000 ]]; then + severity=$((severity + 2)) + elif [[ "$load_cycle_count" -gt 200000 ]]; then + severity=$((severity + 1)) + fi + fi + + if [[ $severity -ge 5 ]]; then + echo "${RED}< 6 months${NC} (Multiple risk factors)" + elif [[ $severity -ge 3 ]]; then + echo "${YELLOW}6-18 months${NC} (Moderate wear)" + elif [[ $severity -ge 1 ]]; then + echo "${YELLOW}1-3 years${NC} (Light wear)" + else + echo "${GREEN}> 3 years${NC} (Healthy)" + fi +} + +# Function to check soft-raid (MDRAID) +check_mdraid() { + local md_devices=() + + if [[ -f /proc/mdstat ]]; then + while IFS= read -r line; do + if [[ $line =~ ^md[0-9]+ ]]; then + md_devices+=("/dev/${line%% *}") + fi + done < /proc/mdstat + fi + + for md in "${md_devices[@]}"; do + if [[ -b "$md" ]]; then + print_color $MAGENTA "Found software RAID: $md" + if command_exists mdadm; then + local md_info=$(mdadm --detail "$md" 2>/dev/null) + if [[ -n "$md_info" ]]; then + echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "" + fi + fi + fi + done +} + +# Function to get capacity using direct block device methods +get_disk_capacity() { + local disk=$1 + local controller=$2 + local disk_type=$3 + + local capacity_gb=0 + local capacity_human="Unknown" + + # Method 1: Try lsblk first + if command_exists lsblk; then + local lsblk_output=$(lsblk -b "$disk" -o SIZE -n 2>/dev/null) + if [[ -n "$lsblk_output" && "$lsblk_output" =~ ^[0-9]+$ ]]; then + capacity_gb=$((lsblk_output / 1000000000)) + 
fi + fi + + # Method 2: Try fdisk + if [[ $capacity_gb -eq 0 ]] && command_exists fdisk; then + local fdisk_info=$(fdisk -l "$disk" 2>/dev/null | grep "Disk $disk") + if [[ -n "$fdisk_info" ]]; then + if [[ $fdisk_info =~ ([0-9,.]+)\s*([GT])iB ]]; then + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + local unit="${BASH_REMATCH[2]}" + if [[ "$unit" == "T" ]]; then + capacity_gb=$((size * 1000)) + else + capacity_gb=$size + fi + elif [[ $fdisk_info =~ ([0-9,.]+)\s*([GT])B ]]; then + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + local unit="${BASH_REMATCH[2]}" + if [[ "$unit" == "T" ]]; then + capacity_gb=$((size * 1000)) + else + capacity_gb=$size + fi + elif [[ $fdisk_info =~ ([0-9,.]+)\s*bytes ]]; then + local bytes=$(echo "$fdisk_info" | grep -oE '[0-9,]+' | head -1 | tr -d ',') + capacity_gb=$((bytes / 1000000000)) + fi + fi + fi + + # Method 3: Try blockdev + if [[ $capacity_gb -eq 0 ]] && command_exists blockdev; then + local blockdev_size=$(blockdev --getsize64 "$disk" 2>/dev/null) + if [[ -n "$blockdev_size" && "$blockdev_size" =~ ^[0-9]+$ ]]; then + capacity_gb=$((blockdev_size / 1000000000)) + fi + fi + + # Method 4: Model-based lookup + if [[ $capacity_gb -eq 0 ]]; then + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + local model=$($smart_cmd -i "$disk" 2>/dev/null | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$($smart_cmd -i "$disk" 2>/dev/null | grep -i "Product:" | cut -d: -f2 | sed 's/^[ \t]*//') + + if [[ -n "$model" && -n "${MODEL_CAPACITIES[$model]}" ]]; then + capacity_gb="${MODEL_CAPACITIES[$model]}" + fi + fi + + # Generate human readable capacity + if [[ $capacity_gb -gt 0 ]]; then + if [[ $capacity_gb -ge 1000 ]]; then + local tb_capacity=$((capacity_gb / 1000)) + capacity_human="${tb_capacity} TB" + else + capacity_human="${capacity_gb} GB" + fi + else + capacity_human="Unknown" + fi + + echo "$capacity_gb|$capacity_human" +} + +# Function to 
check a single disk +check_disk() { + local disk=$1 + local controller=$2 + + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + + print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})" + echo "==================================================" + + # Get disk information + local disk_info=$(get_disk_info "$disk" "$controller") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$($smart_cmd -i "$disk" 2>/dev/null) + local health=$($smart_cmd -H "$disk" 2>/dev/null) + + # Check if we can read the disk + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need controller specification." + echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep -i "Product:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep -i "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local firmware=$(echo "$info" | grep -i "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$firmware" ]] && firmware=$(echo "$info" | grep -i "Revision:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Get capacity + local capacity_info=$(get_disk_capacity "$disk" "$controller" "$disk_type") + local capacity_gb=$(echo "$capacity_info" | cut -d'|' -f1) + local capacity_human=$(echo "$capacity_info" | cut -d'|' -f2) + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART Health Status" | cut -d: -f2 | sed 's/^[ 
\t]*//') + + # Extract SMART attributes based on disk type + local power_on_hours="" + local reallocated_sectors="" + local pending_sectors="" + local start_stop_count="" + local load_cycle_count="" + local total_written="" + local host_writes_32mib="" + local temperature="" + local sas_model="" + local sas_serial="" + local sas_firmware="" + local sas_total_written="" + local media_wearout="" + local percent_lifetime_used="" + local has_write_data="false" + + if [[ "$disk_type" == "SAS HDD" || "$disk_type" == "SAS SSD" ]]; then + local sas_attrs=$(get_sas_attributes "$disk" "$controller" "$disk_type") + power_on_hours=$(echo "$sas_attrs" | cut -d'|' -f1) + reallocated_sectors=$(echo "$sas_attrs" | cut -d'|' -f2) + pending_sectors=$(echo "$sas_attrs" | cut -d'|' -f3) + start_stop_count=$(echo "$sas_attrs" | cut -d'|' -f4) + load_cycle_count=$(echo "$sas_attrs" | cut -d'|' -f5) + temperature=$(echo "$sas_attrs" | cut -d'|' -f6) + sas_model=$(echo "$sas_attrs" | cut -d'|' -f7) + sas_serial=$(echo "$sas_attrs" | cut -d'|' -f8) + sas_firmware=$(echo "$sas_attrs" | cut -d'|' -f9) + sas_total_written=$(echo "$sas_attrs" | cut -d'|' -f10) + media_wearout=$(echo "$sas_attrs" | cut -d'|' -f11) + percent_lifetime_used=$(echo "$sas_attrs" | cut -d'|' -f12) + has_write_data=$(echo "$sas_attrs" | cut -d'|' -f13) + + # Use SAS-extracted data if available + [[ -n "$sas_model" ]] && model="$sas_model" + [[ -n "$sas_serial" ]] && serial="$sas_serial" + [[ -n "$sas_firmware" ]] && firmware="$sas_firmware" + [[ -n "$sas_total_written" ]] && total_written="$sas_total_written" + else + local attributes=$($smart_cmd -A "$disk" 2>/dev/null) + if [[ -n "$attributes" ]]; then + power_on_hours=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}') + reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}') + pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}') + start_stop_count=$(echo "$attributes" | grep 
"Start_Stop_Count" | awk '{print $10}') + load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}') + total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}') + host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}') + temperature=$(echo "$attributes" | grep -i "Temperature_Celsius" | awk '{print $10}') + + # For non-SAS SSDs, we assume write data is available if we found any + if [[ "$disk_type" == "SATA SSD" || "$disk_type" == "NVMe" ]]; then + if [[ -n "$total_written" || -n "$host_writes_32mib" ]]; then + has_write_data="true" + fi + fi + fi + fi + + # Clean up extracted values + power_on_hours=$(extract_numeric_hours "$power_on_hours") + reallocated_sectors=${reallocated_sectors:-0} + pending_sectors=${pending_sectors:-0} + start_stop_count=${start_stop_count:-0} + load_cycle_count=${load_cycle_count:-0} + + # Display basic information + echo "Model: ${model:-Unknown}" + echo "Serial: ${serial:-Unknown}" + echo "Type: $disk_type" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: $capacity_human" + echo "Firmware: ${firmware:-Unknown}" + echo "Health: ${health_status:-Unknown}" + + # Show temperature if available + if [[ -n "$temperature" && "$temperature" != "0" ]]; then + echo "Temperature: ${temperature} C" + fi + + # Only show Power On Hours if we have a valid value + if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then + echo "Power On Hours: $power_on_hours" + else + echo "Power On Hours: Unknown" + fi + + # Show wear indicators for SSDs if available + if [[ "$disk_type" == "SAS SSD" || "$disk_type" == "SATA SSD" ]]; then + if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then + echo "Media Wearout: $media_wearout" + fi + if [[ -n "$percent_lifetime_used" && "$percent_lifetime_used" != "0" ]]; then + echo "Lifetime Used: ${percent_lifetime_used}%" + fi + fi + + 
# Disk type specific analysis + if [[ "$disk_type" == "SATA HDD" || "$disk_type" == "SAS HDD" ]]; then + echo "Realloc Sectors: $reallocated_sectors" + echo "Pending Sectors: $pending_sectors" + + # Only show mechanical counters if we have values + if [[ -n "$start_stop_count" && "$start_stop_count" != "0" ]]; then + echo "Start/Stop Count: $start_stop_count" + fi + if [[ -n "$load_cycle_count" && "$load_cycle_count" != "0" ]]; then + echo "Load Cycle Count: $load_cycle_count" + fi + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours" "$reallocated_sectors" "$pending_sectors" "$start_stop_count" "$load_cycle_count" "$disk_type" "$temperature") + echo "Lifespan: $lifespan" + + elif [[ "$disk_type" == "SATA SSD" || "$disk_type" == "SAS SSD" || "$disk_type" == "NVMe" ]]; then + local tbw_used=0 + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") + elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") + fi + + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type" "$has_write_data") + + # Handle SAS SSDs without write data specially + if [[ "$disk_type" == "SAS SSD" && "$estimated_endurance" == "UNKNOWN" ]]; then + echo "TBW Used: Not available" + echo "TBW Endurance: Not available (SAS SSD does not expose write statistics)" + echo "Lifespan: ${GREEN}Healthy${NC} (based on SMART health status)" + else + if [[ "$estimated_endurance" != "N/A" ]]; then + echo "TBW Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)" + + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type" "$percent_lifetime_used" "$has_write_data") + local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) + local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) + local 
wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) + + echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" + else + echo "TBW Used: ${tbw_used} TB" + echo "Lifespan: Unknown (Cannot estimate without usage data)" + fi + fi + + if [[ "$disk_type" == "SAS SSD" ]]; then + echo "Realloc Sectors: $reallocated_sectors" + echo "Pending Sectors: $pending_sectors" + fi + else + print_color $YELLOW "Unknown disk type - limited information available" + echo "Realloc Sectors: $reallocated_sectors" + echo "Pending Sectors: $pending_sectors" + fi + + echo "" +} + +# Function to detect RAID controllers and disks +detect_raid_disks() { + local controllers=("megaraid" "cciss" "areca" "3ware" "hpt" "aacraid" "auto") + local disks=() + + # Check for direct disks first + for disk in /dev/sd[a-z] /dev/sd[a-z][a-z] /dev/nvme[0-9]n[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk:direct") + fi + done + + # Check for RAID controllers + for controller in "${controllers[@]}"; do + for i in {0..31}; do + for base_disk in "/dev/sda" "/dev/sg$i" "/dev/sr$i"; do + if smartctl -d "$controller,$i" -i "$base_disk" >/dev/null 2>&1; then + disks+=("$base_disk:$controller,$i") + break + fi + done + done + done + + echo "${disks[@]}" +} + +# Main function +main() { + print_color $BLUE "Disk Health Check Script v$VERSION for Harvester OS" + print_color $BLUE "Created by Adam T. Lau" + print_color $BLUE "====================================================" + echo "" + + local disks=() + + # Check for soft-raid first + check_mdraid + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; do + if [[ -b "$disk" ]]; then + disks+=("$disk:direct") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks + print_color $CYAN "Auto-detecting disks..." 
+ read -ra disks <<< "$(detect_raid_disks)" + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $YELLOW "No disks found via auto-detection" + print_color $CYAN "Trying direct disk access..." + for disk in /dev/sda /dev/sdb /dev/sdc /dev/nvme0n1; do + if [[ -b "$disk" ]]; then + disks+=("$disk:direct") + fi + done + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check each disk + for disk_info in "${disks[@]}"; do + IFS=':' read -r disk controller <<< "$disk_info" + check_disk "$disk" "$controller" + done + + print_color $BLUE "Check completed!" + echo "" + print_color $YELLOW "Note: SAS SSDs often do not expose write statistics through SMART." + print_color $YELLOW " TBW information may not be available for these drives." + print_color $YELLOW " SSD/NVMe TBW endurance may be higher depending on the specific model." +} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks and RAID arrays" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" + echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" + echo "Created by Adam T. Lau" +} + +# Parse command line arguments +case "${1:-}" in + -h|--help) + usage + exit 0 + ;; + -v|--version) + echo "$SCRIPT_NAME version $VERSION" + echo "Created by Adam T. 
Lau" + exit 0 + ;; + *) + main "$@" + ;; +esac diff --git a/old/alma-v2.4.sh b/old/obsolete/alma-v2.4.sh old mode 100755 new mode 100644 similarity index 100% rename from old/alma-v2.4.sh rename to old/obsolete/alma-v2.4.sh diff --git a/old/harvester-v2.4.sh b/old/obsolete/harvester-v2.4.sh old mode 100755 new mode 100644 similarity index 100% rename from old/harvester-v2.4.sh rename to old/obsolete/harvester-v2.4.sh diff --git a/old/obsolete/ubuntu-v2.5.sh b/old/obsolete/ubuntu-v2.5.sh new file mode 100644 index 0000000..bb64ff9 --- /dev/null +++ b/old/obsolete/ubuntu-v2.5.sh @@ -0,0 +1,615 @@ +#!/bin/bash + +# Disk Health Check Script for Ubuntu 24.04 +# Enhanced with SAS/PERC H730P controller support +# Checks SSD TBW/lifespan and HDD health status + +SCRIPT_NAME=$(basename "$0") +VERSION="2.5" + +# Color codes +RED=$(tput setaf 1) +GREEN=$(tput setaf 2) +YELLOW=$(tput setaf 3) +BLUE=$(tput setaf 4) +CYAN=$(tput setaf 6) +NC=$(tput sgr0) + +# Function to print colored output +print_color() { + local color=$1 + local message=$2 + echo -e "${color}${message}${NC}" +} + +# Check if command exists +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +# Check dependencies +check_dependencies() { + local missing=() + + if ! command_exists smartctl; then + missing+=("smartmontools") + fi + + if ! command_exists bc; then + missing+=("bc") + fi + + if [[ ${#missing[@]} -gt 0 ]]; then + print_color $RED "Error: Missing required packages: ${missing[*]}" + echo "Install with: sudo apt update && sudo apt install ${missing[*]}" + exit 1 + fi +} + +# Function to test SMART access and get available data - FIXED VERSION +test_smart_access() { + local disk=$1 + local controller=$2 + + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + + # Test basic SMART access + if ! 
$smart_cmd -i "$disk" &>/dev/null; then + echo "no_access" + return + fi + + # Get SMART information + local smart_info=$($smart_cmd -i "$disk" 2>/dev/null) + + # Check if SMART is available - FIXED PARSING + if ! echo "$smart_info" | grep -q "SMART support is:"; then + echo "not_available" + return + fi + + # Extract SMART status - FIXED LOGIC + local smart_support_line=$(echo "$smart_info" | grep "SMART support is:") + local smart_available=$(echo "$smart_support_line" | grep -q "Available" && echo "Available" || echo "") + local smart_enabled=$(echo "$smart_support_line" | grep -q "Enabled" && echo "Enabled" || echo "") + + if [[ -z "$smart_available" ]]; then + echo "not_available" + return + fi + + if [[ -z "$smart_enabled" ]]; then + echo "disabled" + return + fi + + # Test attribute reading + local attributes=$($smart_cmd -A "$disk" 2>/dev/null) + if [[ -z "$attributes" ]] || ! echo "$attributes" | grep -q "ATTRIBUTE_NAME"; then + echo "no_attributes" + return + fi + + echo "full_access" +} + +# Function to get disk information with enhanced SAS support +get_disk_info() { + local disk=$1 + local controller=$2 + + local smart_cmd="smartctl" + [[ -n "$controller" ]] && smart_cmd+=" -d $controller" + + local info=$($smart_cmd -i "$disk" 2>/dev/null) + local attributes=$($smart_cmd -A "$disk" 2>/dev/null) + local health=$($smart_cmd -H "$disk" 2>/dev/null) + + # Extract information with multiple fallbacks for SAS drives + local model=$(echo "$info" | grep -i "Device Model:\|Product:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) + local vendor=$(echo "$info" | grep -i "Vendor:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) + [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model" + + local serial=$(echo "$info" | grep -i "Serial Number:\|Serial number:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) + + local capacity=$(echo "$info" | grep -i "User Capacity:\|Total NVM Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1 | head -1) + + local 
firmware=$(echo "$info" | grep -i "Firmware Version:\|Firmware revision:\|Revision:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) + + local health_status=$(echo "$health" | grep -i "result:\|SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep -i "SMART overall-health" | awk -F'[' '{print $2}' | cut -d']' -f1) + + # Get disk type with SAS support + local disk_type="UNKNOWN" + if echo "$info" | grep -qi "Solid State Device"; then + disk_type="SSD" + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="HDD" + elif echo "$info" | grep -qi "SCSI\|SAS"; then + # SAS drives often don't specify, check rotation rate + if echo "$info" | grep -qi "15000\|10000\|7200"; then + disk_type="HDD" + else + disk_type="SSD" + fi + fi + + # Extract SMART attributes with multiple field attempts + local power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours" | awk '{print $10}' | head -1) + + local reallocated_sectors=$(echo "$attributes" | grep -i "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) + + local pending_sectors=$(echo "$attributes" | grep -i "Current_Pending_Sector" | awk '{print $10}' | head -1) + + # For Kingston and other SSDs with different attribute names + local total_written=$(echo "$attributes" | grep -i "Total_LBAs_Written\|Lifetime_Writes_GiB\|Host_Writes_32MiB\|Flash_Writes_GiB" | awk '{print $10}' | head -1) + local host_writes_32mib=$(echo "$attributes" | grep -i "Host_Writes_32MiB" | awk '{print $10}' | head -1) + + # For wear leveling indicators + local media_wearout=$(echo "$attributes" | grep -i "Media_Wearout_Indicator\|Wear_Leveling_Count\|SSD_Life_Left" | awk '{print $10}' | head -1) + + echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout" +} + +# Function to calculate TBW for SSD - ENHANCED FOR KINGSTON +calculate_tbw() { + local 
raw_value=$1 + local sectors=$2 + local disk_model=$3 + + # Kingston SSDs use Lifetime_Writes_GiB and Flash_Writes_GiB + if echo "$disk_model" | grep -qi "KINGSTON"; then + if [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + # Convert from GiB to TB + local tbw=$(echo "scale=2; $raw_value / 1000" | bc 2>/dev/null || echo "0") + echo "$tbw" + return + fi + fi + + if [[ -n "$sectors" && "$sectors" != "0" ]]; then + local bytes=$((sectors * 512)) + local tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") + echo "$tbw" + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + local tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") + echo "$tbw" + else + echo "0" + fi +} + +# Function to estimate SSD endurance based on model and capacity +estimate_ssd_endurance() { + local disk_model=$1 + local capacity_gb=$2 + + # Kingston consumer SSDs + if echo "$disk_model" | grep -qi "KINGSTON.*SA400"; then + if [[ $capacity_gb -ge 960 ]]; then + echo "300" # 300TB for 960GB Kingston SA400 + elif [[ $capacity_gb -ge 480 ]]; then + echo "150" # 150TB for 480GB Kingston + else + echo "80" # 80TB for smaller Kingston + fi + # SAS SSDs typically have very high endurance + elif echo "$disk_model" | grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD"; then + # Enterprise SAS SSDs - very high endurance + if [[ $capacity_gb -ge 1000 ]]; then + echo "10000" # 10PB for 1TB+ enterprise SAS SSD + elif [[ $capacity_gb -ge 600 ]]; then + echo "6000" # 6PB for 600GB enterprise SAS SSD + elif [[ $capacity_gb -ge 400 ]]; then + echo "4000" # 4PB for 400GB enterprise SAS SSD + else + echo "2000" # 2PB for smaller enterprise SAS SSD + fi + elif echo "$disk_model" | grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL"; then + # Enterprise SATA/NVMe SSDs + if [[ $capacity_gb -ge 1000 ]]; then + echo "1200" # 1.2PB for 1TB enterprise + elif [[ $capacity_gb -ge 480 ]]; then + echo "600" # 600TB for 480GB enterprise + 
elif [[ $capacity_gb -ge 240 ]]; then + echo "300" # 300TB for 240GB enterprise + else + echo "150" # 150TB for smaller enterprise + fi + else + # Consumer SSDs + if [[ $capacity_gb -ge 1000 ]]; then + echo "600" # 600TB for 1TB consumer + elif [[ $capacity_gb -ge 480 ]]; then + echo "300" # 300TB for 480GB consumer + elif [[ $capacity_gb -ge 240 ]]; then + echo "150" # 150TB for 240GB consumer + elif [[ $capacity_gb -ge 120 ]]; then + echo "80" # 80TB for 120GB consumer + else + echo "40" # 40TB for smaller drives + fi + fi +} + +# Function to estimate SSD lifespan with TBW remaining +estimate_ssd_lifespan() { + local power_on_hours=$1 + local tbw_used=$2 + local disk_model=$3 + local capacity_gb=$4 + local media_wearout=$5 + + if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then + echo "Unknown||Unknown||Unknown" + return + fi + + local estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb") + local tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0") + + # If we have media wearout indicator, use it for more accurate estimation + if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then + # For Kingston, SSD_Life_Left is already a percentage + if echo "$disk_model" | grep -qi "KINGSTON"; then + if [[ $media_wearout -le 10 ]]; then + echo "${RED}${media_wearout}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" + elif [[ $media_wearout -le 30 ]]; then + echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout" + elif [[ $media_wearout -le 70 ]]; then + echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" + else + echo "${GREEN}${media_wearout}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" + fi + else + # For other drives, media_wearout might be countdown from 100 + local wear_percent=$media_wearout + if [[ $media_wearout 
-le 10 ]]; then + echo "${RED}${wear_percent}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" + elif [[ $media_wearout -le 30 ]]; then + echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout" + elif [[ $media_wearout -le 70 ]]; then + echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" + else + echo "${GREEN}${wear_percent}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" + fi + fi + return + fi + + if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") + local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100") + + if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}High wear${NC}|tbw" + elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|tbw" + else + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|tbw" + fi + else + echo "Unknown|${estimated_endurance} TB|New|estimated" + fi +} + +# Function to estimate HDD lifespan +estimate_hdd_lifespan() { + local power_on_hours=$1 + local reallocated_sectors=$2 + local pending_sectors=$3 + + if [[ -z "$power_on_hours" ]]; then + echo "Unknown" + return + fi + + power_on_hours=${power_on_hours:-0} + reallocated_sectors=${reallocated_sectors:-0} + pending_sectors=${pending_sectors:-0} + + if [[ "$pending_sectors" -gt 0 ]]; then + echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + elif [[ "$reallocated_sectors" -gt 100 ]]; then + echo "${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)" + elif [[ 
"$reallocated_sectors" -gt 10 ]]; then + echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)" + elif [[ "$power_on_hours" -gt 40000 ]]; then + echo "${YELLOW}1-2 years${NC} (High usage: $power_on_hours hours)" + elif [[ "$power_on_hours" -gt 25000 ]]; then + echo "${GREEN}2-3 years${NC} (Moderate usage: $power_on_hours hours)" + else + echo "${GREEN}> 3 years${NC} (Low usage: $power_on_hours hours)" + fi +} + +# Function to check a single disk with enhanced error handling +check_disk() { + local disk=$1 + local controller=$2 + + print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})" + echo "==================================================" + + # Test SMART access level + local access_level=$(test_smart_access "$disk" "$controller") + + case $access_level in + "no_access") + print_color $RED "ERROR: Cannot access disk through controller" + echo "Possible reasons:" + echo " - Controller doesn't support SMART passthrough" + echo " - Disk is part of a hardware RAID array" + echo " - Insufficient permissions (try running as root)" + echo " - Controller busy or offline" + echo "" + return + ;; + "not_available") + print_color $YELLOW "SMART not available on this disk" + echo "This disk does not support SMART monitoring" + echo "" + return + ;; + "disabled") + print_color $YELLOW "SMART is disabled on this disk" + echo "SMART is available but currently disabled" + echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk" + echo "" + return + ;; + "no_attributes") + print_color $YELLOW "WARNING: Cannot read SMART attributes" + echo "This is common with hardware RAID controllers like PERC H730P" + echo "Try checking through the RAID management interface" + echo "" + return + ;; + "limited_attributes") + print_color $YELLOW "NOTE: Limited SMART data available" + echo "Controller is filtering some SMART attributes" + ;; + esac + + # Get disk information + local disk_info=$(get_disk_info "$disk" "$controller") 
+ IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info" + + # Display basic information + echo "Model: ${model:-Unknown}" + echo "Serial: ${serial:-Unknown}" + echo "Type: $disk_type" + echo "Capacity: ${capacity:-Unknown}" + echo "Firmware: ${firmware:-Unknown}" + echo "Health: ${health_status:-Unknown}" + + # Only show power on hours if available + if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then + echo "Power On Hours: $power_on_hours" + else + echo "Power On Hours: Unknown" + fi + + # Disk type specific analysis + if [[ "$disk_type" == "SSD" ]]; then + local tbw_used=0 + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + tbw_used=$(calculate_tbw "" "$total_written" "$model") + elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then + tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model") + fi + + if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + echo "TBW Used: ${tbw_used} TB" + fi + + # Estimate capacity for endurance calculation + local capacity_gb=0 + if echo "$capacity" | grep -qi "GB"; then + capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. -f1) + elif echo "$capacity" | grep -qi "TB"; then + capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. 
-f1) + fi + + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout") + local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) + local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) + local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) + local wear_source=$(echo "$lifespan_info" | cut -d'|' -f4) + + if [[ "$wear_source" != "media_wearout" && $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + echo "TBW Remaining: $tbw_remaining" + fi + + echo "Lifespan: $lifespan_percent ($wear_status)" + + # Show wear source if available + if [[ "$wear_source" == "media_wearout" ]]; then + echo "Wear Source: Media Wearout Indicator" + elif [[ "$wear_source" == "tbw" ]]; then + echo "Wear Source: TBW Calculation" + elif [[ "$wear_source" == "estimated" ]]; then + echo "Wear Source: Estimated Endurance" + fi + + elif [[ "$disk_type" == "HDD" ]]; then + if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then + echo "Realloc Sectors: $reallocated_sectors" + fi + if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then + echo "Pending Sectors: $pending_sectors" + fi + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}") + echo "Lifespan: $lifespan" + else + print_color $YELLOW "Limited information available for this disk type" + echo "This is normal for hardware RAID configurations like PERC H730P" + echo "For detailed SAS drive information, use controller management tools" + fi + + echo "" +} + +# Function to detect all disks with enhanced SAS support (no partitions) - FIXED +detect_disks() { + local disks=() + + # Check for SATA/SAS disks - only main devices, no partitions + for disk in /dev/sd[a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for NVMe disks - only main devices, no partitions + for disk in /dev/nvme[0-9]n[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi 
+ done + + # Check for SAS disks via SCSI generic - only main devices + for disk in /dev/sg[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for other disk types - only main devices + for disk in /dev/vd[a-z] /dev/xvd[a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + echo "${disks[@]}" +} + +# Function to detect RAID controllers (Ubuntu specific) - FIXED +detect_raid_controllers() { + local controllers=("megaraid" "cciss" "areca" "3ware" "hpt") + local raid_disks=() + + # Check for RAID controllers + for controller in "${controllers[@]}"; do + for i in {0..31}; do + # Try different disk devices for each controller + for base_disk in "/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1"; do + if [[ -b "$base_disk" ]]; then + if smartctl -d "$controller,$i" -i "$base_disk" &>/dev/null; then + raid_disks+=("$base_disk:$controller,$i") + break + fi + fi + done + done + done + + echo "${raid_disks[@]}" +} + +# Main function - FIXED +main() { + print_color $BLUE "Ubuntu 24.04 Disk Health Check Script v$VERSION" + print_color $BLUE "Enhanced with PERC H730P and SAS Support" + print_color $BLUE "============================================" + echo "" + + check_dependencies + + local disks=() + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks - FIXED: don't mix output with disk detection + print_color $CYAN "Auto-detecting disks (excluding partitions)..." + local direct_disks=() + read -ra direct_disks <<< "$(detect_disks)" + + print_color $CYAN "Scanning for RAID controllers..." 
+ local raid_disks=() + read -ra raid_disks <<< "$(detect_raid_controllers)" + + # Combine both lists + disks=("${direct_disks[@]}" "${raid_disks[@]}") + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check if running as root, warn if not + if [[ $EUID -ne 0 ]]; then + print_color $YELLOW "Warning: Not running as root." + print_color $YELLOW "Some disks/controllers may show limited information." + echo "For complete results, run as: sudo $0" + echo "" + fi + + # Check each disk + for disk_info in "${disks[@]}"; do + # Check if this is a RAID disk (has controller specified) + if [[ "$disk_info" == *":"* ]]; then + IFS=':' read -r disk controller <<< "$disk_info" + check_disk "$disk" "$controller" + else + check_disk "$disk_info" + fi + done + + print_color $BLUE "Check completed!" + echo "" + print_color $CYAN "Note: For PERC H730P controllers with SAS drives:" + print_color $CYAN " - Install 'storcli' for detailed controller information" + print_color $CYAN " - Use 'smartctl -d sat /dev/sgX' to try direct access" + print_color $CYAN " - Hardware RAID controllers often limit SMART data access" + echo "" + print_color $CYAN "Ubuntu-specific tips:" + print_color $CYAN " - Use 'lsblk' to see all available block devices" + print_color $CYAN " - Use 'lshw -class disk' for detailed disk information" +} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " sudo $SCRIPT_NAME # Check all disks (as root)" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" + echo " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" + echo " $SCRIPT_NAME 
# Function to test SMART access and get available data - ENHANCED FOR NVMe
#
# Arguments:
#   $1 - device path handed to smartctl (e.g. /dev/sda)
#   $2 - optional smartctl device type, forwarded as "-d <type>"
# Outputs (stdout), exactly one of:
#   no_access      "smartctl -i" exited non-zero
#   not_available  no "SMART support is:" line, or none reporting "Available"
#   disabled       SMART is available but no line reports "Enabled"
#   no_attributes  attribute table unreadable, or the NVMe health query
#                  could not be executed
#   full_access    everything needed could be read
test_smart_access() {
  local disk=$1
  local controller=$2

  # Build the command as an array so the controller spec is always passed
  # as a single word (no word splitting or globbing on expansion).
  local -a smart_cmd=(smartctl)
  if [[ -n "$controller" ]]; then
    smart_cmd+=(-d "$controller")
  fi

  # One call both tests access and captures the identity block.
  local smart_info
  if ! smart_info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null); then
    echo "no_access"
    return
  fi

  # NVMe drives have no ATA-style "SMART support is:" lines.
  if grep -qi "NVMe" <<<"$smart_info"; then
    local health_rc=0
    "${smart_cmd[@]}" -H "$disk" &>/dev/null || health_rc=$?
    # smartctl's exit status is a bit mask. Bits 0-2 mean the command could
    # not be parsed/opened/executed; bit 3 and above report findings about
    # the disk itself (e.g. "DISK FAILING"). Only the former mean the data
    # is unreadable - a failing disk must still be reported, not skipped.
    if (( health_rc & 7 )); then
      echo "no_attributes"
    else
      echo "full_access"
    fi
    return
  fi

  # SATA/SAS: look at the "SMART support is:" lines.
  local support_lines
  support_lines=$(grep "SMART support is:" <<<"$smart_info")
  if [[ -z "$support_lines" || "$support_lines" != *Available* ]]; then
    echo "not_available"
    return
  fi

  if [[ "$support_lines" != *Enabled* ]]; then
    echo "disabled"
    return
  fi

  # Test attribute reading: the ATA attribute table carries this header.
  local attributes
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  if [[ -z "$attributes" ]] || ! grep -q "ATTRIBUTE_NAME" <<<"$attributes"; then
    echo "no_attributes"
    return
  fi

  echo "full_access"
}
# Function to calculate TBW for SSD - ENHANCED FOR KINGSTON AND NVMe
#
# Converts a raw write counter into decimal terabytes written.
#
# Arguments:
#   $1 - raw_value: counter in GiB (Kingston models), NVMe data units
#        (when $4 names "Data Units Written") or 32 MiB chunks (fallback)
#   $2 - sectors: counter in 512-byte LBAs
#   $3 - disk model string (checked for "KINGSTON", case-insensitive)
#   $4 - name of the attribute the counter came from
# Outputs:
#   TB written with two decimals (e.g. "1.07"), or "0" when no usable
#   counter was supplied.
calculate_tbw() {
  # Counters may arrive with thousands separators ("1,234,567").
  local raw_value=${1//,/}
  local sectors=${2//,/}
  local disk_model=$3
  local attribute_name=$4

  # Anything that is not a plain unsigned integer counts as "no data".
  [[ "$raw_value" =~ ^[0-9]+$ ]] || raw_value=""
  [[ "$sectors" =~ ^[0-9]+$ ]] || sectors=""

  local have_raw=0
  if [[ -n "$raw_value" && "$raw_value" != "0" ]]; then
    have_raw=1
  fi

  local count bytes_per_unit
  if (( have_raw )) && grep -qi "KINGSTON" <<<"$disk_model"; then
    # Kingston SSDs report Lifetime_Writes_GiB / Flash_Writes_GiB.
    count=$raw_value
    bytes_per_unit=1073741824
  elif (( have_raw )) && grep -qi "Data Units Written" <<<"$attribute_name"; then
    # NVMe counts in thousands of 512-byte units: 1 unit = 512,000 bytes.
    count=$raw_value
    bytes_per_unit=512000
  elif [[ -n "$sectors" && "$sectors" != "0" ]]; then
    # Total_LBAs_Written style counters: 512-byte sectors.
    count=$sectors
    bytes_per_unit=512
  elif (( have_raw )); then
    # Host_Writes_32MiB style counters: 32 MiB per unit.
    count=$raw_value
    bytes_per_unit=33554432
  else
    echo "0"
    return
  fi

  # awk is used for the float math: no 64-bit integer overflow and the
  # result keeps its leading zero ("0.50" rather than ".50").
  awk -v n="$count" -v u="$bytes_per_unit" 'BEGIN { printf "%.2f\n", n * u / 1e12 }'
}
# Function to estimate SSD lifespan with TBW remaining - ENHANCED
#
# Arguments:
#   $1 - power-on hours (empty or 0 means "no usable SMART data")
#   $2 - terabytes written so far
#   $3 - disk model string, forwarded to estimate_ssd_endurance
#   $4 - capacity in GB, forwarded to estimate_ssd_endurance
#   $5 - remaining-life percentage from the drive's wear indicator, if any
# Outputs:
#   One line with exactly four '|'-separated fields:
#     <life remaining>|<TBW remaining>|<wear status>|<source>
#   where <source> is media_wearout, tbw, estimated or unknown. The first
#   three fields are wrapped in the RED/YELLOW/GREEN ... NC colour codes.
estimate_ssd_lifespan() {
  local power_on_hours=$1
  local tbw_used=$2
  local disk_model=$3
  local capacity_gb=$4
  local media_wearout=$5

  if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then
    # Same field layout as every other return path, so callers that pick
    # fields 2 and 3 with cut get "Unknown" instead of an empty string.
    echo "Unknown|Unknown|Unknown|unknown"
    return
  fi

  local estimated_endurance
  estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")
  local tbw_remaining
  tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0")

  local color status

  # If we have media wearout indicator, use it for more accurate estimation.
  # The value is interpreted as percentage of life left for every vendor.
  if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then
    if [[ $media_wearout -le 10 ]]; then
      color=$RED
      status="Critical wear"
    elif [[ $media_wearout -le 30 ]]; then
      color=$YELLOW
      status="High wear"
    elif [[ $media_wearout -le 70 ]]; then
      color=$YELLOW
      status="Moderate wear"
    else
      color=$GREEN
      status="Healthy"
    fi
    echo "${color}${media_wearout}%${NC}|${color}${tbw_remaining} TB${NC}|${color}${status}${NC}|media_wearout"
    return
  fi

  # No wear indicator: derive wear from TBW against the estimated endurance.
  if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
    local lifespan_used lifespan_remaining
    lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0")
    lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100")

    if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then
      color=$RED
      status="High wear"
    elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then
      color=$YELLOW
      status="Moderate wear"
    else
      color=$GREEN
      status="Healthy"
    fi
    echo "${color}${lifespan_remaining}%${NC}|${color}${tbw_remaining} TB${NC}|${color}${status}${NC}|tbw"
  else
    echo "Unknown|${estimated_endurance} TB|New|estimated"
  fi
}
# Function to check a single disk with enhanced error handling
#
# Prints a human-readable health report for one disk to stdout.
#
# Arguments:
#   $1 - device path handed to smartctl
#   $2 - optional smartctl device type (forwarded as "-d <type>")
# Uses: test_smart_access, get_disk_info, calculate_tbw,
#       estimate_ssd_lifespan, estimate_hdd_lifespan, print_color and the
#       colour globals (RED, YELLOW, CYAN).
check_disk() {
  local disk=$1
  local controller=$2

  print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
  echo "=================================================="

  # Test SMART access level
  local access_level
  access_level=$(test_smart_access "$disk" "$controller")

  case "$access_level" in
    "no_access")
      print_color "$RED" "ERROR: Cannot access disk through controller"
      echo "Possible reasons:"
      echo " - Controller doesn't support SMART passthrough"
      echo " - Disk is part of a hardware RAID array"
      echo " - Insufficient permissions (try running as root)"
      echo " - Controller busy or offline"
      echo ""
      return
      ;;
    "not_available")
      print_color "$YELLOW" "SMART not available on this disk"
      echo "This disk does not support SMART monitoring"
      echo ""
      return
      ;;
    "disabled")
      print_color "$YELLOW" "SMART is disabled on this disk"
      echo "SMART is available but currently disabled"
      echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk"
      echo ""
      return
      ;;
    "no_attributes")
      print_color "$YELLOW" "WARNING: Cannot read SMART attributes"
      echo "This is common with hardware RAID controllers like PERC H730P"
      echo "Try checking through the RAID management interface"
      echo ""
      return
      ;;
    "limited_attributes")
      # NOTE(review): test_smart_access as visible in this file never
      # emits this value; the branch is kept for compatibility.
      print_color "$YELLOW" "NOTE: Limited SMART data available"
      echo "Controller is filtering some SMART attributes"
      ;;
  esac

  # Get disk information (one '|'-separated record) and split it into
  # function-local variables so nothing leaks into the caller's scope.
  local disk_info
  disk_info=$(get_disk_info "$disk" "$controller")
  local model serial capacity firmware health_status disk_type
  local power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib media_wearout
  IFS='|' read -r model serial capacity firmware health_status disk_type \
    power_on_hours reallocated_sectors pending_sectors \
    total_written host_writes_32mib media_wearout <<<"$disk_info"

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Capacity: ${capacity:-Unknown}"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"

  # Only show power on hours if available
  if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
    echo "Power On Hours: $power_on_hours"
  else
    echo "Power On Hours: Unknown"
  fi

  # Disk type specific analysis
  if [[ "$disk_type" == "SSD" ]]; then
    # Re-read the attribute table through the same controller that was
    # used for everything else; without -d a RAID member is not reachable.
    local -a smart_cmd=(smartctl)
    if [[ -n "$controller" ]]; then
      smart_cmd+=(-d "$controller")
    fi
    local attributes
    attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)

    # Name of the first write-counter attribute in the table, i.e. the one
    # get_disk_info took total_written from.
    local write_attr_name
    write_attr_name=$(grep -i "Total_LBAs_Written\|Lifetime_Writes_GiB\|Host_Writes_32MiB\|Flash_Writes_GiB\|Data Units Written" <<<"$attributes" \
      | head -1 | awk '{print $2}')

    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      case "$write_attr_name" in
        *_GiB | *_32MiB)
          # GiB / 32MiB counters are not sector counts: hand them over in
          # calculate_tbw's raw_value slot.
          tbw_used=$(calculate_tbw "$total_written" "" "$model" "$write_attr_name")
          ;;
        *)
          tbw_used=$(calculate_tbw "" "$total_written" "$model" "$write_attr_name")
          ;;
      esac
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model" "$write_attr_name")
    fi

    # Always show TBW information for SSDs
    echo "TBW Used: ${tbw_used} TB"

    # Estimate capacity (in GB) for endurance calculation
    local capacity_gb=0
    if grep -qi "GB" <<<"$capacity"; then
      capacity_gb=$(grep -o '[0-9.]*' <<<"$capacity" | head -1 | cut -d. -f1)
    elif grep -qi "TB" <<<"$capacity"; then
      capacity_gb=$(grep -o '[0-9.]*' <<<"$capacity" | head -1 | awk '{printf "%d\n", $1 * 1000}')
    else
      # get_disk_info cuts the "[480 GB]" suffix off, leaving a byte count
      # such as "480,103,981,056 bytes": derive decimal GB from that.
      local capacity_bytes=${capacity//,/}
      capacity_bytes=${capacity_bytes%%[^0-9]*}
      if [[ -n "$capacity_bytes" ]]; then
        capacity_gb=$((10#$capacity_bytes / 1000000000))
      fi
    fi
    capacity_gb=${capacity_gb:-0}

    local lifespan_info lifespan_percent tbw_remaining wear_status wear_source
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout")
    lifespan_percent=$(cut -d'|' -f1 <<<"$lifespan_info")
    tbw_remaining=$(cut -d'|' -f2 <<<"$lifespan_info")
    wear_status=$(cut -d'|' -f3 <<<"$lifespan_info")
    wear_source=$(cut -d'|' -f4 <<<"$lifespan_info")

    echo "TBW Remaining: $tbw_remaining"
    echo "Lifespan: $lifespan_percent ($wear_status)"

    # Show wear source if available
    case "$wear_source" in
      media_wearout) echo "Wear Source: Media Wearout Indicator" ;;
      tbw) echo "Wear Source: TBW Calculation" ;;
      estimated) echo "Wear Source: Estimated Endurance" ;;
    esac

  elif [[ "$disk_type" == "HDD" ]]; then
    if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then
      echo "Realloc Sectors: $reallocated_sectors"
    fi
    if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then
      echo "Pending Sectors: $pending_sectors"
    fi

    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
    echo "Lifespan: $lifespan"
  else
    print_color "$YELLOW" "Limited information available for this disk type"
    echo "This is normal for hardware RAID configurations like PERC H730P"
    echo "For detailed SAS drive information, use controller management tools"
  fi

  echo ""
}
"$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for other disk types - only main devices + for disk in /dev/vd[a-z] /dev/xvd[a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + echo "${disks[@]}" +} + +# Function to detect RAID controllers (Ubuntu specific) +detect_raid_controllers() { + local controllers=("megaraid" "cciss" "areca" "3ware" "hpt") + local raid_disks=() + + # Check for RAID controllers + for controller in "${controllers[@]}"; do + for i in {0..31}; do + # Try different disk devices for each controller + for base_disk in "/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1"; do + if [[ -b "$base_disk" ]]; then + if smartctl -d "$controller,$i" -i "$base_disk" &>/dev/null; then + raid_disks+=("$base_disk:$controller,$i") + break + fi + fi + done + done + done + + echo "${raid_disks[@]}" +} + +# Main function +main() { + print_color $BLUE "Ubuntu 24.04 Disk Health Check Script v$VERSION" + print_color $BLUE "Enhanced with PERC H730P and SAS Support" + print_color $BLUE "============================================" + echo "" + + check_dependencies + + local disks=() + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks + print_color $CYAN "Auto-detecting disks (excluding partitions)..." + local direct_disks=() + read -ra direct_disks <<< "$(detect_disks)" + + print_color $CYAN "Scanning for RAID controllers..." 
+ local raid_disks=() + read -ra raid_disks <<< "$(detect_raid_controllers)" + + # Combine both lists + disks=("${direct_disks[@]}" "${raid_disks[@]}") + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check if running as root, warn if not + if [[ $EUID -ne 0 ]]; then + print_color $YELLOW "Warning: Not running as root." + print_color $YELLOW "Some disks/controllers may show limited information." + echo "For complete results, run as: sudo $0" + echo "" + fi + + # Check each disk + for disk_info in "${disks[@]}"; do + # Check if this is a RAID disk (has controller specified) + if [[ "$disk_info" == *":"* ]]; then + IFS=':' read -r disk controller <<< "$disk_info" + check_disk "$disk" "$controller" + else + check_disk "$disk_info" + fi + done + + print_color $BLUE "Check completed!" + echo "" + print_color $CYAN "Note: For PERC H730P controllers with SAS drives:" + print_color $CYAN " - Install 'storcli' for detailed controller information" + print_color $CYAN " - Use 'smartctl -d sat /dev/sgX' to try direct access" + print_color $CYAN " - Hardware RAID controllers often limit SMART data access" + echo "" + print_color $CYAN "Ubuntu-specific tips:" + print_color $CYAN " - Use 'lsblk' to see all available block devices" + print_color $CYAN " - Use 'lshw -class disk' for detailed disk information" +} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " sudo $SCRIPT_NAME # Check all disks (as root)" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" + echo " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" + echo " $SCRIPT_NAME 
# Emit a message wrapped in a colour sequence and the reset code.
#   $1 - colour escape sequence (may be empty)
#   $2 - text; backslash escapes are interpreted (echo -e)
print_color() {
  local tint=$1 text=$2
  echo -e "${tint}${text}${NC}"
}

# Succeeds when $1 resolves to something runnable in the current shell.
command_exists() {
  command -v "$1" &>/dev/null
}

# Verify that the external tools the script relies on are installed.
# Prints the missing apt packages plus an install hint and exits 1 when
# anything is absent; otherwise returns silently.
check_dependencies() {
  local -a absent=()

  command_exists smartctl || absent+=("smartmontools")
  command_exists bc || absent+=("bc")

  if (( ${#absent[@]} == 0 )); then
    return 0
  fi

  print_color "$RED" "Error: Missing required packages: ${absent[*]}"
  echo "Install with: sudo apt update && sudo apt install ${absent[*]}"
  exit 1
}
# Helper for get_disk_info: print the first value token of the first
# "Label: value ..." line in $1 matching pattern $2 (case-insensitive),
# with thousands separators and percent signs removed.
_smart_colon_value() {
  grep -i -- "$2" <<<"$1" | head -1 | cut -d: -f2- \
    | awk '{ gsub(/[,%]/, "", $1); print $1 }'
}

# Function to get disk information with enhanced SAS and NVMe support
#
# Arguments:
#   $1 - device path handed to smartctl
#   $2 - optional smartctl device type (forwarded as "-d <type>")
# Outputs:
#   One '|'-separated record:
#     model|serial|capacity|firmware|health_status|disk_type|power_on_hours|
#     reallocated_sectors|pending_sectors|total_written|host_writes_32mib|
#     media_wearout
#   Fields that could not be determined are empty. For NVMe drives
#   media_wearout is 100 minus "Percentage Used" when that line is present.
get_disk_info() {
  local disk=$1
  local controller=$2

  # Array form keeps the controller spec a single word.
  local -a smart_cmd=(smartctl)
  if [[ -n "$controller" ]]; then
    smart_cmd+=(-d "$controller")
  fi

  local info health attributes
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  health=$("${smart_cmd[@]}" -H "$disk" 2>/dev/null)

  # For NVMe drives, use -x for extended information instead of -A.
  local is_nvme=0
  if grep -qi "NVMe" <<<"$info"; then
    is_nvme=1
    attributes=$("${smart_cmd[@]}" -x "$disk" 2>/dev/null)
  else
    attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  fi

  # Extract information with multiple fallbacks
  local model vendor serial capacity firmware health_status
  model=$(grep -i "Device Model:\|Product:\|Model Number:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  vendor=$(grep -i "Vendor:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  if [[ -n "$vendor" && -n "$model" ]]; then
    model="$vendor $model"
  fi

  serial=$(grep -i "Serial Number:\|Serial number:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)

  capacity=$(grep -i "User Capacity:\|Total NVM Capacity:\|Namespace 1 Size/Capacity:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1 | head -1)

  firmware=$(grep -i "Firmware Version:\|Firmware revision:\|Revision:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)

  health_status=$(grep -i "result:\|SMART overall-health\|Health Status:" <<<"$health" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  if [[ -z "$health_status" ]]; then
    health_status=$(grep -i "SMART overall-health" <<<"$health" | awk -F'[' '{print $2}' | cut -d']' -f1)
  fi

  # Get disk type
  local disk_type="UNKNOWN"
  if grep -qi "Solid State Device\|NVMe" <<<"$info"; then
    disk_type="SSD"
  elif grep -qi "Rotation Rate" <<<"$info"; then
    disk_type="HDD"
  elif grep -qi "SCSI\|SAS" <<<"$info"; then
    if grep -qi "15000\|10000\|7200" <<<"$info"; then
      disk_type="HDD"
    else
      disk_type="SSD"
    fi
  fi

  # Power-on hours: ATA attribute table first, then the NVMe health log.
  local power_on_hours=""
  if grep -qi "Power_On_Hours" <<<"$attributes"; then
    power_on_hours=$(grep -i "Power_On_Hours" <<<"$attributes" | head -1 | awk '{print $10}')
  elif grep -qi "Power On Hours" <<<"$attributes"; then
    # NVMe prints "Power On Hours: 1,234" - drop the separators.
    power_on_hours=$(_smart_colon_value "$attributes" "Power On Hours")
  fi
  # Keep only the leading digits (raw values can look like "12345h+07m"),
  # because callers compare this field numerically.
  power_on_hours=${power_on_hours%%[^0-9]*}

  local reallocated_sectors pending_sectors
  reallocated_sectors=$(grep -i "Reallocated_Sector_Ct" <<<"$attributes" | awk '{print $10}' | head -1)
  pending_sectors=$(grep -i "Current_Pending_Sector" <<<"$attributes" | awk '{print $10}' | head -1)

  # Write counters: attribute names differ per drive family.
  local total_written=""
  local host_writes_32mib=""
  if grep -qi "KINGSTON.*SA400" <<<"$model"; then
    # Kingston SA400 uses Flash_Writes_GiB and Lifetime_Writes_GiB
    total_written=$(grep -i "Flash_Writes_GiB" <<<"$attributes" | awk '{print $NF}' | head -1)
    if [[ -z "$total_written" ]]; then
      total_written=$(grep -i "Lifetime_Writes_GiB" <<<"$attributes" | awk '{print $NF}' | head -1)
    fi
  elif (( is_nvme )); then
    # "Data Units Written: 1,234,567 [632 GB]" - take the count, not the
    # bracketed size at the end of the line.
    total_written=$(_smart_colon_value "$attributes" "Data Units Written")
  else
    # Standard SATA SSDs
    total_written=$(grep -i "Total_LBAs_Written" <<<"$attributes" | awk '{print $10}' | head -1)
    host_writes_32mib=$(grep -i "Host_Writes_32MiB" <<<"$attributes" | awk '{print $10}' | head -1)
  fi

  # Wear indicators, normalised to "percentage of life remaining" for NVMe.
  local media_wearout=""
  if grep -qi "KINGSTON.*SA400" <<<"$model"; then
    media_wearout=$(grep -i "SSD_Life_Left" <<<"$attributes" | awk '{print $NF}' | head -1)
  elif (( is_nvme )); then
    local percentage_used
    percentage_used=$(_smart_colon_value "$attributes" "Percentage Used")
    if [[ "$percentage_used" =~ ^[0-9]+$ ]]; then
      # Convert percentage used to percentage remaining
      media_wearout=$((100 - 10#$percentage_used))
    else
      media_wearout=$(_smart_colon_value "$attributes" "Available Spare")
    fi
  else
    media_wearout=$(grep -i "Media_Wearout_Indicator\|Wear_Leveling_Count" <<<"$attributes" | awk '{print $10}' | head -1)
  fi

  echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout"
}
# Function to estimate SSD endurance based on model and capacity
#
# Arguments:
#   $1 - disk model string (matched case-insensitively against the drive
#        families below, first match wins)
#   $2 - capacity in GB
# Outputs:
#   Estimated rated endurance in TB written.
estimate_ssd_endurance() {
  local disk_model=$1
  local capacity_gb=$2

  # Each entry is "<minimum capacity in GB>:<endurance in TB>", ordered
  # from the largest tier down; $floor_tbw applies below the last tier.
  local -a tiers
  local floor_tbw

  if grep -qi "KINGSTON.*SA400" <<<"$disk_model"; then
    # Kingston consumer SSDs (SA400)
    tiers=(960:300 480:150)
    floor_tbw=80
  elif grep -qi "KINGSTON.*SA2000" <<<"$disk_model"; then
    # Kingston NVMe SSDs (SA2000)
    tiers=(2000:800 1000:400 500:200)
    floor_tbw=100
  elif grep -qi "NVMe" <<<"$disk_model"; then
    # Other NVMe SSDs: 1.2PB / 600TB / 300TB / 150TB
    tiers=(2000:1200 1000:600 500:300)
    floor_tbw=150
  elif grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD" <<<"$disk_model"; then
    # Enterprise SAS SSDs: 10PB / 6PB / 4PB / 2PB
    tiers=(1000:10000 600:6000 400:4000)
    floor_tbw=2000
  elif grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL" <<<"$disk_model"; then
    # Enterprise SATA/NVMe SSDs: 1.2PB / 600TB / 300TB / 150TB
    tiers=(1000:1200 480:600 240:300)
    floor_tbw=150
  else
    # Consumer SSDs: 600TB / 300TB / 150TB / 80TB / 40TB
    tiers=(1000:600 480:300 240:150 120:80)
    floor_tbw=40
  fi

  local tier
  for tier in "${tiers[@]}"; do
    if [[ $capacity_gb -ge ${tier%%:*} ]]; then
      echo "${tier##*:}"
      return
    fi
  done
  echo "$floor_tbw"
}
a percentage + if echo "$disk_model" | grep -qi "KINGSTON"; then + if [[ $media_wearout -le 10 ]]; then + echo "${RED}${media_wearout}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" + elif [[ $media_wearout -le 30 ]]; then + echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout" + elif [[ $media_wearout -le 70 ]]; then + echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" + else + echo "${GREEN}${media_wearout}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" + fi + else + # For other drives, media_wearout might be countdown from 100 + local wear_percent=$media_wearout + if [[ $media_wearout -le 10 ]]; then + echo "${RED}${wear_percent}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" + elif [[ $media_wearout -le 30 ]]; then + echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout" + elif [[ $media_wearout -le 70 ]]; then + echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" + else + echo "${GREEN}${wear_percent}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" + fi + fi + return + fi + + if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") + local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100") + + if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}High wear${NC}|tbw" + elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|tbw" + else + echo 
"${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|tbw" + fi + else + echo "Unknown|${estimated_endurance} TB|New|estimated" + fi +} + +# Function to estimate HDD lifespan +estimate_hdd_lifespan() { + local power_on_hours=$1 + local reallocated_sectors=$2 + local pending_sectors=$3 + + # Clean power_on_hours to extract just the numeric part + local clean_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*//') + clean_hours=${clean_hours:-0} + + if [[ -z "$clean_hours" || "$clean_hours" -eq 0 ]]; then + echo "Unknown" + return + fi + + reallocated_sectors=${reallocated_sectors:-0} + pending_sectors=${pending_sectors:-0} + + if [[ "$pending_sectors" -gt 0 ]]; then + echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + elif [[ "$reallocated_sectors" -gt 100 ]]; then + echo "${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)" + elif [[ "$reallocated_sectors" -gt 10 ]]; then + echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)" + elif [[ "$clean_hours" -gt 40000 ]]; then + echo "${YELLOW}1-2 years${NC} (High usage: $clean_hours hours)" + elif [[ "$clean_hours" -gt 25000 ]]; then + echo "${GREEN}2-3 years${NC} (Moderate usage: $clean_hours hours)" + else + echo "${GREEN}> 3 years${NC} (Low usage: $clean_hours hours)" + fi +} + +# Function to check a single disk with enhanced error handling +check_disk() { + local disk=$1 + local controller=$2 + + print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})" + echo "==================================================" + + # Test SMART access level + local access_level=$(test_smart_access "$disk" "$controller") + + case $access_level in + "no_access") + print_color $RED "ERROR: Cannot access disk through controller" + echo "Possible reasons:" + echo " - Controller doesn't support SMART passthrough" + echo " - Disk is part of a hardware RAID array" + echo " - Insufficient permissions (try running as root)" + echo " - 
Controller busy or offline" + echo "" + return + ;; + "not_available") + print_color $YELLOW "SMART not available on this disk" + echo "This disk does not support SMART monitoring" + echo "" + return + ;; + "disabled") + print_color $YELLOW "SMART is disabled on this disk" + echo "SMART is available but currently disabled" + echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk" + echo "" + return + ;; + "no_attributes") + print_color $YELLOW "WARNING: Cannot read SMART attributes" + echo "This is common with hardware RAID controllers like PERC H730P" + echo "Try checking through the RAID management interface" + echo "" + return + ;; + "limited_attributes") + print_color $YELLOW "NOTE: Limited SMART data available" + echo "Controller is filtering some SMART attributes" + ;; + esac + + # Get disk information + local disk_info=$(get_disk_info "$disk" "$controller") + IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info" + + # Display basic information + echo "Model: ${model:-Unknown}" + echo "Serial: ${serial:-Unknown}" + echo "Type: $disk_type" + echo "Capacity: ${capacity:-Unknown}" + echo "Firmware: ${firmware:-Unknown}" + echo "Health: ${health_status:-Unknown}" + + # Only show power on hours if available + if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then + echo "Power On Hours: $power_on_hours" + else + echo "Power On Hours: Unknown" + fi + + # Disk type specific analysis + if [[ "$disk_type" == "SSD" ]]; then + # Get the actual attribute name for TBW calculation + local attributes="" + if echo "$model" | grep -qi "NVMe"; then + attributes=$(smartctl -x "$disk" 2>/dev/null) + else + attributes=$(smartctl -A "$disk" 2>/dev/null) + fi + + local tbw_attribute_name=$(echo "$attributes" | grep -i "Lifetime_Writes_GiB\|Flash_Writes_GiB\|Data Units Written\|Total_LBAs_Written" | head -1 | awk '{print 
$2}') + + local tbw_used=0 + if [[ -n "$total_written" && "$total_written" != "0" ]]; then + tbw_used=$(calculate_tbw "$total_written" "" "$model" "$tbw_attribute_name") + elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then + tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model" "$tbw_attribute_name") + fi + + # Always show TBW information for SSDs + echo "TBW Used: ${tbw_used} TB" + + # Estimate capacity for endurance calculation + local capacity_gb=0 + if echo "$capacity" | grep -qi "GB"; then + capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. -f1) + elif echo "$capacity" | grep -qi "TB"; then + capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. -f1) + else + # Try to extract capacity from raw number + capacity_gb=$(echo "$capacity" | grep -o '[0-9]*' | head -1) + capacity_gb=$((capacity_gb / 1000000000)) + fi + + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout") + local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) + local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) + local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) + local wear_source=$(echo "$lifespan_info" | cut -d'|' -f4) + + echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" + + # Show wear source if available + if [[ "$wear_source" == "media_wearout" ]]; then + echo "Wear Source: Media Wearout Indicator" + elif [[ "$wear_source" == "tbw" ]]; then + echo "Wear Source: TBW Calculation" + elif [[ "$wear_source" == "estimated" ]]; then + echo "Wear Source: Estimated Endurance" + fi + + elif [[ "$disk_type" == "HDD" ]]; then + if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then + echo "Realloc Sectors: $reallocated_sectors" + fi + if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then + echo "Pending Sectors: $pending_sectors" + fi + + 
local lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}") + echo "Lifespan: $lifespan" + else + print_color $YELLOW "Limited information available for this disk type" + echo "This is normal for hardware RAID configurations like PERC H730P" + echo "For detailed SAS drive information, use controller management tools" + fi + + echo "" +} + +# Function to detect all disks with enhanced SAS support (no partitions) +detect_disks() { + local disks=() + + # Check for SATA/SAS disks - only main devices, no partitions + for disk in /dev/sd[a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for NVMe disks - only main devices, no partitions + for disk in /dev/nvme[0-9]n[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for SAS disks via SCSI generic - only main devices + for disk in /dev/sg[0-9]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + # Check for other disk types - only main devices + for disk in /dev/vd[a-z] /dev/xvd[a-z]; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + fi + done + + echo "${disks[@]}" +} + +# Function to detect RAID controllers (Ubuntu specific) +detect_raid_controllers() { + local controllers=("megaraid" "cciss" "areca" "3ware" "hpt") + local raid_disks=() + + # Check for RAID controllers + for controller in "${controllers[@]}"; do + for i in {0..31}; do + # Try different disk devices for each controller + for base_disk in "/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1"; do + if [[ -b "$base_disk" ]]; then + if smartctl -d "$controller,$i" -i "$base_disk" &>/dev/null; then + raid_disks+=("$base_disk:$controller,$i") + break + fi + fi + done + done + done + + echo "${raid_disks[@]}" +} + +# Main function +main() { + print_color $BLUE "Ubuntu 24.04 Disk Health Check Script v$VERSION" + print_color $BLUE "Enhanced with PERC H730P and SAS Support" + print_color $BLUE "============================================" + 
echo "" + + check_dependencies + + local disks=() + + # If specific disk provided, check only that disk + if [[ $# -gt 0 ]]; then + for disk in "$@"; do + if [[ -b "$disk" ]]; then + disks+=("$disk") + else + print_color $RED "Error: $disk is not a valid block device" + fi + done + else + # Auto-detect disks + print_color $CYAN "Auto-detecting disks (excluding partitions)..." + local direct_disks=() + read -ra direct_disks <<< "$(detect_disks)" + + print_color $CYAN "Scanning for RAID controllers..." + local raid_disks=() + read -ra raid_disks <<< "$(detect_raid_controllers)" + + # Combine both lists + disks=("${direct_disks[@]}" "${raid_disks[@]}") + fi + + if [[ ${#disks[@]} -eq 0 ]]; then + print_color $RED "No disks found or accessible" + echo "Try running as root or specifying disk paths manually" + exit 1 + fi + + print_color $GREEN "Found ${#disks[@]} disk(s) to check" + echo "" + + # Check if running as root, warn if not + if [[ $EUID -ne 0 ]]; then + print_color $YELLOW "Warning: Not running as root." + print_color $YELLOW "Some disks/controllers may show limited information." + echo "For complete results, run as: sudo $0" + echo "" + fi + + # Check each disk + for disk_info in "${disks[@]}"; do + # Check if this is a RAID disk (has controller specified) + if [[ "$disk_info" == *":"* ]]; then + IFS=':' read -r disk controller <<< "$disk_info" + check_disk "$disk" "$controller" + else + check_disk "$disk_info" + fi + done + + print_color $BLUE "Check completed!" 
+ echo "" + print_color $CYAN "Note: For PERC H730P controllers with SAS drives:" + print_color $CYAN " - Install 'storcli' for detailed controller information" + print_color $CYAN " - Use 'smartctl -d sat /dev/sgX' to try direct access" + print_color $CYAN " - Hardware RAID controllers often limit SMART data access" + echo "" + print_color $CYAN "Ubuntu-specific tips:" + print_color $CYAN " - Use 'lsblk' to see all available block devices" + print_color $CYAN " - Use 'lshw -class disk' for detailed disk information" +} + +# Usage information +usage() { + echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" + echo "" + echo "If no disks specified, auto-detects all available disks" + echo "" + echo "Examples:" + echo " $SCRIPT_NAME # Check all auto-detected disks" + echo " sudo $SCRIPT_NAME # Check all disks (as root)" + echo " $SCRIPT_NAME /dev/sda # Check specific disk" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" + echo " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" + echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" +} + +# Parse command line arguments +case "${1:-}" in + -h|--help) + usage + exit 0 + ;; + -v|--version) + echo "$SCRIPT_NAME version $VERSION" + exit 0 + ;; + *) + main "$@" + ;; +esac diff --git a/old/ubuntu-v2.8.sh b/old/obsolete/ubuntu-v2.8.sh old mode 100755 new mode 100644 similarity index 100% rename from old/ubuntu-v2.8.sh rename to old/obsolete/ubuntu-v2.8.sh diff --git a/old/ubuntu-v3.0.sh b/old/obsolete/ubuntu-v3.0.sh old mode 100755 new mode 100644 similarity index 100% rename from old/ubuntu-v3.0.sh rename to old/obsolete/ubuntu-v3.0.sh diff --git a/ubuntu-v2.3.sh b/old/ubuntu-v2.3.sh old mode 100755 new mode 100644 similarity index 100% rename from ubuntu-v2.3.sh rename to old/ubuntu-v2.3.sh diff --git a/ubuntu-v2.4.sh b/old/ubuntu-v2.4.sh old mode 100755 new mode 100644 similarity index 100% rename from ubuntu-v2.4.sh rename to old/ubuntu-v2.4.sh diff --git a/old/ubuntu-v2.5.sh b/old/ubuntu-v2.5.sh index 
bb64ff9..ea67d35 100755 --- a/old/ubuntu-v2.5.sh +++ b/old/ubuntu-v2.5.sh @@ -1,8 +1,8 @@ #!/bin/bash -# Disk Health Check Script for Ubuntu 24.04 -# Enhanced with SAS/PERC H730P controller support -# Checks SSD TBW/lifespan and HDD health status +# Disk Health Check Script for Ubuntu +# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid +# Supports consumer and enterprise disk classification SCRIPT_NAME=$(basename "$0") VERSION="2.5" @@ -13,6 +13,7 @@ GREEN=$(tput setaf 2) YELLOW=$(tput setaf 3) BLUE=$(tput setaf 4) CYAN=$(tput setaf 6) +MAGENTA=$(tput setaf 5) NC=$(tput sgr0) # Function to print colored output @@ -46,190 +47,153 @@ check_dependencies() { fi } -# Function to test SMART access and get available data - FIXED VERSION -test_smart_access() { - local disk=$1 - local controller=$2 - - local smart_cmd="smartctl" - [[ -n "$controller" ]] && smart_cmd+=" -d $controller" - - # Test basic SMART access - if ! $smart_cmd -i "$disk" &>/dev/null; then - echo "no_access" - return - fi - - # Get SMART information - local smart_info=$($smart_cmd -i "$disk" 2>/dev/null) - - # Check if SMART is available - FIXED PARSING - if ! echo "$smart_info" | grep -q "SMART support is:"; then - echo "not_available" - return - fi - - # Extract SMART status - FIXED LOGIC - local smart_support_line=$(echo "$smart_info" | grep "SMART support is:") - local smart_available=$(echo "$smart_support_line" | grep -q "Available" && echo "Available" || echo "") - local smart_enabled=$(echo "$smart_support_line" | grep -q "Enabled" && echo "Enabled" || echo "") - - if [[ -z "$smart_available" ]]; then - echo "not_available" - return - fi - - if [[ -z "$smart_enabled" ]]; then - echo "disabled" - return - fi - - # Test attribute reading - local attributes=$($smart_cmd -A "$disk" 2>/dev/null) - if [[ -z "$attributes" ]] || ! 
echo "$attributes" | grep -q "ATTRIBUTE_NAME"; then - echo "no_attributes" - return - fi - - echo "full_access" -} +# TBW endurance standards (using lowest numbers) +declare -A CONSUMER_TBW=( + ["250"]=150 + ["500"]=300 + ["1000"]=600 + ["2000"]=1200 + ["4000"]=2400 + ["8000"]=4800 +) -# Function to get disk information with enhanced SAS support -get_disk_info() { - local disk=$1 - local controller=$2 - - local smart_cmd="smartctl" - [[ -n "$controller" ]] && smart_cmd+=" -d $controller" - - local info=$($smart_cmd -i "$disk" 2>/dev/null) - local attributes=$($smart_cmd -A "$disk" 2>/dev/null) - local health=$($smart_cmd -H "$disk" 2>/dev/null) - - # Extract information with multiple fallbacks for SAS drives - local model=$(echo "$info" | grep -i "Device Model:\|Product:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - local vendor=$(echo "$info" | grep -i "Vendor:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model" - - local serial=$(echo "$info" | grep -i "Serial Number:\|Serial number:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - - local capacity=$(echo "$info" | grep -i "User Capacity:\|Total NVM Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1 | head -1) - - local firmware=$(echo "$info" | grep -i "Firmware Version:\|Firmware revision:\|Revision:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - - local health_status=$(echo "$health" | grep -i "result:\|SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep -i "SMART overall-health" | awk -F'[' '{print $2}' | cut -d']' -f1) - - # Get disk type with SAS support - local disk_type="UNKNOWN" - if echo "$info" | grep -qi "Solid State Device"; then - disk_type="SSD" - elif echo "$info" | grep -qi "Rotation Rate"; then - disk_type="HDD" - elif echo "$info" | grep -qi "SCSI\|SAS"; then - # SAS drives often don't specify, check rotation rate - if echo "$info" | grep -qi 
"15000\|10000\|7200"; then - disk_type="HDD" - else - disk_type="SSD" - fi - fi - - # Extract SMART attributes with multiple field attempts - local power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours" | awk '{print $10}' | head -1) - - local reallocated_sectors=$(echo "$attributes" | grep -i "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) - - local pending_sectors=$(echo "$attributes" | grep -i "Current_Pending_Sector" | awk '{print $10}' | head -1) - - # For Kingston and other SSDs with different attribute names - local total_written=$(echo "$attributes" | grep -i "Total_LBAs_Written\|Lifetime_Writes_GiB\|Host_Writes_32MiB\|Flash_Writes_GiB" | awk '{print $10}' | head -1) - local host_writes_32mib=$(echo "$attributes" | grep -i "Host_Writes_32MiB" | awk '{print $10}' | head -1) - - # For wear leveling indicators - local media_wearout=$(echo "$attributes" | grep -i "Media_Wearout_Indicator\|Wear_Leveling_Count\|SSD_Life_Left" | awk '{print $10}' | head -1) - - echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout" -} +declare -A ENTERPRISE_TBW=( + ["250"]=450 + ["500"]=900 + ["1000"]=1800 + ["2000"]=3600 + ["4000"]=7200 + ["8000"]=14400 +) -# Function to calculate TBW for SSD - ENHANCED FOR KINGSTON -calculate_tbw() { - local raw_value=$1 - local sectors=$2 - local disk_model=$3 +# Function to get closest capacity tier +get_capacity_tier() { + local capacity_gb=$1 + local tiers=("250" "500" "1000" "2000" "4000" "8000") - # Kingston SSDs use Lifetime_Writes_GiB and Flash_Writes_GiB - if echo "$disk_model" | grep -qi "KINGSTON"; then - if [[ -n "$raw_value" && "$raw_value" != "0" ]]; then - # Convert from GiB to TB - local tbw=$(echo "scale=2; $raw_value / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" + for tier in "${tiers[@]}"; do + if [[ $capacity_gb -le $tier ]]; then + echo $tier return fi + done + # For larger than 8TB, use 
proportional scaling from 4TB + echo "8000" +} + +# Function to convert bytes to human readable +bytes_to_human() { + local bytes=$1 + if [[ $bytes -ge 1099511627776 ]]; then + echo "$(echo "scale=2; $bytes / 1099511627776" | bc 2>/dev/null || echo "0") TB" + elif [[ $bytes -ge 1073741824 ]]; then + echo "$(echo "scale=2; $bytes / 1073741824" | bc 2>/dev/null || echo "0") GB" + elif [[ $bytes -ge 1048576 ]]; then + echo "$(echo "scale=2; $bytes / 1048576" | bc 2>/dev/null || echo "0") MB" + else + echo "$bytes bytes" fi - - if [[ -n "$sectors" && "$sectors" != "0" ]]; then - local bytes=$((sectors * 512)) - local tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" - elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then - local tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" +} + +# Function to extract numeric hours from power_on_hours field +extract_numeric_hours() { + local power_on_hours=$1 + # Remove everything after non-numeric characters + local numeric_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*$//') + if [[ -n "$numeric_hours" && "$numeric_hours" =~ ^[0-9]+$ ]]; then + echo "$numeric_hours" else echo "0" fi } -# Function to estimate SSD endurance based on model and capacity -estimate_ssd_endurance() { - local disk_model=$1 - local capacity_gb=$2 +# Function to get disk type and interface +get_disk_info() { + local disk=$1 - # Kingston consumer SSDs - if echo "$disk_model" | grep -qi "KINGSTON.*SA400"; then - if [[ $capacity_gb -ge 960 ]]; then - echo "300" # 300TB for 960GB Kingston SA400 - elif [[ $capacity_gb -ge 480 ]]; then - echo "150" # 150TB for 480GB Kingston - else - echo "80" # 80TB for smaller Kingston + local info=$(smartctl -i "$disk" 2>/dev/null) + local transport="" + local disk_type="UNKNOWN" + local is_enterprise=false + + # Check if it's NVMe + if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then + disk_type="NVMe" + 
transport="NVMe" + # Check for SAS + elif echo "$info" | grep -qi "SAS"; then + disk_type="SAS" + transport="SAS" + is_enterprise=true + # Check for SATA SSD + elif echo "$info" | grep -qi "Solid State Device"; then + disk_type="SSD" + transport="SATA" + # Check for SATA HDD + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="HDD" + transport="SATA" + fi + + # Check for enterprise features + if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then + is_enterprise=true + fi + + # Check device type by model name + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then + is_enterprise=true fi - # SAS SSDs typically have very high endurance - elif echo "$disk_model" | grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD"; then - # Enterprise SAS SSDs - very high endurance - if [[ $capacity_gb -ge 1000 ]]; then - echo "10000" # 10PB for 1TB+ enterprise SAS SSD - elif [[ $capacity_gb -ge 600 ]]; then - echo "6000" # 6PB for 600GB enterprise SAS SSD - elif [[ $capacity_gb -ge 400 ]]; then - echo "4000" # 4PB for 400GB enterprise SAS SSD + fi + + echo "$disk_type|$transport|$is_enterprise" +} + +# Function to calculate TBW for SSD/NVMe +calculate_tbw() { + local disk_type=$1 + local raw_value=$2 + local sectors=$3 + + local tbw=0 + + if [[ -n "$sectors" && "$sectors" != "0" ]]; then + # Calculate from sectors (most common for SATA SSDs) + local bytes=$((sectors * 512)) + tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + if [[ "$disk_type" == "NVMe" ]]; then + # NVMe: raw value is in 32MB units + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") else - echo "2000" # 2PB for smaller enterprise SAS 
SSD - fi - elif echo "$disk_model" | grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL"; then - # Enterprise SATA/NVMe SSDs - if [[ $capacity_gb -ge 1000 ]]; then - echo "1200" # 1.2PB for 1TB enterprise - elif [[ $capacity_gb -ge 480 ]]; then - echo "600" # 600TB for 480GB enterprise - elif [[ $capacity_gb -ge 240 ]]; then - echo "300" # 300TB for 240GB enterprise - else - echo "150" # 150TB for smaller enterprise + # SATA SSD: various manufacturers + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") fi + fi + + echo "$tbw" +} + +# Function to get estimated endurance +get_estimated_endurance() { + local capacity_gb=$1 + local is_enterprise=$2 + local disk_type=$3 + + # HDDs don't have TBW + if [[ "$disk_type" == "HDD" ]]; then + echo "N/A" + return + fi + + local capacity_tier=$(get_capacity_tier "$capacity_gb") + + if [[ "$is_enterprise" == "true" ]]; then + echo "${ENTERPRISE_TBW[$capacity_tier]}" else - # Consumer SSDs - if [[ $capacity_gb -ge 1000 ]]; then - echo "600" # 600TB for 1TB consumer - elif [[ $capacity_gb -ge 480 ]]; then - echo "300" # 300TB for 480GB consumer - elif [[ $capacity_gb -ge 240 ]]; then - echo "150" # 150TB for 240GB consumer - elif [[ $capacity_gb -ge 120 ]]; then - echo "80" # 80TB for 120GB consumer - else - echo "40" # 40TB for smaller drives - fi + echo "${CONSUMER_TBW[$capacity_tier]}" fi } @@ -237,60 +201,40 @@ estimate_ssd_endurance() { estimate_ssd_lifespan() { local power_on_hours=$1 local tbw_used=$2 - local disk_model=$3 - local capacity_gb=$4 - local media_wearout=$5 + local estimated_endurance=$3 + local disk_type=$4 if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then - echo "Unknown||Unknown||Unknown" + echo "Unknown||Unknown|New" return fi - local estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb") - local tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0") - - # If we have media wearout 
indicator, use it for more accurate estimation - if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then - # For Kingston, SSD_Life_Left is already a percentage - if echo "$disk_model" | grep -qi "KINGSTON"; then - if [[ $media_wearout -le 10 ]]; then - echo "${RED}${media_wearout}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" - elif [[ $media_wearout -le 30 ]]; then - echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout" - elif [[ $media_wearout -le 70 ]]; then - echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" - else - echo "${GREEN}${media_wearout}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" - fi - else - # For other drives, media_wearout might be countdown from 100 - local wear_percent=$media_wearout - if [[ $media_wearout -le 10 ]]; then - echo "${RED}${wear_percent}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" - elif [[ $media_wearout -le 30 ]]; then - echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout" - elif [[ $media_wearout -le 70 ]]; then - echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" - else - echo "${GREEN}${wear_percent}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" - fi - fi + if [[ "$estimated_endurance" == "N/A" ]]; then + echo "N/A|N/A|N/A|HDD" return fi - if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then - local lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") + # Handle the case where tbw_used might have formatting issues + local clean_tbw_used=$(echo "$tbw_used" | sed 's/[^0-9.]//g') + if [[ -z "$clean_tbw_used" ]]; then + clean_tbw_used=0 + fi + + local tbw_remaining=$(echo "scale=2; $estimated_endurance - 
$clean_tbw_used" | bc 2>/dev/null || echo "$estimated_endurance") + + if [[ $(echo "$clean_tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100") if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then - echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}High wear${NC}|tbw" + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type" elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then - echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|tbw" + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type" else - echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|tbw" + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type" fi else - echo "Unknown|${estimated_endurance} TB|New|estimated" + echo "Unknown|${estimated_endurance} TB|New|$disk_type" fi } @@ -299,185 +243,344 @@ estimate_hdd_lifespan() { local power_on_hours=$1 local reallocated_sectors=$2 local pending_sectors=$3 + local start_stop_count=$4 + local load_cycle_count=$5 + local disk_type=$6 - if [[ -z "$power_on_hours" ]]; then + # Extract numeric hours only + local numeric_hours=$(extract_numeric_hours "$power_on_hours") + + if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then echo "Unknown" return fi - power_on_hours=${power_on_hours:-0} - reallocated_sectors=${reallocated_sectors:-0} - pending_sectors=${pending_sectors:-0} + local severity=0 + # Critical issues if [[ "$pending_sectors" -gt 0 ]]; then echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + return elif [[ "$reallocated_sectors" -gt 100 ]]; then - echo 
"${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)" + severity=$((severity + 3)) elif [[ "$reallocated_sectors" -gt 10 ]]; then - echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)" - elif [[ "$power_on_hours" -gt 40000 ]]; then - echo "${YELLOW}1-2 years${NC} (High usage: $power_on_hours hours)" - elif [[ "$power_on_hours" -gt 25000 ]]; then - echo "${GREEN}2-3 years${NC} (Moderate usage: $power_on_hours hours)" + severity=$((severity + 2)) + elif [[ "$reallocated_sectors" -gt 0 ]]; then + severity=$((severity + 1)) + fi + + # Usage-based assessment + if [[ "$numeric_hours" -gt 50000 ]]; then + severity=$((severity + 3)) + elif [[ "$numeric_hours" -gt 30000 ]]; then + severity=$((severity + 2)) + elif [[ "$numeric_hours" -gt 15000 ]]; then + severity=$((severity + 1)) + fi + + # Mechanical wear (for HDDs) + if [[ "$disk_type" == "HDD" ]]; then + if [[ "$start_stop_count" -gt 50000 ]]; then + severity=$((severity + 2)) + elif [[ "$start_stop_count" -gt 20000 ]]; then + severity=$((severity + 1)) + fi + + if [[ "$load_cycle_count" -gt 500000 ]]; then + severity=$((severity + 2)) + elif [[ "$load_cycle_count" -gt 200000 ]]; then + severity=$((severity + 1)) + fi + fi + + if [[ $severity -ge 5 ]]; then + echo "${RED}< 6 months${NC} (Multiple risk factors)" + elif [[ $severity -ge 3 ]]; then + echo "${YELLOW}6-18 months${NC} (Moderate wear)" + elif [[ $severity -ge 1 ]]; then + echo "${YELLOW}1-3 years${NC} (Light wear)" else - echo "${GREEN}> 3 years${NC} (Low usage: $power_on_hours hours)" + echo "${GREEN}> 3 years${NC} (Healthy)" fi } -# Function to check a single disk with enhanced error handling +# Function to check soft-raid (MDRAID) +check_mdraid() { + local md_devices=() + + if [[ -f /proc/mdstat ]]; then + while IFS= read -r line; do + if [[ $line =~ ^md[0-9]+ ]]; then + md_devices+=("/dev/${line%% *}") + fi + done < /proc/mdstat + fi + + for md in "${md_devices[@]}"; do + if [[ -b "$md" ]]; then + print_color 
$MAGENTA "Found software RAID: $md" + if command_exists mdadm; then + local md_info=$(mdadm --detail "$md" 2>/dev/null) + if [[ -n "$md_info" ]]; then + echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "" + fi + fi + fi + done +} + +# Function to extract capacity in GB from various formats +extract_capacity_gb() { + local capacity=$1 + local capacity_gb=0 + + # Try different patterns to extract capacity + if [[ $capacity =~ \[([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: [1.82 TB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ \[([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: [500.1 GB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,]+)\s+bytes ]]; then + # Pattern: 500,107,862,016 bytes + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: 1.82TB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + elif [[ $capacity =~ ([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: 500.1GB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) + fi + + # Ensure we have a valid number + if [[ -z "$capacity_gb" || ! 
"$capacity_gb" =~ ^[0-9]+$ || "$capacity_gb" -le 0 ]]; then + echo "0" + else + echo "$capacity_gb" + fi +} + +# Function to get NVMe capacity using smartctl +get_nvme_capacity() { + local disk=$1 + local nvme_info=$(smartctl -i "$disk" 2>/dev/null) + local capacity="" + + # Try to get capacity from different fields + capacity=$(echo "$nvme_info" | grep -i "Total NVM Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "Namespace 1 Size/Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + fi + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "User Capacity" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + fi + + echo "$capacity" +} + +# Function to get human readable capacity +get_human_capacity() { + local capacity=$1 + local capacity_gb=$2 + + if [[ $capacity_gb -ge 1000 ]]; then + echo "$(echo "scale=2; $capacity_gb / 1000" | bc) TB" + else + echo "${capacity_gb} GB" + fi +} + +# Function to check a single disk check_disk() { local disk=$1 - local controller=$2 - print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})" + print_color $CYAN "Checking disk: $disk" echo "==================================================" - # Test SMART access level - local access_level=$(test_smart_access "$disk" "$controller") - - case $access_level in - "no_access") - print_color $RED "ERROR: Cannot access disk through controller" - echo "Possible reasons:" - echo " - Controller doesn't support SMART passthrough" - echo " - Disk is part of a hardware RAID array" - echo " - Insufficient permissions (try running as root)" - echo " - Controller busy or offline" - echo "" - return - ;; - "not_available") - print_color $YELLOW "SMART not available on this disk" - echo "This disk does not support SMART monitoring" - echo "" - return - ;; - "disabled") - print_color $YELLOW "SMART is disabled on this disk" - echo "SMART is available but currently disabled" - echo "To enable manually: 
smartctl -s on ${controller:+-d $controller} $disk" - echo "" - return - ;; - "no_attributes") - print_color $YELLOW "WARNING: Cannot read SMART attributes" - echo "This is common with hardware RAID controllers like PERC H730P" - echo "Try checking through the RAID management interface" - echo "" - return - ;; - "limited_attributes") - print_color $YELLOW "NOTE: Limited SMART data available" - echo "Controller is filtering some SMART attributes" - ;; - esac + # Check if disk exists and is accessible + if [[ ! -b "$disk" ]]; then + print_color $RED "Error: $disk is not a valid block device" + echo "" + return + fi # Get disk information - local disk_info=$(get_disk_info "$disk" "$controller") - IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info" + local disk_info=$(get_disk_info "$disk") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$(smartctl -i "$disk" 2>/dev/null) + local health=$(smartctl -H "$disk" 2>/dev/null) + local attributes=$(smartctl -A "$disk" 2>/dev/null) + + # Check if smartctl command succeeded + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access." 
+ echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # For NVMe disks, try to get capacity from different fields + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_capacity=$(get_nvme_capacity "$disk") + if [[ -n "$nvme_capacity" ]]; then + capacity="$nvme_capacity" + fi + fi + + # Extract capacity in GB and human readable format + local capacity_gb=$(extract_capacity_gb "$capacity") + local capacity_human=$(get_human_capacity "$capacity" "$capacity_gb") + + # If capacity extraction failed, try alternative method + if [[ "$capacity_gb" -eq 0 ]]; then + # Try to get capacity from model name or other methods + if [[ "$disk_type" == "NVMe" && "$model" =~ 500[Gg] ]]; then + capacity_gb=500 + capacity_human="500 GB" + elif [[ "$model" =~ 960[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then + capacity_gb=1000 + capacity_human="1 TB" + elif [[ "$model" =~ 2[Tt] ]]; then + capacity_gb=2000 + capacity_human="2 TB" + elif [[ "$model" =~ 500[Gg] ]]; then + capacity_gb=500 + capacity_human="500 GB" + elif [[ "$model" =~ 250[Gg] ]]; then + capacity_gb=250 + capacity_human="250 GB" + fi + fi + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract SMART attributes + local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1) + local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw") 
+ local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) + local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1) + local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}' | head -1) + local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1) + local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1) + local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}' | head -1) + + # For NVMe disks using smartctl extended attributes + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null) + # Extract data units written for NVMe + local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $4}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + # Convert data units to sectors (1 data unit = 1000 sectors for NVMe) + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + else + # Try alternative field + data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + fi + fi + # Get power on hours for NVMe + local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') + if [[ -n "$nvme_power_hours" ]]; then + power_on_hours="$nvme_power_hours" + fi + fi # Display basic information echo "Model: ${model:-Unknown}" echo "Serial: ${serial:-Unknown}" echo "Type: $disk_type" - echo "Capacity: ${capacity:-Unknown}" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: $capacity_human" echo "Firmware: ${firmware:-Unknown}" echo "Health: ${health_status:-Unknown}" - 
- # Only show power on hours if available - if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then - echo "Power On Hours: $power_on_hours" - else - echo "Power On Hours: Unknown" - fi + echo "Power On Hours: ${power_on_hours:-Unknown}" # Disk type specific analysis - if [[ "$disk_type" == "SSD" ]]; then + if [[ "$disk_type" == "HDD" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" + [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" + + local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") + echo "Lifespan: $lifespan" + + elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then local tbw_used=0 if [[ -n "$total_written" && "$total_written" != "0" ]]; then - tbw_used=$(calculate_tbw "" "$total_written" "$model") + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then - tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model") + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") fi - if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then - echo "TBW Used: ${tbw_used} TB" - fi + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type") - # Estimate capacity for endurance calculation - local capacity_gb=0 - if echo "$capacity" | grep -qi "GB"; then - capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. -f1) - elif echo "$capacity" | grep -qi "TB"; then - capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. 
-f1) - fi + echo "TBW Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)" - local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout") + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) - local wear_source=$(echo "$lifespan_info" | cut -d'|' -f4) - if [[ "$wear_source" != "media_wearout" && $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + if [[ "$estimated_endurance" != "N/A" ]]; then echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" fi - echo "Lifespan: $lifespan_percent ($wear_status)" - - # Show wear source if available - if [[ "$wear_source" == "media_wearout" ]]; then - echo "Wear Source: Media Wearout Indicator" - elif [[ "$wear_source" == "tbw" ]]; then - echo "Wear Source: TBW Calculation" - elif [[ "$wear_source" == "estimated" ]]; then - echo "Wear Source: Estimated Endurance" + # Show mechanical attributes for SAS drives that might be SSDs + if [[ "$disk_type" == "SAS" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" fi - - elif [[ "$disk_type" == "HDD" ]]; then - if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then - echo "Realloc Sectors: $reallocated_sectors" - fi - if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then - echo "Pending Sectors: $pending_sectors" - fi - - local lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}") - echo "Lifespan: $lifespan" else - print_color $YELLOW "Limited information available for this disk type" - echo "This is normal for hardware RAID configurations like PERC 
H730P" - echo "For detailed SAS drive information, use controller management tools" + print_color $YELLOW "Unknown disk type - limited information available" fi echo "" } -# Function to detect all disks with enhanced SAS support (no partitions) - FIXED +# Function to detect all disks detect_disks() { local disks=() - # Check for SATA/SAS disks - only main devices, no partitions - for disk in /dev/sd[a-z]; do + # Check for SATA/SAS disks + for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do if [[ -b "$disk" ]]; then disks+=("$disk") fi done - # Check for NVMe disks - only main devices, no partitions + # Check for NVMe disks (base devices only, no partitions) for disk in /dev/nvme[0-9]n[0-9]; do if [[ -b "$disk" ]]; then disks+=("$disk") fi done - # Check for SAS disks via SCSI generic - only main devices - for disk in /dev/sg[0-9]; do - if [[ -b "$disk" ]]; then - disks+=("$disk") - fi - done - - # Check for other disk types - only main devices + # Check for other disk types for disk in /dev/vd[a-z] /dev/xvd[a-z]; do if [[ -b "$disk" ]]; then disks+=("$disk") @@ -487,40 +590,19 @@ detect_disks() { echo "${disks[@]}" } -# Function to detect RAID controllers (Ubuntu specific) - FIXED -detect_raid_controllers() { - local controllers=("megaraid" "cciss" "areca" "3ware" "hpt") - local raid_disks=() - - # Check for RAID controllers - for controller in "${controllers[@]}"; do - for i in {0..31}; do - # Try different disk devices for each controller - for base_disk in "/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1"; do - if [[ -b "$base_disk" ]]; then - if smartctl -d "$controller,$i" -i "$base_disk" &>/dev/null; then - raid_disks+=("$base_disk:$controller,$i") - break - fi - fi - done - done - done - - echo "${raid_disks[@]}" -} - -# Main function - FIXED +# Main function main() { - print_color $BLUE "Ubuntu 24.04 Disk Health Check Script v$VERSION" - print_color $BLUE "Enhanced with PERC H730P and SAS Support" - print_color $BLUE 
"============================================" + print_color $BLUE "Disk Health Check Script v$VERSION for Ubuntu" + print_color $BLUE "==============================================" echo "" check_dependencies local disks=() + # Check for soft-raid first + check_mdraid + # If specific disk provided, check only that disk if [[ $# -gt 0 ]]; then for disk in "$@"; do @@ -531,17 +613,9 @@ main() { fi done else - # Auto-detect disks - FIXED: don't mix output with disk detection - print_color $CYAN "Auto-detecting disks (excluding partitions)..." - local direct_disks=() - read -ra direct_disks <<< "$(detect_disks)" - - print_color $CYAN "Scanning for RAID controllers..." - local raid_disks=() - read -ra raid_disks <<< "$(detect_raid_controllers)" - - # Combine both lists - disks=("${direct_disks[@]}" "${raid_disks[@]}") + # Auto-detect disks + print_color $CYAN "Auto-detecting disks..." + read -ra disks <<< "$(detect_disks)" fi if [[ ${#disks[@]} -eq 0 ]]; then @@ -555,33 +629,20 @@ main() { # Check if running as root, warn if not if [[ $EUID -ne 0 ]]; then - print_color $YELLOW "Warning: Not running as root." - print_color $YELLOW "Some disks/controllers may show limited information." + print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible." echo "For complete results, run as: sudo $0" echo "" fi # Check each disk - for disk_info in "${disks[@]}"; do - # Check if this is a RAID disk (has controller specified) - if [[ "$disk_info" == *":"* ]]; then - IFS=':' read -r disk controller <<< "$disk_info" - check_disk "$disk" "$controller" - else - check_disk "$disk_info" - fi + for disk in "${disks[@]}"; do + check_disk "$disk" done print_color $BLUE "Check completed!" 
echo "" - print_color $CYAN "Note: For PERC H730P controllers with SAS drives:" - print_color $CYAN " - Install 'storcli' for detailed controller information" - print_color $CYAN " - Use 'smartctl -d sat /dev/sgX' to try direct access" - print_color $CYAN " - Hardware RAID controllers often limit SMART data access" - echo "" - print_color $CYAN "Ubuntu-specific tips:" - print_color $CYAN " - Use 'lsblk' to see all available block devices" - print_color $CYAN " - Use 'lshw -class disk' for detailed disk information" + print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers." + print_color $YELLOW " Actual endurance for your specific drive model may be higher." } # Usage information @@ -594,9 +655,10 @@ usage() { echo " $SCRIPT_NAME # Check all auto-detected disks" echo " sudo $SCRIPT_NAME # Check all disks (as root)" echo " $SCRIPT_NAME /dev/sda # Check specific disk" - echo " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" - echo " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" } # Parse command line arguments diff --git a/old/ubuntu-v2.6.sh b/old/ubuntu-v2.6.sh index 44ad14b..b75c9d3 100755 --- a/old/ubuntu-v2.6.sh +++ b/old/ubuntu-v2.6.sh @@ -1,8 +1,8 @@ #!/bin/bash -# Disk Health Check Script for Ubuntu 24.04 -# Enhanced with SAS/PERC H730P controller support -# Checks SSD TBW/lifespan and HDD health status +# Disk Health Check Script for Ubuntu +# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid +# Supports consumer and enterprise disk classification SCRIPT_NAME=$(basename "$0") VERSION="2.6" @@ -13,6 +13,7 @@ GREEN=$(tput setaf 2) YELLOW=$(tput setaf 3) BLUE=$(tput setaf 4) CYAN=$(tput setaf 6) +MAGENTA=$(tput setaf 5) NC=$(tput sgr0) # Function to print colored output @@ -46,473 +47,622 @@ 
check_dependencies() { fi } -# Function to test SMART access and get available data - ENHANCED FOR NVMe -test_smart_access() { - local disk=$1 - local controller=$2 - - local smart_cmd="smartctl" - [[ -n "$controller" ]] && smart_cmd+=" -d $controller" - - # Test basic SMART access - if ! $smart_cmd -i "$disk" &>/dev/null; then - echo "no_access" - return - fi - - # Get SMART information - local smart_info=$($smart_cmd -i "$disk" 2>/dev/null) - - # Check if this is an NVMe drive - if echo "$smart_info" | grep -qi "NVMe"; then - # NVMe drives have different SMART implementation - if $smart_cmd -H "$disk" &>/dev/null; then - echo "full_access" - else - echo "no_attributes" - fi - return - fi - - # Check if SMART is available for SATA/SAS - if ! echo "$smart_info" | grep -q "SMART support is:"; then - echo "not_available" - return - fi - - # Extract SMART status - local smart_support_line=$(echo "$smart_info" | grep "SMART support is:") - local smart_available=$(echo "$smart_support_line" | grep -q "Available" && echo "Available" || echo "") - local smart_enabled=$(echo "$smart_support_line" | grep -q "Enabled" && echo "Enabled" || echo "") - - if [[ -z "$smart_available" ]]; then - echo "not_available" - return - fi - - if [[ -z "$smart_enabled" ]]; then - echo "disabled" - return - fi - - # Test attribute reading - local attributes=$($smart_cmd -A "$disk" 2>/dev/null) - if [[ -z "$attributes" ]] || ! 
echo "$attributes" | grep -q "ATTRIBUTE_NAME"; then - echo "no_attributes" - return - fi - - echo "full_access" -} +# TBW endurance standards (using lowest numbers) +declare -A CONSUMER_TBW=( + ["250"]=150 + ["500"]=300 + ["1000"]=600 + ["2000"]=1200 + ["4000"]=2400 + ["8000"]=4800 +) -# Function to get disk information with enhanced SAS and NVMe support -get_disk_info() { - local disk=$1 - local controller=$2 - - local smart_cmd="smartctl" - [[ -n "$controller" ]] && smart_cmd+=" -d $controller" - - local info=$($smart_cmd -i "$disk" 2>/dev/null) - local attributes=$($smart_cmd -A "$disk" 2>/dev/null) - local health=$($smart_cmd -H "$disk" 2>/dev/null) - - # Extract information with multiple fallbacks - local model=$(echo "$info" | grep -i "Device Model:\|Product:\|Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - local vendor=$(echo "$info" | grep -i "Vendor:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model" - - local serial=$(echo "$info" | grep -i "Serial Number:\|Serial number:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - - local capacity=$(echo "$info" | grep -i "User Capacity:\|Total NVM Capacity:\|Namespace 1 Size/Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1 | head -1) - - local firmware=$(echo "$info" | grep -i "Firmware Version:\|Firmware revision:\|Revision:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - - local health_status=$(echo "$health" | grep -i "result:\|SMART overall-health\|Health Status:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep -i "SMART overall-health" | awk -F'[' '{print $2}' | cut -d']' -f1) - - # Get disk type - local disk_type="UNKNOWN" - if echo "$info" | grep -qi "Solid State Device\|NVMe"; then - disk_type="SSD" - elif echo "$info" | grep -qi "Rotation Rate"; then - disk_type="HDD" - elif echo "$info" | grep -qi "SCSI\|SAS"; then - if echo "$info" | grep -qi 
"15000\|10000\|7200"; then - disk_type="HDD" - else - disk_type="SSD" - fi - fi - - # Extract SMART attributes with multiple field attempts - local power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours" | awk '{print $10}' | head -1 | sed 's/[^0-9]//g') - - local reallocated_sectors=$(echo "$attributes" | grep -i "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) - - local pending_sectors=$(echo "$attributes" | grep -i "Current_Pending_Sector" | awk '{print $10}' | head -1) - - # For Kingston and other SSDs with different attribute names - local total_written=$(echo "$attributes" | grep -i "Total_LBAs_Written\|Lifetime_Writes_GiB\|Host_Writes_32MiB\|Flash_Writes_GiB\|Data Units Written" | awk '{print $10}' | head -1) - local host_writes_32mib=$(echo "$attributes" | grep -i "Host_Writes_32MiB" | awk '{print $10}' | head -1) - - # For wear leveling indicators - local media_wearout=$(echo "$attributes" | grep -i "Media_Wearout_Indicator\|Wear_Leveling_Count\|SSD_Life_Left\|Percentage Used\|Available Spare" | awk '{print $10}' | head -1) - - echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout" -} +declare -A ENTERPRISE_TBW=( + ["250"]=450 + ["500"]=900 + ["1000"]=1800 + ["2000"]=3600 + ["4000"]=7200 + ["8000"]=14400 +) -# Function to calculate TBW for SSD - ENHANCED FOR KINGSTON AND NVMe -calculate_tbw() { - local raw_value=$1 - local sectors=$2 - local disk_model=$3 - local attribute_name=$4 +# Function to get closest capacity tier +get_capacity_tier() { + local capacity_gb=$1 + local tiers=("250" "500" "1000" "2000" "4000" "8000") - # Kingston SSDs use Lifetime_Writes_GiB and Flash_Writes_GiB - if echo "$disk_model" | grep -qi "KINGSTON"; then - if [[ -n "$raw_value" && "$raw_value" != "0" ]]; then - # Convert from GiB to TB - local tbw=$(echo "scale=2; $raw_value / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" + for tier in 
"${tiers[@]}"; do + if [[ $capacity_gb -le $tier ]]; then + echo $tier return fi + done + # For larger than 8TB, use proportional scaling from 4TB + echo "8000" +} + +# Function to convert bytes to human readable +bytes_to_human() { + local bytes=$1 + if [[ $bytes -ge 1099511627776 ]]; then + echo "$(echo "scale=2; $bytes / 1099511627776" | bc 2>/dev/null || echo "0") TB" + elif [[ $bytes -ge 1073741824 ]]; then + echo "$(echo "scale=2; $bytes / 1073741824" | bc 2>/dev/null || echo "0") GB" + elif [[ $bytes -ge 1048576 ]]; then + echo "$(echo "scale=2; $bytes / 1048576" | bc 2>/dev/null || echo "0") MB" + else + echo "$bytes bytes" fi - - # NVMe drives use Data Units Written (1 unit = 1,000,000 bytes for NVMe 1.0+, 512,000 bytes for older) - if echo "$attribute_name" | grep -qi "Data Units Written"; then - if [[ -n "$raw_value" && "$raw_value" != "0" ]]; then - # Convert from data units to TB (assuming 1,000,000 bytes per unit) - local bytes=$(echo "$raw_value * 1000000" | bc 2>/dev/null) - local tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" - return - fi - fi - - if [[ -n "$sectors" && "$sectors" != "0" ]]; then - local bytes=$((sectors * 512)) - local tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" - elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then - local tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" +} + +# Function to extract numeric hours from power_on_hours field +extract_numeric_hours() { + local power_on_hours=$1 + # Remove everything after non-numeric characters + local numeric_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*$//') + if [[ -n "$numeric_hours" && "$numeric_hours" =~ ^[0-9]+$ ]]; then + echo "$numeric_hours" else echo "0" fi } -# Function to estimate SSD endurance based on model and capacity -estimate_ssd_endurance() { - local disk_model=$1 - local capacity_gb=$2 +# Function to 
get disk type and interface +get_disk_info() { + local disk=$1 - # Kingston consumer SSDs - if echo "$disk_model" | grep -qi "KINGSTON.*SA400"; then - if [[ $capacity_gb -ge 960 ]]; then - echo "300" # 300TB for 960GB Kingston SA400 - elif [[ $capacity_gb -ge 480 ]]; then - echo "150" # 150TB for 480GB Kingston - else - echo "80" # 80TB for smaller Kingston + local info=$(smartctl -i "$disk" 2>/dev/null) + local transport="" + local disk_type="UNKNOWN" + local is_enterprise=false + + # Check if it's NVMe + if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then + disk_type="NVMe" + transport="NVMe" + # Check for SAS + elif echo "$info" | grep -qi "SAS"; then + disk_type="SAS" + transport="SAS" + is_enterprise=true + # Check for SATA SSD + elif echo "$info" | grep -qi "Solid State Device"; then + disk_type="SSD" + transport="SATA" + # Check for SATA HDD + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="HDD" + transport="SATA" + fi + + # Check for enterprise features + if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then + is_enterprise=true + fi + + # Check device type by model name + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then + is_enterprise=true fi - # NVMe SSDs typically have higher endurance - elif echo "$disk_model" | grep -qi "NVMe"; then - if [[ $capacity_gb -ge 2000 ]]; then - echo "1200" # 1.2PB for 2TB+ NVMe - elif [[ $capacity_gb -ge 1000 ]]; then - echo "600" # 600TB for 1TB NVMe - elif [[ $capacity_gb -ge 500 ]]; then - echo "300" # 300TB for 500GB NVMe + fi + + echo "$disk_type|$transport|$is_enterprise" +} + +# Function to calculate TBW for SSD/NVMe +calculate_tbw() { + local disk_type=$1 + local raw_value=$2 + local sectors=$3 + + local tbw=0 + + if [[ -n 
"$sectors" && "$sectors" != "0" ]]; then + # Calculate from sectors (most common for SATA SSDs) + local bytes=$((sectors * 512)) + tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + if [[ "$disk_type" == "NVMe" ]]; then + # NVMe: raw value is in 32MB units + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") else - echo "150" # 150TB for smaller NVMe - fi - # SAS SSDs typically have very high endurance - elif echo "$disk_model" | grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD"; then - if [[ $capacity_gb -ge 1000 ]]; then - echo "10000" # 10PB for 1TB+ enterprise SAS SSD - elif [[ $capacity_gb -ge 600 ]]; then - echo "6000" # 6PB for 600GB enterprise SAS SSD - elif [[ $capacity_gb -ge 400 ]]; then - echo "4000" # 4PB for 400GB enterprise SAS SSD - else - echo "2000" # 2PB for smaller enterprise SAS SSD - fi - elif echo "$disk_model" | grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL"; then - # Enterprise SATA/NVMe SSDs - if [[ $capacity_gb -ge 1000 ]]; then - echo "1200" # 1.2PB for 1TB enterprise - elif [[ $capacity_gb -ge 480 ]]; then - echo "600" # 600TB for 480GB enterprise - elif [[ $capacity_gb -ge 240 ]]; then - echo "300" # 300TB for 240GB enterprise - else - echo "150" # 150TB for smaller enterprise + # SATA SSD: various manufacturers + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") fi + fi + + echo "$tbw" +} + +# Function to get estimated endurance +get_estimated_endurance() { + local capacity_gb=$1 + local is_enterprise=$2 + local disk_type=$3 + + # HDDs don't have TBW + if [[ "$disk_type" == "HDD" ]]; then + echo "N/A" + return + fi + + local capacity_tier=$(get_capacity_tier "$capacity_gb") + + if [[ "$is_enterprise" == "true" ]]; then + echo "${ENTERPRISE_TBW[$capacity_tier]}" else - # Consumer SSDs - if [[ $capacity_gb -ge 1000 ]]; then - echo "600" # 600TB for 1TB consumer 
- elif [[ $capacity_gb -ge 480 ]]; then - echo "300" # 300TB for 480GB consumer - elif [[ $capacity_gb -ge 240 ]]; then - echo "150" # 150TB for 240GB consumer - elif [[ $capacity_gb -ge 120 ]]; then - echo "80" # 80TB for 120GB consumer - else - echo "40" # 40TB for smaller drives - fi + echo "${CONSUMER_TBW[$capacity_tier]}" fi } -# Function to estimate SSD lifespan with TBW remaining - ENHANCED +# Function to estimate SSD lifespan with TBW remaining estimate_ssd_lifespan() { local power_on_hours=$1 local tbw_used=$2 - local disk_model=$3 - local capacity_gb=$4 - local media_wearout=$5 + local estimated_endurance=$3 + local disk_type=$4 if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then - echo "Unknown||Unknown||Unknown" + echo "Unknown||Unknown|New" return fi - local estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb") - local tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0") - - # If we have media wearout indicator, use it for more accurate estimation - if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then - # For Kingston, SSD_Life_Left is already a percentage - if echo "$disk_model" | grep -qi "KINGSTON"; then - if [[ $media_wearout -le 10 ]]; then - echo "${RED}${media_wearout}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" - elif [[ $media_wearout -le 30 ]]; then - echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout" - elif [[ $media_wearout -le 70 ]]; then - echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" - else - echo "${GREEN}${media_wearout}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" - fi - else - # For other drives, media_wearout might be countdown from 100 - local wear_percent=$media_wearout - if [[ $media_wearout -le 10 ]]; then - echo 
"${RED}${wear_percent}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" - elif [[ $media_wearout -le 30 ]]; then - echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout" - elif [[ $media_wearout -le 70 ]]; then - echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" - else - echo "${GREEN}${wear_percent}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" - fi - fi + if [[ "$estimated_endurance" == "N/A" ]]; then + echo "N/A|N/A|N/A|HDD" return fi - if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then - local lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") + # Handle the case where tbw_used might have formatting issues + local clean_tbw_used=$(echo "$tbw_used" | sed 's/[^0-9.]//g') + if [[ -z "$clean_tbw_used" ]]; then + clean_tbw_used=0 + fi + + local tbw_remaining=$(echo "scale=2; $estimated_endurance - $clean_tbw_used" | bc 2>/dev/null || echo "$estimated_endurance") + + if [[ $(echo "$clean_tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100") if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then - echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}High wear${NC}|tbw" + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type" elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then - echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|tbw" + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type" else - echo 
"${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|tbw" + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type" fi else - echo "Unknown|${estimated_endurance} TB|New|estimated" + echo "Unknown|${estimated_endurance} TB|New|$disk_type" fi } -# Function to estimate HDD lifespan - FIXED POWER_ON_HOURS PARSING +# Function to estimate HDD lifespan estimate_hdd_lifespan() { local power_on_hours=$1 local reallocated_sectors=$2 local pending_sectors=$3 + local start_stop_count=$4 + local load_cycle_count=$5 + local disk_type=$6 - # Clean power_on_hours to extract just the numeric part - local clean_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*//') - clean_hours=${clean_hours:-0} + # Extract numeric hours only + local numeric_hours=$(extract_numeric_hours "$power_on_hours") - if [[ -z "$clean_hours" || "$clean_hours" -eq 0 ]]; then + if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then echo "Unknown" return fi - reallocated_sectors=${reallocated_sectors:-0} - pending_sectors=${pending_sectors:-0} + local severity=0 + # Critical issues if [[ "$pending_sectors" -gt 0 ]]; then echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" + return elif [[ "$reallocated_sectors" -gt 100 ]]; then - echo "${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)" + severity=$((severity + 3)) elif [[ "$reallocated_sectors" -gt 10 ]]; then - echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)" - elif [[ "$clean_hours" -gt 40000 ]]; then - echo "${YELLOW}1-2 years${NC} (High usage: $clean_hours hours)" - elif [[ "$clean_hours" -gt 25000 ]]; then - echo "${GREEN}2-3 years${NC} (Moderate usage: $clean_hours hours)" + severity=$((severity + 2)) + elif [[ "$reallocated_sectors" -gt 0 ]]; then + severity=$((severity + 1)) + fi + + # Usage-based assessment + if [[ "$numeric_hours" -gt 50000 ]]; then + severity=$((severity + 3)) + elif [[ "$numeric_hours" -gt 
30000 ]]; then + severity=$((severity + 2)) + elif [[ "$numeric_hours" -gt 15000 ]]; then + severity=$((severity + 1)) + fi + + # Mechanical wear (for HDDs) + if [[ "$disk_type" == "HDD" ]]; then + if [[ "$start_stop_count" -gt 50000 ]]; then + severity=$((severity + 2)) + elif [[ "$start_stop_count" -gt 20000 ]]; then + severity=$((severity + 1)) + fi + + if [[ "$load_cycle_count" -gt 500000 ]]; then + severity=$((severity + 2)) + elif [[ "$load_cycle_count" -gt 200000 ]]; then + severity=$((severity + 1)) + fi + fi + + if [[ $severity -ge 5 ]]; then + echo "${RED}< 6 months${NC} (Multiple risk factors)" + elif [[ $severity -ge 3 ]]; then + echo "${YELLOW}6-18 months${NC} (Moderate wear)" + elif [[ $severity -ge 1 ]]; then + echo "${YELLOW}1-3 years${NC} (Light wear)" else - echo "${GREEN}> 3 years${NC} (Low usage: $clean_hours hours)" + echo "${GREEN}> 3 years${NC} (Healthy)" fi } -# Function to check a single disk with enhanced error handling +# Function to check soft-raid (MDRAID) +check_mdraid() { + local md_devices=() + + if [[ -f /proc/mdstat ]]; then + while IFS= read -r line; do + if [[ $line =~ ^md[0-9]+ ]]; then + md_devices+=("/dev/${line%% *}") + fi + done < /proc/mdstat + fi + + for md in "${md_devices[@]}"; do + if [[ -b "$md" ]]; then + print_color $MAGENTA "Found software RAID: $md" + if command_exists mdadm; then + local md_info=$(mdadm --detail "$md" 2>/dev/null) + if [[ -n "$md_info" ]]; then + echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "" + fi + fi + fi + done +} + +# Function to extract capacity in GB from various formats - IMPROVED FOR HDD/SAS +extract_capacity_gb() { + local capacity=$1 + local capacity_gb=0 + + # Debug: Show what we're trying to parse + # echo "DEBUG: Parsing capacity: '$capacity'" 
>&2 + + # Try different patterns to extract capacity + if [[ $capacity =~ \[([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: [1.82 TB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + # echo "DEBUG: Pattern 1 - TB size: $size, GB: $capacity_gb" >&2 + elif [[ $capacity =~ \[([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: [500.1 GB] + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) + # echo "DEBUG: Pattern 2 - GB size: $size, GB: $capacity_gb" >&2 + elif [[ $capacity =~ ([0-9,]+)\s+bytes ]]; then + # Pattern: 500,107,862,016 bytes + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) + # echo "DEBUG: Pattern 3 - bytes: $bytes, GB: $capacity_gb" >&2 + elif [[ $capacity =~ ([0-9,.]+)\s*[Tt][Bb] ]]; then + # Pattern: 1.82TB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + # echo "DEBUG: Pattern 4 - TB size: $size, GB: $capacity_gb" >&2 + elif [[ $capacity =~ ([0-9,.]+)\s*[Gg][Bb] ]]; then + # Pattern: 500.1GB + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) + # echo "DEBUG: Pattern 5 - GB size: $size, GB: $capacity_gb" >&2 + elif [[ $capacity =~ ([0-9,]+)\s*[Bb] ]]; then + # Pattern: 2000398934016B + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) + # echo "DEBUG: Pattern 6 - bytes: $bytes, GB: $capacity_gb" >&2 + fi + + # Ensure we have a valid number + if [[ -z "$capacity_gb" || ! 
"$capacity_gb" =~ ^[0-9]+$ || "$capacity_gb" -le 0 ]]; then + echo "0" + else + echo "$capacity_gb" + fi +} + +# Function to get NVMe capacity using smartctl +get_nvme_capacity() { + local disk=$1 + local nvme_info=$(smartctl -i "$disk" 2>/dev/null) + local capacity="" + + # Try to get capacity from different fields + capacity=$(echo "$nvme_info" | grep -i "Total NVM Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "Namespace 1 Size/Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + fi + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "User Capacity" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + fi + + echo "$capacity" +} + +# Function to get human readable capacity +get_human_capacity() { + local capacity_gb=$1 + + if [[ $capacity_gb -ge 1000 ]]; then + echo "$(echo "scale=2; $capacity_gb / 1000" | bc) TB" + else + echo "${capacity_gb} GB" + fi +} + +# Function to extract capacity from model name for HDD/SAS drives +extract_capacity_from_model() { + local model=$1 + local disk_type=$2 + + # Common HDD/SAS capacity patterns in model names + if [[ "$model" =~ 2[Tt][Bb] ]] || [[ "$model" =~ 2000[Gg] ]]; then + echo "2000" + elif [[ "$model" =~ 1[Tt][Bb] ]] || [[ "$model" =~ 1000[Gg] ]]; then + echo "1000" + elif [[ "$model" =~ 4[Tt][Bb] ]] || [[ "$model" =~ 4000[Gg] ]]; then + echo "4000" + elif [[ "$model" =~ 8[Tt][Bb] ]] || [[ "$model" =~ 8000[Gg] ]]; then + echo "8000" + elif [[ "$model" =~ 500[Gg] ]]; then + echo "500" + elif [[ "$model" =~ 250[Gg] ]]; then + echo "250" + else + # Try to extract numbers that look like capacities + local capacity_match=$(echo "$model" | grep -oE '[0-9]+[GT]B' | head -1) + if [[ -n "$capacity_match" ]]; then + if [[ "$capacity_match" =~ ([0-9]+)TB ]]; then + echo "$((${BASH_REMATCH[1]} * 1000))" + elif [[ "$capacity_match" =~ ([0-9]+)GB ]]; then + echo "${BASH_REMATCH[1]}" + fi + else + echo "0" + fi + fi +} + +# Function to check a 
single disk check_disk() { local disk=$1 - local controller=$2 - print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})" + print_color $CYAN "Checking disk: $disk" echo "==================================================" - # Test SMART access level - local access_level=$(test_smart_access "$disk" "$controller") - - case $access_level in - "no_access") - print_color $RED "ERROR: Cannot access disk through controller" - echo "Possible reasons:" - echo " - Controller doesn't support SMART passthrough" - echo " - Disk is part of a hardware RAID array" - echo " - Insufficient permissions (try running as root)" - echo " - Controller busy or offline" - echo "" - return - ;; - "not_available") - print_color $YELLOW "SMART not available on this disk" - echo "This disk does not support SMART monitoring" - echo "" - return - ;; - "disabled") - print_color $YELLOW "SMART is disabled on this disk" - echo "SMART is available but currently disabled" - echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk" - echo "" - return - ;; - "no_attributes") - print_color $YELLOW "WARNING: Cannot read SMART attributes" - echo "This is common with hardware RAID controllers like PERC H730P" - echo "Try checking through the RAID management interface" - echo "" - return - ;; - "limited_attributes") - print_color $YELLOW "NOTE: Limited SMART data available" - echo "Controller is filtering some SMART attributes" - ;; - esac + # Check if disk exists and is accessible + if [[ ! 
-b "$disk" ]]; then + print_color $RED "Error: $disk is not a valid block device" + echo "" + return + fi # Get disk information - local disk_info=$(get_disk_info "$disk" "$controller") - IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info" + local disk_info=$(get_disk_info "$disk") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$(smartctl -i "$disk" 2>/dev/null) + local health=$(smartctl -H "$disk" 2>/dev/null) + local attributes=$(smartctl -A "$disk" 2>/dev/null) + + # Check if smartctl command succeeded + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access." + echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep -i "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep -i "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep -i "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep -i "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # For NVMe disks, try to get capacity from different fields + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_capacity=$(get_nvme_capacity "$disk") + if [[ -n "$nvme_capacity" ]]; then + capacity="$nvme_capacity" + fi + fi + + # Extract capacity in GB and human readable format + local capacity_gb=$(extract_capacity_gb "$capacity") + local capacity_human="" + + # If capacity extraction failed, try alternative methods + if [[ "$capacity_gb" -eq 0 ]]; then + # Try to get 
capacity from model name (especially for HDD/SAS) + local model_capacity=$(extract_capacity_from_model "$model" "$disk_type") + if [[ "$model_capacity" -gt 0 ]]; then + capacity_gb="$model_capacity" + capacity_human=$(get_human_capacity "$capacity_gb") + else + # Final fallback based on disk type and common sizes + if [[ "$disk_type" == "HDD" ]]; then + # Common HDD sizes + if [[ "$model" =~ ST2000 ]]; then + capacity_gb=2000 + elif [[ "$model" =~ ST1000 ]]; then + capacity_gb=1000 + elif [[ "$model" =~ ST4000 ]]; then + capacity_gb=4000 + elif [[ "$model" =~ ST3000 ]]; then + capacity_gb=3000 + else + capacity_gb=0 + fi + elif [[ "$disk_type" == "SSD" ]]; then + # Common SSD sizes + if [[ "$model" =~ 960[Gg] ]]; then + capacity_gb=960 + elif [[ "$model" =~ 480[Gg] ]]; then + capacity_gb=480 + elif [[ "$model" =~ 240[Gg] ]]; then + capacity_gb=240 + elif [[ "$model" =~ 120[Gg] ]]; then + capacity_gb=120 + else + capacity_gb=0 + fi + elif [[ "$disk_type" == "NVMe" ]]; then + # Common NVMe sizes + if [[ "$model" =~ 500[Gg] ]]; then + capacity_gb=500 + elif [[ "$model" =~ 1000[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then + capacity_gb=1000 + elif [[ "$model" =~ 2000[Gg] ]] || [[ "$model" =~ 2[Tt] ]]; then + capacity_gb=2000 + else + capacity_gb=0 + fi + fi + capacity_human=$(get_human_capacity "$capacity_gb") + fi + else + capacity_human=$(get_human_capacity "$capacity_gb") + fi + + # If we still don't have capacity, show unknown + if [[ "$capacity_gb" -eq 0 ]]; then + capacity_human="Unknown" + fi + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract SMART attributes + local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1) + local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw") + local reallocated_sectors=$(echo "$attributes" | grep 
"Reallocated_Sector_Ct" | awk '{print $10}' | head -1) + local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1) + local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}' | head -1) + local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1) + local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1) + local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}' | head -1) + + # For NVMe disks using smartctl extended attributes + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null) + # Extract data units written for NVMe + local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $4}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + # Convert data units to sectors (1 data unit = 1000 sectors for NVMe) + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + else + # Try alternative field + data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + fi + fi + # Get power on hours for NVMe + local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') + if [[ -n "$nvme_power_hours" ]]; then + power_on_hours="$nvme_power_hours" + fi + fi # Display basic information echo "Model: ${model:-Unknown}" echo "Serial: ${serial:-Unknown}" echo "Type: $disk_type" - echo "Capacity: ${capacity:-Unknown}" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: $capacity_human" echo "Firmware: ${firmware:-Unknown}" echo "Health: ${health_status:-Unknown}" - - # Only show power on hours if available - if [[ -n 
"$power_on_hours" && "$power_on_hours" != "0" ]]; then - echo "Power On Hours: $power_on_hours" - else - echo "Power On Hours: Unknown" - fi + echo "Power On Hours: ${power_on_hours:-Unknown}" # Disk type specific analysis - if [[ "$disk_type" == "SSD" ]]; then - # Get the actual attribute name for TBW calculation - local attributes=$(smartctl -A "$disk" 2>/dev/null) - local tbw_attribute_name=$(echo "$attributes" | grep -i "Lifetime_Writes_GiB\|Flash_Writes_GiB\|Data Units Written" | head -1 | awk '{print $2}') + if [[ "$disk_type" == "HDD" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" + [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" + local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") + echo "Lifespan: $lifespan" + + elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then local tbw_used=0 if [[ -n "$total_written" && "$total_written" != "0" ]]; then - tbw_used=$(calculate_tbw "" "$total_written" "$model" "$tbw_attribute_name") + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then - tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model" "$tbw_attribute_name") + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") fi - # Always show TBW information for SSDs + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type") + echo "TBW Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)" - # Estimate capacity for endurance calculation - local capacity_gb=0 - if echo "$capacity" | grep -qi "GB"; then - capacity_gb=$(echo "$capacity" | grep -o 
'[0-9.]*' | head -1 | cut -d. -f1) - elif echo "$capacity" | grep -qi "TB"; then - capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. -f1) - fi - - local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout") + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) - local wear_source=$(echo "$lifespan_info" | cut -d'|' -f4) - echo "TBW Remaining: $tbw_remaining" - echo "Lifespan: $lifespan_percent ($wear_status)" - - # Show wear source if available - if [[ "$wear_source" == "media_wearout" ]]; then - echo "Wear Source: Media Wearout Indicator" - elif [[ "$wear_source" == "tbw" ]]; then - echo "Wear Source: TBW Calculation" - elif [[ "$wear_source" == "estimated" ]]; then - echo "Wear Source: Estimated Endurance" + if [[ "$estimated_endurance" != "N/A" ]]; then + echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" fi - elif [[ "$disk_type" == "HDD" ]]; then - if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then - echo "Realloc Sectors: $reallocated_sectors" + # Show mechanical attributes for SAS drives that might be SSDs + if [[ "$disk_type" == "SAS" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" fi - if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then - echo "Pending Sectors: $pending_sectors" - fi - - local lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}") - echo "Lifespan: $lifespan" else - print_color $YELLOW "Limited information available for this disk type" - echo "This is normal for hardware RAID configurations like PERC 
H730P" - echo "For detailed SAS drive information, use controller management tools" + print_color $YELLOW "Unknown disk type - limited information available" fi echo "" } -# Function to detect all disks with enhanced SAS support (no partitions) +# Function to detect all disks detect_disks() { local disks=() - # Check for SATA/SAS disks - only main devices, no partitions - for disk in /dev/sd[a-z]; do + # Check for SATA/SAS disks + for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do if [[ -b "$disk" ]]; then disks+=("$disk") fi done - # Check for NVMe disks - only main devices, no partitions + # Check for NVMe disks (base devices only, no partitions) for disk in /dev/nvme[0-9]n[0-9]; do if [[ -b "$disk" ]]; then disks+=("$disk") fi done - # Check for SAS disks via SCSI generic - only main devices - for disk in /dev/sg[0-9]; do - if [[ -b "$disk" ]]; then - disks+=("$disk") - fi - done - - # Check for other disk types - only main devices + # Check for other disk types for disk in /dev/vd[a-z] /dev/xvd[a-z]; do if [[ -b "$disk" ]]; then disks+=("$disk") @@ -522,40 +672,19 @@ detect_disks() { echo "${disks[@]}" } -# Function to detect RAID controllers (Ubuntu specific) -detect_raid_controllers() { - local controllers=("megaraid" "cciss" "areca" "3ware" "hpt") - local raid_disks=() - - # Check for RAID controllers - for controller in "${controllers[@]}"; do - for i in {0..31}; do - # Try different disk devices for each controller - for base_disk in "/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1"; do - if [[ -b "$base_disk" ]]; then - if smartctl -d "$controller,$i" -i "$base_disk" &>/dev/null; then - raid_disks+=("$base_disk:$controller,$i") - break - fi - fi - done - done - done - - echo "${raid_disks[@]}" -} - # Main function main() { - print_color $BLUE "Ubuntu 24.04 Disk Health Check Script v$VERSION" - print_color $BLUE "Enhanced with PERC H730P and SAS Support" - print_color $BLUE "============================================" + print_color $BLUE "Disk Health 
Check Script v$VERSION for Ubuntu" + print_color $BLUE "==============================================" echo "" check_dependencies local disks=() + # Check for soft-raid first + check_mdraid + # If specific disk provided, check only that disk if [[ $# -gt 0 ]]; then for disk in "$@"; do @@ -567,16 +696,8 @@ main() { done else # Auto-detect disks - print_color $CYAN "Auto-detecting disks (excluding partitions)..." - local direct_disks=() - read -ra direct_disks <<< "$(detect_disks)" - - print_color $CYAN "Scanning for RAID controllers..." - local raid_disks=() - read -ra raid_disks <<< "$(detect_raid_controllers)" - - # Combine both lists - disks=("${direct_disks[@]}" "${raid_disks[@]}") + print_color $CYAN "Auto-detecting disks..." + read -ra disks <<< "$(detect_disks)" fi if [[ ${#disks[@]} -eq 0 ]]; then @@ -590,33 +711,20 @@ main() { # Check if running as root, warn if not if [[ $EUID -ne 0 ]]; then - print_color $YELLOW "Warning: Not running as root." - print_color $YELLOW "Some disks/controllers may show limited information." + print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible." echo "For complete results, run as: sudo $0" echo "" fi # Check each disk - for disk_info in "${disks[@]}"; do - # Check if this is a RAID disk (has controller specified) - if [[ "$disk_info" == *":"* ]]; then - IFS=':' read -r disk controller <<< "$disk_info" - check_disk "$disk" "$controller" - else - check_disk "$disk_info" - fi + for disk in "${disks[@]}"; do + check_disk "$disk" done print_color $BLUE "Check completed!" 
echo "" - print_color $CYAN "Note: For PERC H730P controllers with SAS drives:" - print_color $CYAN " - Install 'storcli' for detailed controller information" - print_color $CYAN " - Use 'smartctl -d sat /dev/sgX' to try direct access" - print_color $CYAN " - Hardware RAID controllers often limit SMART data access" - echo "" - print_color $CYAN "Ubuntu-specific tips:" - print_color $CYAN " - Use 'lsblk' to see all available block devices" - print_color $CYAN " - Use 'lshw -class disk' for detailed disk information" + print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers." + print_color $YELLOW " Actual endurance for your specific drive model may be higher." } # Usage information @@ -629,9 +737,10 @@ usage() { echo " $SCRIPT_NAME # Check all auto-detected disks" echo " sudo $SCRIPT_NAME # Check all disks (as root)" echo " $SCRIPT_NAME /dev/sda # Check specific disk" - echo " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" - echo " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" } # Parse command line arguments diff --git a/old/ubuntu-v2.7.sh b/old/ubuntu-v2.7.sh index e90ba2a..033932e 100755 --- a/old/ubuntu-v2.7.sh +++ b/old/ubuntu-v2.7.sh @@ -1,8 +1,8 @@ #!/bin/bash -# Disk Health Check Script for Ubuntu 24.04 -# Enhanced with SAS/PERC H730P controller support -# Checks SSD TBW/lifespan and HDD health status +# Disk Health Check Script for Ubuntu +# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid +# Supports consumer and enterprise disk classification SCRIPT_NAME=$(basename "$0") VERSION="2.7" @@ -13,6 +13,7 @@ GREEN=$(tput setaf 2) YELLOW=$(tput setaf 3) BLUE=$(tput setaf 4) CYAN=$(tput setaf 6) +MAGENTA=$(tput setaf 5) NC=$(tput sgr0) # Function to print colored output @@ -46,245 +47,153 @@ 
check_dependencies() { fi } -# Function to test SMART access and get available data -test_smart_access() { - local disk=$1 - local controller=$2 - - local smart_cmd="smartctl" - [[ -n "$controller" ]] && smart_cmd+=" -d $controller" - - # Test basic SMART access - if ! $smart_cmd -i "$disk" &>/dev/null; then - echo "no_access" - return - fi - - echo "full_access" -} +# TBW endurance standards (using lowest numbers) +declare -A CONSUMER_TBW=( + ["250"]=150 + ["500"]=300 + ["1000"]=600 + ["2000"]=1200 + ["4000"]=2400 + ["8000"]=4800 +) -# Function to get disk information with enhanced SAS and NVMe support -get_disk_info() { - local disk=$1 - local controller=$2 - - local smart_cmd="smartctl" - [[ -n "$controller" ]] && smart_cmd+=" -d $controller" - - local info=$($smart_cmd -i "$disk" 2>/dev/null) - local attributes=$($smart_cmd -A "$disk" 2>/dev/null) - local health=$($smart_cmd -H "$disk" 2>/dev/null) - - # For NVMe drives, use -x for extended information - if echo "$info" | grep -qi "NVMe"; then - local nvme_info=$($smart_cmd -x "$disk" 2>/dev/null) - attributes="$nvme_info" - fi - - # Extract information with multiple fallbacks - local model=$(echo "$info" | grep -i "Device Model:\|Product:\|Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - local vendor=$(echo "$info" | grep -i "Vendor:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model" - - local serial=$(echo "$info" | grep -i "Serial Number:\|Serial number:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - - local capacity=$(echo "$info" | grep -i "User Capacity:\|Total NVM Capacity:\|Namespace 1 Size/Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1 | head -1) - - local firmware=$(echo "$info" | grep -i "Firmware Version:\|Firmware revision:\|Revision:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1) - - local health_status=$(echo "$health" | grep -i "result:\|SMART overall-health\|Health Status:" | cut -d: -f2 | sed 's/^[ \t]*//' | 
head -1) - [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep -i "SMART overall-health" | awk -F'[' '{print $2}' | cut -d']' -f1) - - # Get disk type - local disk_type="UNKNOWN" - if echo "$info" | grep -qi "Solid State Device\|NVMe"; then - disk_type="SSD" - elif echo "$info" | grep -qi "Rotation Rate"; then - disk_type="HDD" - elif echo "$info" | grep -qi "SCSI\|SAS"; then - if echo "$info" | grep -qi "15000\|10000\|7200"; then - disk_type="HDD" - else - disk_type="SSD" - fi - fi - - # Extract SMART attributes with multiple field attempts - local power_on_hours="" - - # Try different power on hours attributes - if echo "$attributes" | grep -qi "Power_On_Hours"; then - power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours" | head -1 | awk '{print $10}') - elif echo "$attributes" | grep -qi "Power On Hours"; then - power_on_hours=$(echo "$attributes" | grep -i "Power On Hours" | head -1 | awk '{print $NF}') - fi - - # Clean power_on_hours to remove non-numeric characters for HDDs - if [[ "$disk_type" == "HDD" ]]; then - power_on_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*//') - fi - - local reallocated_sectors=$(echo "$attributes" | grep -i "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) - - local pending_sectors=$(echo "$attributes" | grep -i "Current_Pending_Sector" | awk '{print $10}' | head -1) - - # For Kingston and other SSDs with different attribute names - local total_written="" - local host_writes_32mib="" - - # Try different write attributes for different drive types - if echo "$model" | grep -qi "KINGSTON.*SA400"; then - # Kingston SA400 uses Flash_Writes_GiB and Lifetime_Writes_GiB - total_written=$(echo "$attributes" | grep -i "Flash_Writes_GiB" | awk '{print $NF}' | head -1) - [[ -z "$total_written" ]] && total_written=$(echo "$attributes" | grep -i "Lifetime_Writes_GiB" | awk '{print $NF}' | head -1) - elif echo "$info" | grep -qi "NVMe"; then - # NVMe drives use Data Units Written - total_written=$(echo 
"$attributes" | grep -i "Data Units Written" | head -1 | awk '{print $NF}') - # Also try to get power on hours from NVMe - if [[ -z "$power_on_hours" ]]; then - power_on_hours=$(echo "$attributes" | grep -i "Power On Hours" | head -1 | awk '{print $NF}') - fi - else - # Standard SATA SSDs - total_written=$(echo "$attributes" | grep -i "Total_LBAs_Written" | awk '{print $10}' | head -1) - host_writes_32mib=$(echo "$attributes" | grep -i "Host_Writes_32MiB" | awk '{print $10}' | head -1) - fi - - # For wear leveling indicators - local media_wearout="" - if echo "$model" | grep -qi "KINGSTON.*SA400"; then - media_wearout=$(echo "$attributes" | grep -i "SSD_Life_Left" | awk '{print $NF}' | head -1) - elif echo "$info" | grep -qi "NVMe"; then - media_wearout=$(echo "$attributes" | grep -i "Percentage Used" | head -1 | awk '{print $NF}') - [[ -z "$media_wearout" ]] && media_wearout=$(echo "$attributes" | grep -i "Available Spare" | head -1 | awk '{print $NF}') - else - media_wearout=$(echo "$attributes" | grep -i "Media_Wearout_Indicator\|Wear_Leveling_Count" | awk '{print $10}' | head -1) - fi - - # If media_wearout is percentage used for NVMe, convert to remaining life - if echo "$info" | grep -qi "NVMe" && [[ -n "$media_wearout" ]]; then - if echo "$attributes" | grep -qi "Percentage Used"; then - # Convert percentage used to percentage remaining - media_wearout=$((100 - media_wearout)) - fi - fi - - echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout" -} +declare -A ENTERPRISE_TBW=( + ["250"]=450 + ["500"]=900 + ["1000"]=1800 + ["2000"]=3600 + ["4000"]=7200 + ["8000"]=14400 +) -# Function to calculate TBW for SSD - ENHANCED FOR KINGSTON AND NVMe -calculate_tbw() { - local raw_value=$1 - local sectors=$2 - local disk_model=$3 - local attribute_name=$4 +# Function to get closest capacity tier +get_capacity_tier() { + local capacity_gb=$1 + local 
tiers=("250" "500" "1000" "2000" "4000" "8000") - # Kingston SA400 SSDs use Flash_Writes_GiB (value in GiB) - if echo "$disk_model" | grep -qi "KINGSTON.*SA400"; then - if [[ -n "$raw_value" && "$raw_value" != "0" ]]; then - # Convert from GiB to TB (1 TiB = 1024 GiB, but using 1000 for TB) - local tbw=$(echo "scale=2; $raw_value / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" + for tier in "${tiers[@]}"; do + if [[ $capacity_gb -le $tier ]]; then + echo $tier return fi + done + # For larger than 8TB, use proportional scaling from 4TB + echo "8000" +} + +# Function to convert bytes to human readable +bytes_to_human() { + local bytes=$1 + if [[ $bytes -ge 1099511627776 ]]; then + echo "$(echo "scale=2; $bytes / 1099511627776" | bc 2>/dev/null || echo "0") TB" + elif [[ $bytes -ge 1073741824 ]]; then + echo "$(echo "scale=2; $bytes / 1073741824" | bc 2>/dev/null || echo "0") GB" + elif [[ $bytes -ge 1048576 ]]; then + echo "$(echo "scale=2; $bytes / 1048576" | bc 2>/dev/null || echo "0") MB" + else + echo "$bytes bytes" fi - - # NVMe drives use Data Units Written (1 unit = 1,000,000 bytes for NVMe 1.0+) - if echo "$disk_model" | grep -qi "NVMe"; then - if [[ -n "$raw_value" && "$raw_value" != "0" ]]; then - # Convert from data units to TB (1 unit = 1,000,000 bytes) - local bytes=$(echo "$raw_value * 1000000" | bc 2>/dev/null) - local tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" - return - fi - fi - - # Standard SATA SSDs with Total_LBAs_Written - if [[ -n "$sectors" && "$sectors" != "0" ]]; then - local bytes=$((sectors * 512)) - local tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" - elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then - # Host_Writes_32MiB (value in 32MiB chunks) - local tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") - echo "$tbw" +} + +# Function to extract numeric hours from power_on_hours field 
+extract_numeric_hours() { + local power_on_hours=$1 + # Remove everything after non-numeric characters + local numeric_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*$//') + if [[ -n "$numeric_hours" && "$numeric_hours" =~ ^[0-9]+$ ]]; then + echo "$numeric_hours" else echo "0" fi } -# Function to estimate SSD endurance based on model and capacity -estimate_ssd_endurance() { - local disk_model=$1 - local capacity_gb=$2 +# Function to get disk type and interface +get_disk_info() { + local disk=$1 - # Kingston consumer SSDs - if echo "$disk_model" | grep -qi "KINGSTON.*SA400"; then - if [[ $capacity_gb -ge 960 ]]; then - echo "300" # 300TB for 960GB Kingston SA400 - elif [[ $capacity_gb -ge 480 ]]; then - echo "150" # 150TB for 480GB Kingston - else - echo "80" # 80TB for smaller Kingston + local info=$(smartctl -i "$disk" 2>/dev/null) + local transport="" + local disk_type="UNKNOWN" + local is_enterprise=false + + # Check if it's NVMe + if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then + disk_type="NVMe" + transport="NVMe" + # Check for SAS + elif echo "$info" | grep -qi "SAS"; then + disk_type="SAS" + transport="SAS" + is_enterprise=true + # Check for SATA SSD + elif echo "$info" | grep -qi "Solid State Device"; then + disk_type="SSD" + transport="SATA" + # Check for SATA HDD + elif echo "$info" | grep -qi "Rotation Rate"; then + disk_type="HDD" + transport="SATA" + fi + + # Check for enterprise features + if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then + is_enterprise=true + fi + + # Check device type by model name + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then + if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then + is_enterprise=true fi - # Kingston NVMe SSDs - elif echo "$disk_model" | grep -qi "KINGSTON.*SA2000"; then - if [[ 
$capacity_gb -ge 2000 ]]; then - echo "800" # 800TB for 2TB Kingston NVMe - elif [[ $capacity_gb -ge 1000 ]]; then - echo "400" # 400TB for 1TB Kingston NVMe - elif [[ $capacity_gb -ge 500 ]]; then - echo "200" # 200TB for 500GB Kingston NVMe + fi + + echo "$disk_type|$transport|$is_enterprise" +} + +# Function to calculate TBW for SSD/NVMe +calculate_tbw() { + local disk_type=$1 + local raw_value=$2 + local sectors=$3 + + local tbw=0 + + if [[ -n "$sectors" && "$sectors" != "0" ]]; then + # Calculate from sectors (most common for SATA SSDs) + local bytes=$((sectors * 512)) + tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") + elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then + if [[ "$disk_type" == "NVMe" ]]; then + # NVMe: raw value is in 32MB units + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") else - echo "100" # 100TB for smaller Kingston NVMe - fi - # NVMe SSDs typically have higher endurance - elif echo "$disk_model" | grep -qi "NVMe"; then - if [[ $capacity_gb -ge 2000 ]]; then - echo "1200" # 1.2PB for 2TB+ NVMe - elif [[ $capacity_gb -ge 1000 ]]; then - echo "600" # 600TB for 1TB NVMe - elif [[ $capacity_gb -ge 500 ]]; then - echo "300" # 300TB for 500GB NVMe - else - echo "150" # 150TB for smaller NVMe - fi - # SAS SSDs typically have very high endurance - elif echo "$disk_model" | grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD"; then - if [[ $capacity_gb -ge 1000 ]]; then - echo "10000" # 10PB for 1TB+ enterprise SAS SSD - elif [[ $capacity_gb -ge 600 ]]; then - echo "6000" # 6PB for 600GB enterprise SAS SSD - elif [[ $capacity_gb -ge 400 ]]; then - echo "4000" # 4PB for 400GB enterprise SAS SSD - else - echo "2000" # 2PB for smaller enterprise SAS SSD - fi - elif echo "$disk_model" | grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL"; then - # Enterprise SATA/NVMe SSDs - if [[ $capacity_gb -ge 1000 ]]; then - echo "1200" # 1.2PB for 1TB enterprise - elif [[ 
$capacity_gb -ge 480 ]]; then - echo "600" # 600TB for 480GB enterprise - elif [[ $capacity_gb -ge 240 ]]; then - echo "300" # 300TB for 240GB enterprise - else - echo "150" # 150TB for smaller enterprise + # SATA SSD: various manufacturers + tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") fi + fi + + echo "$tbw" +} + +# Function to get estimated endurance +get_estimated_endurance() { + local capacity_gb=$1 + local is_enterprise=$2 + local disk_type=$3 + + # HDDs don't have TBW + if [[ "$disk_type" == "HDD" ]]; then + echo "N/A" + return + fi + + local capacity_tier=$(get_capacity_tier "$capacity_gb") + + if [[ "$is_enterprise" == "true" ]]; then + echo "${ENTERPRISE_TBW[$capacity_tier]}" else - # Consumer SSDs - if [[ $capacity_gb -ge 1000 ]]; then - echo "600" # 600TB for 1TB consumer - elif [[ $capacity_gb -ge 480 ]]; then - echo "300" # 300TB for 480GB consumer - elif [[ $capacity_gb -ge 240 ]]; then - echo "150" # 150TB for 240GB consumer - elif [[ $capacity_gb -ge 120 ]]; then - echo "80" # 80TB for 120GB consumer - else - echo "40" # 40TB for smaller drives - fi + echo "${CONSUMER_TBW[$capacity_tier]}" fi } @@ -292,67 +201,40 @@ estimate_ssd_endurance() { estimate_ssd_lifespan() { local power_on_hours=$1 local tbw_used=$2 - local disk_model=$3 - local capacity_gb=$4 - local media_wearout=$5 + local estimated_endurance=$3 + local disk_type=$4 if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then - echo "Unknown||Unknown||Unknown" + echo "Unknown||Unknown|New" return fi - local estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb") - - # Handle case where estimated_endurance might be empty - if [[ -z "$estimated_endurance" || "$estimated_endurance" -eq 0 ]]; then - echo "Unknown||Unknown||Unknown" + if [[ "$estimated_endurance" == "N/A" ]]; then + echo "N/A|N/A|N/A|HDD" return fi - local tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0") - - # If we have 
media wearout indicator, use it for more accurate estimation - if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then - # For Kingston, SSD_Life_Left is already a percentage - if echo "$disk_model" | grep -qi "KINGSTON"; then - if [[ $media_wearout -le 10 ]]; then - echo "${RED}${media_wearout}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" - elif [[ $media_wearout -le 30 ]]; then - echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout" - elif [[ $media_wearout -le 70 ]]; then - echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" - else - echo "${GREEN}${media_wearout}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" - fi - else - # For other drives, media_wearout might be countdown from 100 - local wear_percent=$media_wearout - if [[ $media_wearout -le 10 ]]; then - echo "${RED}${wear_percent}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout" - elif [[ $media_wearout -le 30 ]]; then - echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout" - elif [[ $media_wearout -le 70 ]]; then - echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout" - else - echo "${GREEN}${wear_percent}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout" - fi - fi - return + # Handle the case where tbw_used might have formatting issues + local clean_tbw_used=$(echo "$tbw_used" | sed 's/[^0-9.]//g') + if [[ -z "$clean_tbw_used" ]]; then + clean_tbw_used=0 fi - if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then - local lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") + local tbw_remaining=$(echo "scale=2; $estimated_endurance - $clean_tbw_used" | bc 2>/dev/null || echo "$estimated_endurance") + 
+ if [[ $(echo "$clean_tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then + local lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100") if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then - echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}High wear${NC}|tbw" + echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type" elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then - echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|tbw" + echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type" else - echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|tbw" + echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type" fi else - echo "Unknown|${estimated_endurance} TB|New|estimated" + echo "Unknown|${estimated_endurance} TB|New|$disk_type" fi } @@ -361,198 +243,376 @@ estimate_hdd_lifespan() { local power_on_hours=$1 local reallocated_sectors=$2 local pending_sectors=$3 + local start_stop_count=$4 + local load_cycle_count=$5 + local disk_type=$6 - # Clean power_on_hours to extract just the numeric part - local clean_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*//') - clean_hours=${clean_hours:-0} + # Extract numeric hours only + local numeric_hours=$(extract_numeric_hours "$power_on_hours") - if [[ -z "$clean_hours" || "$clean_hours" -eq 0 ]]; then + if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then echo "Unknown" return fi - reallocated_sectors=${reallocated_sectors:-0} - pending_sectors=${pending_sectors:-0} + local severity=0 + # Critical issues if [[ "$pending_sectors" -gt 0 ]]; then echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" 
+ return elif [[ "$reallocated_sectors" -gt 100 ]]; then - echo "${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)" + severity=$((severity + 3)) elif [[ "$reallocated_sectors" -gt 10 ]]; then - echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)" - elif [[ "$clean_hours" -gt 40000 ]]; then - echo "${YELLOW}1-2 years${NC} (High usage: $clean_hours hours)" - elif [[ "$clean_hours" -gt 25000 ]]; then - echo "${GREEN}2-3 years${NC} (Moderate usage: $clean_hours hours)" + severity=$((severity + 2)) + elif [[ "$reallocated_sectors" -gt 0 ]]; then + severity=$((severity + 1)) + fi + + # Usage-based assessment + if [[ "$numeric_hours" -gt 50000 ]]; then + severity=$((severity + 3)) + elif [[ "$numeric_hours" -gt 30000 ]]; then + severity=$((severity + 2)) + elif [[ "$numeric_hours" -gt 15000 ]]; then + severity=$((severity + 1)) + fi + + # Mechanical wear (for HDDs) + if [[ "$disk_type" == "HDD" ]]; then + if [[ "$start_stop_count" -gt 50000 ]]; then + severity=$((severity + 2)) + elif [[ "$start_stop_count" -gt 20000 ]]; then + severity=$((severity + 1)) + fi + + if [[ "$load_cycle_count" -gt 500000 ]]; then + severity=$((severity + 2)) + elif [[ "$load_cycle_count" -gt 200000 ]]; then + severity=$((severity + 1)) + fi + fi + + if [[ $severity -ge 5 ]]; then + echo "${RED}< 6 months${NC} (Multiple risk factors)" + elif [[ $severity -ge 3 ]]; then + echo "${YELLOW}6-18 months${NC} (Moderate wear)" + elif [[ $severity -ge 1 ]]; then + echo "${YELLOW}1-3 years${NC} (Light wear)" else - echo "${GREEN}> 3 years${NC} (Low usage: $clean_hours hours)" + echo "${GREEN}> 3 years${NC} (Healthy)" fi } -# Function to check a single disk with enhanced error handling +# Function to check soft-raid (MDRAID) +check_mdraid() { + local md_devices=() + + if [[ -f /proc/mdstat ]]; then + while IFS= read -r line; do + if [[ $line =~ ^md[0-9]+ ]]; then + md_devices+=("/dev/${line%% *}") + fi + done < /proc/mdstat + fi + + for md in 
"${md_devices[@]}"; do + if [[ -b "$md" ]]; then + print_color $MAGENTA "Found software RAID: $md" + if command_exists mdadm; then + local md_info=$(mdadm --detail "$md" 2>/dev/null) + if [[ -n "$md_info" ]]; then + echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" + echo "" + fi + fi + fi + done +} + +# Function to extract capacity in GB from various formats - IMPROVED VERSION +extract_capacity_gb() { + local capacity=$1 + local capacity_gb=0 + + # Remove extra spaces and normalize + capacity=$(echo "$capacity" | sed 's/ */ /g') + + # Method 1: Try to extract from User Capacity field with bytes + if [[ $capacity =~ ([0-9,]+)\s*bytes ]]; then + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) + fi + + # Method 2: Try to extract from bracket format [XXX GB] or [X.XX TB] + if [[ $capacity_gb -eq 0 ]] && [[ $capacity =~ \[([0-9,.]+)\s*([GT])B?\] ]]; then + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + local unit="${BASH_REMATCH[2]}" + if [[ "$unit" == "T" ]]; then + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + else + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) + fi + fi + + # Method 3: Try direct TB/GB pattern matching + if [[ $capacity_gb -eq 0 ]] && [[ $capacity =~ ([0-9,.]+)\s*TB ]]; then + local size=$(echo "$capacity" | grep -oE '[0-9,.]+' | head -1 | tr -d ',') + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + elif [[ $capacity_gb -eq 0 ]] && [[ $capacity =~ ([0-9,.]+)\s*GB ]]; then + local size=$(echo "$capacity" | grep -oE '[0-9,.]+' | head -1 | tr -d ',') + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. 
-f1) + fi + + # Method 4: For NVMe - try different field formats + if [[ $capacity_gb -eq 0 ]] && [[ $capacity =~ ([0-9,]+)\s*[Bb] ]]; then + local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) + fi + + # Ensure we have a valid number + if [[ -z "$capacity_gb" || ! "$capacity_gb" =~ ^[0-9]+$ || "$capacity_gb" -le 0 ]]; then + echo "0" + else + echo "$capacity_gb" + fi +} + +# Function to get NVMe capacity using smartctl +get_nvme_capacity() { + local disk=$1 + local nvme_info=$(smartctl -i "$disk" 2>/dev/null) + local capacity="" + + # Try to get capacity from different fields + capacity=$(echo "$nvme_info" | grep -i "Total NVM Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "Namespace 1 Size/Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') + fi + if [[ -z "$capacity" ]]; then + capacity=$(echo "$nvme_info" | grep -i "User Capacity" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + fi + + echo "$capacity" +} + +# Function to get human readable capacity +get_human_capacity() { + local capacity_gb=$1 + + if [[ $capacity_gb -ge 1000 ]]; then + echo "$(echo "scale=2; $capacity_gb / 1000" | bc) TB" + else + echo "${capacity_gb} GB" + fi +} + +# Function to get capacity using block device information as fallback +get_block_device_capacity() { + local disk=$1 + local capacity_gb=0 + + # Try to get capacity from block device using lsblk or fdisk + if command_exists lsblk; then + local block_size=$(lsblk -b "$disk" -o SIZE -n 2>/dev/null | head -1) + if [[ -n "$block_size" && "$block_size" =~ ^[0-9]+$ ]]; then + capacity_gb=$(echo "scale=0; $block_size / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. 
-f1) + fi + fi + + # Alternative method using fdisk + if [[ $capacity_gb -eq 0 ]] && command_exists fdisk; then + local fdisk_info=$(fdisk -l "$disk" 2>/dev/null | grep "Disk $disk") + if [[ $fdisk_info =~ ([0-9,.]+)\s*([GT])iB ]]; then + local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') + local unit="${BASH_REMATCH[2]}" + if [[ "$unit" == "T" ]]; then + capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) + else + capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) + fi + fi + fi + + echo "$capacity_gb" +} + +# Function to check a single disk check_disk() { local disk=$1 - local controller=$2 - print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})" + print_color $CYAN "Checking disk: $disk" echo "==================================================" - # Test SMART access level - local access_level=$(test_smart_access "$disk" "$controller") - - case $access_level in - "no_access") - print_color $RED "ERROR: Cannot access disk through controller" - echo "Possible reasons:" - echo " - Controller doesn't support SMART passthrough" - echo " - Disk is part of a hardware RAID array" - echo " - Insufficient permissions (try running as root)" - echo " - Controller busy or offline" - echo "" - return - ;; - "not_available") - print_color $YELLOW "SMART not available on this disk" - echo "This disk does not support SMART monitoring" - echo "" - return - ;; - "disabled") - print_color $YELLOW "SMART is disabled on this disk" - echo "SMART is available but currently disabled" - echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk" - echo "" - return - ;; - "no_attributes") - print_color $YELLOW "WARNING: Cannot read SMART attributes" - echo "This is common with hardware RAID controllers like PERC H730P" - echo "Try checking through the RAID management interface" - echo "" - return - ;; - "limited_attributes") - print_color $YELLOW "NOTE: Limited SMART data available" - echo "Controller is 
filtering some SMART attributes" - ;; - esac + # Check if disk exists and is accessible + if [[ ! -b "$disk" ]]; then + print_color $RED "Error: $disk is not a valid block device" + echo "" + return + fi # Get disk information - local disk_info=$(get_disk_info "$disk" "$controller") - IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info" + local disk_info=$(get_disk_info "$disk") + local disk_type=$(echo "$disk_info" | cut -d'|' -f1) + local transport=$(echo "$disk_info" | cut -d'|' -f2) + local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) + + # Get basic disk information + local info=$(smartctl -i "$disk" 2>/dev/null) + local health=$(smartctl -H "$disk" 2>/dev/null) + local attributes=$(smartctl -A "$disk" 2>/dev/null) + + # Check if smartctl command succeeded + if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then + print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access." 
+ echo "" + return + fi + + # Extract disk information + local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$model" ]] && model=$(echo "$info" | grep -i "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + + local serial=$(echo "$info" | grep -i "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') + local capacity=$(echo "$info" | grep -i "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) + local firmware=$(echo "$info" | grep -i "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') + + # For NVMe disks, try to get capacity from different fields + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_capacity=$(get_nvme_capacity "$disk") + if [[ -n "$nvme_capacity" ]]; then + capacity="$nvme_capacity" + fi + fi + + # Extract capacity in GB and human readable format + local capacity_gb=$(extract_capacity_gb "$capacity") + local capacity_human="" + + # If capacity extraction failed, try block device method + if [[ "$capacity_gb" -eq 0 ]]; then + capacity_gb=$(get_block_device_capacity "$disk") + fi + + # Generate human readable capacity + if [[ "$capacity_gb" -gt 0 ]]; then + capacity_human=$(get_human_capacity "$capacity_gb") + else + capacity_human="Unknown" + fi + + local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') + [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') + + # Extract SMART attributes + local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1) + local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw") + local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) + local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1) + local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}' | 
head -1) + local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1) + local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1) + local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}' | head -1) + + # For NVMe disks using smartctl extended attributes + if [[ "$disk_type" == "NVMe" ]]; then + local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null) + # Extract data units written for NVMe + local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $4}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + # Convert data units to sectors (1 data unit = 1000 sectors for NVMe) + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + else + # Try alternative field + data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') + if [[ -n "$data_units_written" ]]; then + total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) + fi + fi + # Get power on hours for NVMe + local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') + if [[ -n "$nvme_power_hours" ]]; then + power_on_hours="$nvme_power_hours" + fi + fi # Display basic information echo "Model: ${model:-Unknown}" echo "Serial: ${serial:-Unknown}" echo "Type: $disk_type" - echo "Capacity: ${capacity:-Unknown}" + echo "Interface: $transport" + echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" + echo "Capacity: $capacity_human" echo "Firmware: ${firmware:-Unknown}" echo "Health: ${health_status:-Unknown}" - - # Only show power on hours if available - if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then - echo "Power On Hours: $power_on_hours" - else - echo "Power On Hours: Unknown" - fi + echo "Power On Hours: ${power_on_hours:-Unknown}" # Disk type specific analysis - if [[ "$disk_type" == "SSD" ]]; then - # Get the 
actual attribute name for TBW calculation - local attributes="" - if echo "$model" | grep -qi "NVMe"; then - attributes=$(smartctl -x "$disk" 2>/dev/null) - else - attributes=$(smartctl -A "$disk" 2>/dev/null) - fi + if [[ "$disk_type" == "HDD" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" + [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" + [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" - local tbw_attribute_name=$(echo "$attributes" | grep -i "Lifetime_Writes_GiB\|Flash_Writes_GiB\|Data Units Written\|Total_LBAs_Written" | head -1 | awk '{print $2}') + local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") + echo "Lifespan: $lifespan" + elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then local tbw_used=0 if [[ -n "$total_written" && "$total_written" != "0" ]]; then - tbw_used=$(calculate_tbw "$total_written" "" "$model" "$tbw_attribute_name") + tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then - tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model" "$tbw_attribute_name") + tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") fi - # Always show TBW information for SSDs + local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type") + echo "TBW Used: ${tbw_used} TB" + echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)" - # Estimate capacity for endurance calculation - local capacity_gb=0 - if echo "$capacity" | grep -qi "GB"; then - capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. 
-f1) - elif echo "$capacity" | grep -qi "TB"; then - capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. -f1) - else - # Try to extract capacity from raw number - capacity_gb=$(echo "$capacity" | grep -o '[0-9]*' | head -1) - capacity_gb=$((capacity_gb / 1000000000)) - fi - - local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout") + local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) - local wear_source=$(echo "$lifespan_info" | cut -d'|' -f4) - echo "TBW Remaining: $tbw_remaining" - echo "Lifespan: $lifespan_percent ($wear_status)" - - # Show wear source if available - if [[ "$wear_source" == "media_wearout" ]]; then - echo "Wear Source: Media Wearout Indicator" - elif [[ "$wear_source" == "tbw" ]]; then - echo "Wear Source: TBW Calculation" - elif [[ "$wear_source" == "estimated" ]]; then - echo "Wear Source: Estimated Endurance" + if [[ "$estimated_endurance" != "N/A" ]]; then + echo "TBW Remaining: $tbw_remaining" + echo "Lifespan: $lifespan_percent ($wear_status)" fi - elif [[ "$disk_type" == "HDD" ]]; then - if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then - echo "Realloc Sectors: $reallocated_sectors" + # Show mechanical attributes for SAS drives that might be SSDs + if [[ "$disk_type" == "SAS" ]]; then + echo "Realloc Sectors: ${reallocated_sectors:-0}" + echo "Pending Sectors: ${pending_sectors:-0}" fi - if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then - echo "Pending Sectors: $pending_sectors" - fi - - local lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}") - echo "Lifespan: $lifespan" else - 
print_color $YELLOW "Limited information available for this disk type" - echo "This is normal for hardware RAID configurations like PERC H730P" - echo "For detailed SAS drive information, use controller management tools" + print_color $YELLOW "Unknown disk type - limited information available" fi echo "" } -# Function to detect all disks with enhanced SAS support (no partitions) +# Function to detect all disks detect_disks() { local disks=() - # Check for SATA/SAS disks - only main devices, no partitions - for disk in /dev/sd[a-z]; do + # Check for SATA/SAS disks + for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do if [[ -b "$disk" ]]; then disks+=("$disk") fi done - # Check for NVMe disks - only main devices, no partitions + # Check for NVMe disks (base devices only, no partitions) for disk in /dev/nvme[0-9]n[0-9]; do if [[ -b "$disk" ]]; then disks+=("$disk") fi done - # Check for SAS disks via SCSI generic - only main devices - for disk in /dev/sg[0-9]; do - if [[ -b "$disk" ]]; then - disks+=("$disk") - fi - done - - # Check for other disk types - only main devices + # Check for other disk types for disk in /dev/vd[a-z] /dev/xvd[a-z]; do if [[ -b "$disk" ]]; then disks+=("$disk") @@ -562,40 +622,19 @@ detect_disks() { echo "${disks[@]}" } -# Function to detect RAID controllers (Ubuntu specific) -detect_raid_controllers() { - local controllers=("megaraid" "cciss" "areca" "3ware" "hpt") - local raid_disks=() - - # Check for RAID controllers - for controller in "${controllers[@]}"; do - for i in {0..31}; do - # Try different disk devices for each controller - for base_disk in "/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1"; do - if [[ -b "$base_disk" ]]; then - if smartctl -d "$controller,$i" -i "$base_disk" &>/dev/null; then - raid_disks+=("$base_disk:$controller,$i") - break - fi - fi - done - done - done - - echo "${raid_disks[@]}" -} - # Main function main() { - print_color $BLUE "Ubuntu 24.04 Disk Health Check Script v$VERSION" - print_color $BLUE 
"Enhanced with PERC H730P and SAS Support" - print_color $BLUE "============================================" + print_color $BLUE "Disk Health Check Script v$VERSION for Ubuntu" + print_color $BLUE "==============================================" echo "" check_dependencies local disks=() + # Check for soft-raid first + check_mdraid + # If specific disk provided, check only that disk if [[ $# -gt 0 ]]; then for disk in "$@"; do @@ -607,16 +646,8 @@ main() { done else # Auto-detect disks - print_color $CYAN "Auto-detecting disks (excluding partitions)..." - local direct_disks=() - read -ra direct_disks <<< "$(detect_disks)" - - print_color $CYAN "Scanning for RAID controllers..." - local raid_disks=() - read -ra raid_disks <<< "$(detect_raid_controllers)" - - # Combine both lists - disks=("${direct_disks[@]}" "${raid_disks[@]}") + print_color $CYAN "Auto-detecting disks..." + read -ra disks <<< "$(detect_disks)" fi if [[ ${#disks[@]} -eq 0 ]]; then @@ -630,33 +661,20 @@ main() { # Check if running as root, warn if not if [[ $EUID -ne 0 ]]; then - print_color $YELLOW "Warning: Not running as root." - print_color $YELLOW "Some disks/controllers may show limited information." + print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible." echo "For complete results, run as: sudo $0" echo "" fi # Check each disk - for disk_info in "${disks[@]}"; do - # Check if this is a RAID disk (has controller specified) - if [[ "$disk_info" == *":"* ]]; then - IFS=':' read -r disk controller <<< "$disk_info" - check_disk "$disk" "$controller" - else - check_disk "$disk_info" - fi + for disk in "${disks[@]}"; do + check_disk "$disk" done print_color $BLUE "Check completed!" 
echo "" - print_color $CYAN "Note: For PERC H730P controllers with SAS drives:" - print_color $CYAN " - Install 'storcli' for detailed controller information" - print_color $CYAN " - Use 'smartctl -d sat /dev/sgX' to try direct access" - print_color $CYAN " - Hardware RAID controllers often limit SMART data access" - echo "" - print_color $CYAN "Ubuntu-specific tips:" - print_color $CYAN " - Use 'lsblk' to see all available block devices" - print_color $CYAN " - Use 'lshw -class disk' for detailed disk information" + print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers." + print_color $YELLOW " Actual endurance for your specific drive model may be higher." } # Usage information @@ -669,9 +687,10 @@ usage() { echo " $SCRIPT_NAME # Check all auto-detected disks" echo " sudo $SCRIPT_NAME # Check all disks (as root)" echo " $SCRIPT_NAME /dev/sda # Check specific disk" - echo " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" - echo " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" + echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" + echo "" + echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" } # Parse command line arguments diff --git a/ubuntu-v2.6.sh b/ubuntu-v2.6.sh deleted file mode 100755 index b75c9d3..0000000 --- a/ubuntu-v2.6.sh +++ /dev/null @@ -1,759 +0,0 @@ -#!/bin/bash - -# Disk Health Check Script for Ubuntu -# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid -# Supports consumer and enterprise disk classification - -SCRIPT_NAME=$(basename "$0") -VERSION="2.6" - -# Color codes -RED=$(tput setaf 1) -GREEN=$(tput setaf 2) -YELLOW=$(tput setaf 3) -BLUE=$(tput setaf 4) -CYAN=$(tput setaf 6) -MAGENTA=$(tput setaf 5) -NC=$(tput sgr0) - -# Function to print colored output -print_color() { - local color=$1 - local message=$2 - echo -e "${color}${message}${NC}" -} - -# Check if command exists -command_exists() 
{ - command -v "$1" >/dev/null 2>&1 -} - -# Check dependencies -check_dependencies() { - local missing=() - - if ! command_exists smartctl; then - missing+=("smartmontools") - fi - - if ! command_exists bc; then - missing+=("bc") - fi - - if [[ ${#missing[@]} -gt 0 ]]; then - print_color $RED "Error: Missing required packages: ${missing[*]}" - echo "Install with: sudo apt update && sudo apt install ${missing[*]}" - exit 1 - fi -} - -# TBW endurance standards (using lowest numbers) -declare -A CONSUMER_TBW=( - ["250"]=150 - ["500"]=300 - ["1000"]=600 - ["2000"]=1200 - ["4000"]=2400 - ["8000"]=4800 -) - -declare -A ENTERPRISE_TBW=( - ["250"]=450 - ["500"]=900 - ["1000"]=1800 - ["2000"]=3600 - ["4000"]=7200 - ["8000"]=14400 -) - -# Function to get closest capacity tier -get_capacity_tier() { - local capacity_gb=$1 - local tiers=("250" "500" "1000" "2000" "4000" "8000") - - for tier in "${tiers[@]}"; do - if [[ $capacity_gb -le $tier ]]; then - echo $tier - return - fi - done - # For larger than 8TB, use proportional scaling from 4TB - echo "8000" -} - -# Function to convert bytes to human readable -bytes_to_human() { - local bytes=$1 - if [[ $bytes -ge 1099511627776 ]]; then - echo "$(echo "scale=2; $bytes / 1099511627776" | bc 2>/dev/null || echo "0") TB" - elif [[ $bytes -ge 1073741824 ]]; then - echo "$(echo "scale=2; $bytes / 1073741824" | bc 2>/dev/null || echo "0") GB" - elif [[ $bytes -ge 1048576 ]]; then - echo "$(echo "scale=2; $bytes / 1048576" | bc 2>/dev/null || echo "0") MB" - else - echo "$bytes bytes" - fi -} - -# Function to extract numeric hours from power_on_hours field -extract_numeric_hours() { - local power_on_hours=$1 - # Remove everything after non-numeric characters - local numeric_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*$//') - if [[ -n "$numeric_hours" && "$numeric_hours" =~ ^[0-9]+$ ]]; then - echo "$numeric_hours" - else - echo "0" - fi -} - -# Function to get disk type and interface -get_disk_info() { - local disk=$1 - - local 
info=$(smartctl -i "$disk" 2>/dev/null) - local transport="" - local disk_type="UNKNOWN" - local is_enterprise=false - - # Check if it's NVMe - if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then - disk_type="NVMe" - transport="NVMe" - # Check for SAS - elif echo "$info" | grep -qi "SAS"; then - disk_type="SAS" - transport="SAS" - is_enterprise=true - # Check for SATA SSD - elif echo "$info" | grep -qi "Solid State Device"; then - disk_type="SSD" - transport="SATA" - # Check for SATA HDD - elif echo "$info" | grep -qi "Rotation Rate"; then - disk_type="HDD" - transport="SATA" - fi - - # Check for enterprise features - if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then - is_enterprise=true - fi - - # Check device type by model name - local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') - if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then - if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then - is_enterprise=true - fi - fi - - echo "$disk_type|$transport|$is_enterprise" -} - -# Function to calculate TBW for SSD/NVMe -calculate_tbw() { - local disk_type=$1 - local raw_value=$2 - local sectors=$3 - - local tbw=0 - - if [[ -n "$sectors" && "$sectors" != "0" ]]; then - # Calculate from sectors (most common for SATA SSDs) - local bytes=$((sectors * 512)) - tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0") - elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then - if [[ "$disk_type" == "NVMe" ]]; then - # NVMe: raw value is in 32MB units - tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") - else - # SATA SSD: various manufacturers - tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0") - fi - fi - - echo "$tbw" -} - -# Function to get estimated endurance -get_estimated_endurance() { - local capacity_gb=$1 - 
local is_enterprise=$2 - local disk_type=$3 - - # HDDs don't have TBW - if [[ "$disk_type" == "HDD" ]]; then - echo "N/A" - return - fi - - local capacity_tier=$(get_capacity_tier "$capacity_gb") - - if [[ "$is_enterprise" == "true" ]]; then - echo "${ENTERPRISE_TBW[$capacity_tier]}" - else - echo "${CONSUMER_TBW[$capacity_tier]}" - fi -} - -# Function to estimate SSD lifespan with TBW remaining -estimate_ssd_lifespan() { - local power_on_hours=$1 - local tbw_used=$2 - local estimated_endurance=$3 - local disk_type=$4 - - if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then - echo "Unknown||Unknown|New" - return - fi - - if [[ "$estimated_endurance" == "N/A" ]]; then - echo "N/A|N/A|N/A|HDD" - return - fi - - # Handle the case where tbw_used might have formatting issues - local clean_tbw_used=$(echo "$tbw_used" | sed 's/[^0-9.]//g') - if [[ -z "$clean_tbw_used" ]]; then - clean_tbw_used=0 - fi - - local tbw_remaining=$(echo "scale=2; $estimated_endurance - $clean_tbw_used" | bc 2>/dev/null || echo "$estimated_endurance") - - if [[ $(echo "$clean_tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then - local lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0") - local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100") - - if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then - echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type" - elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then - echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type" - else - echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type" - fi - else - echo "Unknown|${estimated_endurance} TB|New|$disk_type" - fi -} - -# Function to estimate HDD lifespan -estimate_hdd_lifespan() { - local power_on_hours=$1 - local reallocated_sectors=$2 - local 
pending_sectors=$3 - local start_stop_count=$4 - local load_cycle_count=$5 - local disk_type=$6 - - # Extract numeric hours only - local numeric_hours=$(extract_numeric_hours "$power_on_hours") - - if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then - echo "Unknown" - return - fi - - local severity=0 - - # Critical issues - if [[ "$pending_sectors" -gt 0 ]]; then - echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)" - return - elif [[ "$reallocated_sectors" -gt 100 ]]; then - severity=$((severity + 3)) - elif [[ "$reallocated_sectors" -gt 10 ]]; then - severity=$((severity + 2)) - elif [[ "$reallocated_sectors" -gt 0 ]]; then - severity=$((severity + 1)) - fi - - # Usage-based assessment - if [[ "$numeric_hours" -gt 50000 ]]; then - severity=$((severity + 3)) - elif [[ "$numeric_hours" -gt 30000 ]]; then - severity=$((severity + 2)) - elif [[ "$numeric_hours" -gt 15000 ]]; then - severity=$((severity + 1)) - fi - - # Mechanical wear (for HDDs) - if [[ "$disk_type" == "HDD" ]]; then - if [[ "$start_stop_count" -gt 50000 ]]; then - severity=$((severity + 2)) - elif [[ "$start_stop_count" -gt 20000 ]]; then - severity=$((severity + 1)) - fi - - if [[ "$load_cycle_count" -gt 500000 ]]; then - severity=$((severity + 2)) - elif [[ "$load_cycle_count" -gt 200000 ]]; then - severity=$((severity + 1)) - fi - fi - - if [[ $severity -ge 5 ]]; then - echo "${RED}< 6 months${NC} (Multiple risk factors)" - elif [[ $severity -ge 3 ]]; then - echo "${YELLOW}6-18 months${NC} (Moderate wear)" - elif [[ $severity -ge 1 ]]; then - echo "${YELLOW}1-3 years${NC} (Light wear)" - else - echo "${GREEN}> 3 years${NC} (Healthy)" - fi -} - -# Function to check soft-raid (MDRAID) -check_mdraid() { - local md_devices=() - - if [[ -f /proc/mdstat ]]; then - while IFS= read -r line; do - if [[ $line =~ ^md[0-9]+ ]]; then - md_devices+=("/dev/${line%% *}") - fi - done < /proc/mdstat - fi - - for md in "${md_devices[@]}"; do - if [[ -b "$md" ]]; then - print_color $MAGENTA 
"Found software RAID: $md" - if command_exists mdadm; then - local md_info=$(mdadm --detail "$md" 2>/dev/null) - if [[ -n "$md_info" ]]; then - echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')" - echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')" - echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')" - echo "" - fi - fi - fi - done -} - -# Function to extract capacity in GB from various formats - IMPROVED FOR HDD/SAS -extract_capacity_gb() { - local capacity=$1 - local capacity_gb=0 - - # Debug: Show what we're trying to parse - # echo "DEBUG: Parsing capacity: '$capacity'" >&2 - - # Try different patterns to extract capacity - if [[ $capacity =~ \[([0-9,.]+)\s*[Tt][Bb] ]]; then - # Pattern: [1.82 TB] - local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') - capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1) - # echo "DEBUG: Pattern 1 - TB size: $size, GB: $capacity_gb" >&2 - elif [[ $capacity =~ \[([0-9,.]+)\s*[Gg][Bb] ]]; then - # Pattern: [500.1 GB] - local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') - capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) - # echo "DEBUG: Pattern 2 - GB size: $size, GB: $capacity_gb" >&2 - elif [[ $capacity =~ ([0-9,]+)\s+bytes ]]; then - # Pattern: 500,107,862,016 bytes - local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') - capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) - # echo "DEBUG: Pattern 3 - bytes: $bytes, GB: $capacity_gb" >&2 - elif [[ $capacity =~ ([0-9,.]+)\s*[Tt][Bb] ]]; then - # Pattern: 1.82TB - local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') - capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. 
-f1) - # echo "DEBUG: Pattern 4 - TB size: $size, GB: $capacity_gb" >&2 - elif [[ $capacity =~ ([0-9,.]+)\s*[Gg][Bb] ]]; then - # Pattern: 500.1GB - local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',') - capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1) - # echo "DEBUG: Pattern 5 - GB size: $size, GB: $capacity_gb" >&2 - elif [[ $capacity =~ ([0-9,]+)\s*[Bb] ]]; then - # Pattern: 2000398934016B - local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',') - capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1) - # echo "DEBUG: Pattern 6 - bytes: $bytes, GB: $capacity_gb" >&2 - fi - - # Ensure we have a valid number - if [[ -z "$capacity_gb" || ! "$capacity_gb" =~ ^[0-9]+$ || "$capacity_gb" -le 0 ]]; then - echo "0" - else - echo "$capacity_gb" - fi -} - -# Function to get NVMe capacity using smartctl -get_nvme_capacity() { - local disk=$1 - local nvme_info=$(smartctl -i "$disk" 2>/dev/null) - local capacity="" - - # Try to get capacity from different fields - capacity=$(echo "$nvme_info" | grep -i "Total NVM Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') - if [[ -z "$capacity" ]]; then - capacity=$(echo "$nvme_info" | grep -i "Namespace 1 Size/Capacity" | cut -d: -f2 | sed 's/^[ \t]*//') - fi - if [[ -z "$capacity" ]]; then - capacity=$(echo "$nvme_info" | grep -i "User Capacity" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) - fi - - echo "$capacity" -} - -# Function to get human readable capacity -get_human_capacity() { - local capacity_gb=$1 - - if [[ $capacity_gb -ge 1000 ]]; then - echo "$(echo "scale=2; $capacity_gb / 1000" | bc) TB" - else - echo "${capacity_gb} GB" - fi -} - -# Function to extract capacity from model name for HDD/SAS drives -extract_capacity_from_model() { - local model=$1 - local disk_type=$2 - - # Common HDD/SAS capacity patterns in model names - if [[ "$model" =~ 2[Tt][Bb] ]] || [[ "$model" =~ 2000[Gg] ]]; then - echo "2000" - elif [[ "$model" =~ 1[Tt][Bb] ]] || [[ "$model" =~ 1000[Gg] ]]; 
then - echo "1000" - elif [[ "$model" =~ 4[Tt][Bb] ]] || [[ "$model" =~ 4000[Gg] ]]; then - echo "4000" - elif [[ "$model" =~ 8[Tt][Bb] ]] || [[ "$model" =~ 8000[Gg] ]]; then - echo "8000" - elif [[ "$model" =~ 500[Gg] ]]; then - echo "500" - elif [[ "$model" =~ 250[Gg] ]]; then - echo "250" - else - # Try to extract numbers that look like capacities - local capacity_match=$(echo "$model" | grep -oE '[0-9]+[GT]B' | head -1) - if [[ -n "$capacity_match" ]]; then - if [[ "$capacity_match" =~ ([0-9]+)TB ]]; then - echo "$((${BASH_REMATCH[1]} * 1000))" - elif [[ "$capacity_match" =~ ([0-9]+)GB ]]; then - echo "${BASH_REMATCH[1]}" - fi - else - echo "0" - fi - fi -} - -# Function to check a single disk -check_disk() { - local disk=$1 - - print_color $CYAN "Checking disk: $disk" - echo "==================================================" - - # Check if disk exists and is accessible - if [[ ! -b "$disk" ]]; then - print_color $RED "Error: $disk is not a valid block device" - echo "" - return - fi - - # Get disk information - local disk_info=$(get_disk_info "$disk") - local disk_type=$(echo "$disk_info" | cut -d'|' -f1) - local transport=$(echo "$disk_info" | cut -d'|' -f2) - local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3) - - # Get basic disk information - local info=$(smartctl -i "$disk" 2>/dev/null) - local health=$(smartctl -H "$disk" 2>/dev/null) - local attributes=$(smartctl -A "$disk" 2>/dev/null) - - # Check if smartctl command succeeded - if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then - print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access." 
- echo "" - return - fi - - # Extract disk information - local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//') - [[ -z "$model" ]] && model=$(echo "$info" | grep -i "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//') - - local serial=$(echo "$info" | grep -i "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//') - local capacity=$(echo "$info" | grep -i "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1) - local firmware=$(echo "$info" | grep -i "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//') - - # For NVMe disks, try to get capacity from different fields - if [[ "$disk_type" == "NVMe" ]]; then - local nvme_capacity=$(get_nvme_capacity "$disk") - if [[ -n "$nvme_capacity" ]]; then - capacity="$nvme_capacity" - fi - fi - - # Extract capacity in GB and human readable format - local capacity_gb=$(extract_capacity_gb "$capacity") - local capacity_human="" - - # If capacity extraction failed, try alternative methods - if [[ "$capacity_gb" -eq 0 ]]; then - # Try to get capacity from model name (especially for HDD/SAS) - local model_capacity=$(extract_capacity_from_model "$model" "$disk_type") - if [[ "$model_capacity" -gt 0 ]]; then - capacity_gb="$model_capacity" - capacity_human=$(get_human_capacity "$capacity_gb") - else - # Final fallback based on disk type and common sizes - if [[ "$disk_type" == "HDD" ]]; then - # Common HDD sizes - if [[ "$model" =~ ST2000 ]]; then - capacity_gb=2000 - elif [[ "$model" =~ ST1000 ]]; then - capacity_gb=1000 - elif [[ "$model" =~ ST4000 ]]; then - capacity_gb=4000 - elif [[ "$model" =~ ST3000 ]]; then - capacity_gb=3000 - else - capacity_gb=0 - fi - elif [[ "$disk_type" == "SSD" ]]; then - # Common SSD sizes - if [[ "$model" =~ 960[Gg] ]]; then - capacity_gb=960 - elif [[ "$model" =~ 480[Gg] ]]; then - capacity_gb=480 - elif [[ "$model" =~ 240[Gg] ]]; then - capacity_gb=240 - elif [[ "$model" =~ 120[Gg] ]]; then - capacity_gb=120 - else - capacity_gb=0 - fi - elif [[ "$disk_type" 
== "NVMe" ]]; then - # Common NVMe sizes - if [[ "$model" =~ 500[Gg] ]]; then - capacity_gb=500 - elif [[ "$model" =~ 1000[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then - capacity_gb=1000 - elif [[ "$model" =~ 2000[Gg] ]] || [[ "$model" =~ 2[Tt] ]]; then - capacity_gb=2000 - else - capacity_gb=0 - fi - fi - capacity_human=$(get_human_capacity "$capacity_gb") - fi - else - capacity_human=$(get_human_capacity "$capacity_gb") - fi - - # If we still don't have capacity, show unknown - if [[ "$capacity_gb" -eq 0 ]]; then - capacity_human="Unknown" - fi - - local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//') - [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//') - - # Extract SMART attributes - local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1) - local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw") - local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}' | head -1) - local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1) - local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}' | head -1) - local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1) - local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1) - local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}' | head -1) - - # For NVMe disks using smartctl extended attributes - if [[ "$disk_type" == "NVMe" ]]; then - local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null) - # Extract data units written for NVMe - local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $4}' | tr -d ',') - if [[ -n "$data_units_written" ]]; then - # Convert data units 
to sectors (1 data unit = 1000 sectors for NVMe) - total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) - else - # Try alternative field - data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',') - if [[ -n "$data_units_written" ]]; then - total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null) - fi - fi - # Get power on hours for NVMe - local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}') - if [[ -n "$nvme_power_hours" ]]; then - power_on_hours="$nvme_power_hours" - fi - fi - - # Display basic information - echo "Model: ${model:-Unknown}" - echo "Serial: ${serial:-Unknown}" - echo "Type: $disk_type" - echo "Interface: $transport" - echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")" - echo "Capacity: $capacity_human" - echo "Firmware: ${firmware:-Unknown}" - echo "Health: ${health_status:-Unknown}" - echo "Power On Hours: ${power_on_hours:-Unknown}" - - # Disk type specific analysis - if [[ "$disk_type" == "HDD" ]]; then - echo "Realloc Sectors: ${reallocated_sectors:-0}" - echo "Pending Sectors: ${pending_sectors:-0}" - [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}" - [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}" - - local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type") - echo "Lifespan: $lifespan" - - elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then - local tbw_used=0 - if [[ -n "$total_written" && "$total_written" != "0" ]]; then - tbw_used=$(calculate_tbw "$disk_type" "" "$total_written") - elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then - tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "") - fi - - local estimated_endurance=$(get_estimated_endurance "$capacity_gb" 
"$is_enterprise" "$disk_type") - - echo "TBW Used: ${tbw_used} TB" - echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)" - - local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type") - local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1) - local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2) - local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3) - - if [[ "$estimated_endurance" != "N/A" ]]; then - echo "TBW Remaining: $tbw_remaining" - echo "Lifespan: $lifespan_percent ($wear_status)" - fi - - # Show mechanical attributes for SAS drives that might be SSDs - if [[ "$disk_type" == "SAS" ]]; then - echo "Realloc Sectors: ${reallocated_sectors:-0}" - echo "Pending Sectors: ${pending_sectors:-0}" - fi - else - print_color $YELLOW "Unknown disk type - limited information available" - fi - - echo "" -} - -# Function to detect all disks -detect_disks() { - local disks=() - - # Check for SATA/SAS disks - for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do - if [[ -b "$disk" ]]; then - disks+=("$disk") - fi - done - - # Check for NVMe disks (base devices only, no partitions) - for disk in /dev/nvme[0-9]n[0-9]; do - if [[ -b "$disk" ]]; then - disks+=("$disk") - fi - done - - # Check for other disk types - for disk in /dev/vd[a-z] /dev/xvd[a-z]; do - if [[ -b "$disk" ]]; then - disks+=("$disk") - fi - done - - echo "${disks[@]}" -} - -# Main function -main() { - print_color $BLUE "Disk Health Check Script v$VERSION for Ubuntu" - print_color $BLUE "==============================================" - echo "" - - check_dependencies - - local disks=() - - # Check for soft-raid first - check_mdraid - - # If specific disk provided, check only that disk - if [[ $# -gt 0 ]]; then - for disk in "$@"; do - if [[ -b "$disk" ]]; then - disks+=("$disk") - else - print_color $RED "Error: $disk is not a valid block device" - fi - done - else - # Auto-detect disks - print_color 
$CYAN "Auto-detecting disks..." - read -ra disks <<< "$(detect_disks)" - fi - - if [[ ${#disks[@]} -eq 0 ]]; then - print_color $RED "No disks found or accessible" - echo "Try running as root or specifying disk paths manually" - exit 1 - fi - - print_color $GREEN "Found ${#disks[@]} disk(s) to check" - echo "" - - # Check if running as root, warn if not - if [[ $EUID -ne 0 ]]; then - print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible." - echo "For complete results, run as: sudo $0" - echo "" - fi - - # Check each disk - for disk in "${disks[@]}"; do - check_disk "$disk" - done - - print_color $BLUE "Check completed!" - echo "" - print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers." - print_color $YELLOW " Actual endurance for your specific drive model may be higher." -} - -# Usage information -usage() { - echo "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" - echo "" - echo "If no disks specified, auto-detects all available disks" - echo "" - echo "Examples:" - echo " $SCRIPT_NAME # Check all auto-detected disks" - echo " sudo $SCRIPT_NAME # Check all disks (as root)" - echo " $SCRIPT_NAME /dev/sda # Check specific disk" - echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" - echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" - echo "" - echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" -} - -# Parse command line arguments -case "${1:-}" in - -h|--help) - usage - exit 0 - ;; - -v|--version) - echo "$SCRIPT_NAME version $VERSION" - exit 0 - ;; - *) - main "$@" - ;; -esac