moved old files

This commit is contained in:
Adam.Tony
2025-10-22 15:02:09 +08:00
parent 6c2664367c
commit e7c4a83b73
34 changed files with 4343 additions and 2056 deletions

0
alma-v2.1.sh → old/alma-v2.1.sh Executable file → Normal file
View File

0
alma-v2.3.sh → old/alma-v2.3.sh Executable file → Normal file
View File

0
alma-v2.5.sh → old/alma-v2.5.sh Executable file → Normal file
View File

0
alma-v2.6.sh → old/alma-v2.6.sh Executable file → Normal file
View File

128
ubuntu-v2.5.sh → old/alma-v2.7.sh Executable file → Normal file
View File

@@ -1,11 +1,11 @@
#!/bin/bash
# Disk Health Check Script for Ubuntu
# Disk Health Check Script for Alma Linux 9
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
# Supports consumer and enterprise disk classification
SCRIPT_NAME=$(basename "$0")
VERSION="2.5"
VERSION="2.7"
# Color codes
RED=$(tput setaf 1)
@@ -42,7 +42,7 @@ check_dependencies() {
if [[ ${#missing[@]} -gt 0 ]]; then
print_color $RED "Error: Missing required packages: ${missing[*]}"
echo "Install with: sudo apt update && sudo apt install ${missing[*]}"
echo "Install with: sudo dnf install ${missing[*]}"
exit 1
fi
}
@@ -332,34 +332,46 @@ check_mdraid() {
done
}
# Function to extract capacity in GB from various formats
# Function to extract capacity in GB from various formats - IMPROVED VERSION
extract_capacity_gb() {
local capacity=$1
local capacity_gb=0
# Try different patterns to extract capacity
if [[ $capacity =~ \[([0-9,.]+)\s*[Tt][Bb] ]]; then
# Pattern: [1.82 TB]
local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',')
capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1)
elif [[ $capacity =~ \[([0-9,.]+)\s*[Gg][Bb] ]]; then
# Pattern: [500.1 GB]
local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',')
capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1)
elif [[ $capacity =~ ([0-9,]+)\s+bytes ]]; then
# Pattern: 500,107,862,016 bytes
# Remove extra spaces and normalize
capacity=$(echo "$capacity" | sed 's/ */ /g')
# Method 1: Try to extract from User Capacity field with bytes
if [[ $capacity =~ ([0-9,]+)\s*bytes ]]; then
local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',')
capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1)
elif [[ $capacity =~ ([0-9,.]+)\s*[Tt][Bb] ]]; then
# Pattern: 1.82TB
fi
# Method 2: Try to extract from bracket format [XXX GB] or [X.XX TB]
if [[ $capacity_gb -eq 0 ]] && [[ $capacity =~ \[([0-9,.]+)\s*([GT])B?\] ]]; then
local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',')
local unit="${BASH_REMATCH[2]}"
if [[ "$unit" == "T" ]]; then
capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1)
else
capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1)
fi
fi
# Method 3: Try direct TB/GB pattern matching
if [[ $capacity_gb -eq 0 ]] && [[ $capacity =~ ([0-9,.]+)\s*TB ]]; then
local size=$(echo "$capacity" | grep -oE '[0-9,.]+' | head -1 | tr -d ',')
capacity_gb=$(echo "scale=0; $size * 1000" | bc 2>/dev/null | cut -d. -f1)
elif [[ $capacity =~ ([0-9,.]+)\s*[Gg][Bb] ]]; then
# Pattern: 500.1GB
local size=$(echo "${BASH_REMATCH[1]}" | tr -d ',')
elif [[ $capacity_gb -eq 0 ]] && [[ $capacity =~ ([0-9,.]+)\s*GB ]]; then
local size=$(echo "$capacity" | grep -oE '[0-9,.]+' | head -1 | tr -d ',')
capacity_gb=$(echo "scale=0; $size" | bc 2>/dev/null | cut -d. -f1)
fi
# Method 4: For NVMe - try different field formats
if [[ $capacity_gb -eq 0 ]] && [[ $capacity =~ ([0-9,]+)\s*[Bb] ]]; then
local bytes=$(echo "${BASH_REMATCH[1]}" | tr -d ',')
capacity_gb=$(echo "scale=0; $bytes / 1000 / 1000 / 1000" | bc 2>/dev/null | cut -d. -f1)
fi
# Ensure we have a valid number
if [[ -z "$capacity_gb" || ! "$capacity_gb" =~ ^[0-9]+$ || "$capacity_gb" -le 0 ]]; then
echo "0"
@@ -388,8 +400,7 @@ get_nvme_capacity() {
# Function to get human readable capacity
get_human_capacity() {
local capacity=$1
local capacity_gb=$2
local capacity_gb=$1
if [[ $capacity_gb -ge 1000 ]]; then
echo "$(echo "scale=2; $capacity_gb / 1000" | bc) TB"
@@ -398,6 +409,36 @@ get_human_capacity() {
fi
}
# Function to get capacity using block device information as fallback
#
# Arguments: $1 - block device path (e.g. /dev/sda)
# Outputs:   capacity in decimal gigabytes (integer) on stdout; 0 when no
#            method produced a usable size
get_block_device_capacity() {
  local disk=$1
  local capacity_gb=0
  local bytes_re='([0-9]+)[[:space:]]+bytes'
  local unit_re='([0-9][0-9,.]*)[[:space:]]*([GT])iB'

  # Try to get capacity from block device using lsblk
  if command_exists lsblk; then
    local block_size
    block_size=$(lsblk -b "$disk" -o SIZE -n 2>/dev/null | head -1)
    # lsblk may pad the column; strip whitespace before validating.
    block_size=${block_size//[[:space:]]/}
    if [[ "$block_size" =~ ^[0-9]+$ ]]; then
      capacity_gb=$((10#$block_size / 1000000000))
    fi
  fi

  # Alternative method using fdisk
  if [[ $capacity_gb -eq 0 ]] && command_exists fdisk; then
    local fdisk_info
    fdisk_info=$(fdisk -l "$disk" 2>/dev/null | grep "Disk $disk")
    if [[ $fdisk_info =~ $bytes_re ]]; then
      # Prefer the exact byte count over the rounded GiB/TiB figure.
      capacity_gb=$((10#${BASH_REMATCH[1]} / 1000000000))
    elif [[ $fdisk_info =~ $unit_re ]]; then
      local size=${BASH_REMATCH[1]//,/}
      # GiB/TiB are binary units: convert to bytes first, then to decimal GB.
      local unit_bytes=1073741824
      if [[ "${BASH_REMATCH[2]}" == "T" ]]; then
        unit_bytes=1099511627776
      fi
      capacity_gb=$(echo "$size * $unit_bytes / 1000000000" | bc 2>/dev/null | cut -d. -f1)
      [[ "$capacity_gb" =~ ^[0-9]+$ ]] || capacity_gb=0
    fi
  fi

  echo "$capacity_gb"
}
# Function to check a single disk
check_disk() {
local disk=$1
@@ -431,12 +472,12 @@ check_disk() {
fi
# Extract disk information
local model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//')
[[ -z "$model" ]] && model=$(echo "$info" | grep "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//')
local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//')
[[ -z "$model" ]] && model=$(echo "$info" | grep -i "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//')
local serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//')
local capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
local firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//')
local serial=$(echo "$info" | grep -i "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//')
local capacity=$(echo "$info" | grep -i "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
local firmware=$(echo "$info" | grep -i "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//')
# For NVMe disks, try to get capacity from different fields
if [[ "$disk_type" == "NVMe" ]]; then
@@ -448,27 +489,18 @@ check_disk() {
# Extract capacity in GB and human readable format
local capacity_gb=$(extract_capacity_gb "$capacity")
local capacity_human=$(get_human_capacity "$capacity" "$capacity_gb")
local capacity_human=""
# If capacity extraction failed, try alternative method
# If capacity extraction failed, try block device method
if [[ "$capacity_gb" -eq 0 ]]; then
# Try to get capacity from model name or other methods
if [[ "$disk_type" == "NVMe" && "$model" =~ 500[Gg] ]]; then
capacity_gb=500
capacity_human="500 GB"
elif [[ "$model" =~ 960[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then
capacity_gb=1000
capacity_human="1 TB"
elif [[ "$model" =~ 2[Tt] ]]; then
capacity_gb=2000
capacity_human="2 TB"
elif [[ "$model" =~ 500[Gg] ]]; then
capacity_gb=500
capacity_human="500 GB"
elif [[ "$model" =~ 250[Gg] ]]; then
capacity_gb=250
capacity_human="250 GB"
fi
capacity_gb=$(get_block_device_capacity "$disk")
fi
# Generate human readable capacity
if [[ "$capacity_gb" -gt 0 ]]; then
capacity_human=$(get_human_capacity "$capacity_gb")
else
capacity_human="Unknown"
fi
local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//')
@@ -592,8 +624,8 @@ detect_disks() {
# Main function
main() {
print_color $BLUE "Disk Health Check Script v$VERSION for Ubuntu"
print_color $BLUE "=============================================="
print_color $BLUE "Disk Health Check Script v$VERSION for Alma Linux 9"
print_color $BLUE "===================================================="
echo ""
check_dependencies

0
harvester-v2.1.sh → old/harvester-v2.1.sh Executable file → Normal file
View File

0
harvester-v2.3.sh → old/harvester-v2.3.sh Executable file → Normal file
View File

0
harvester-v2.5.sh → old/harvester-v2.5.sh Executable file → Normal file
View File

0
harvester-v2.6.sh → old/harvester-v2.6.sh Executable file → Normal file
View File

0
harvester-v2.7.sh → old/harvester-v2.7.sh Executable file → Normal file
View File

0
harvester-v2.9.sh → old/harvester-v2.9.sh Executable file → Normal file
View File

0
harvester-v3.0.sh → old/harvester-v3.0.sh Executable file → Normal file
View File

0
harvester-v3.1.sh → old/harvester-v3.1.sh Executable file → Normal file
View File

0
harvester-v3.2.sh → old/harvester-v3.2.sh Executable file → Normal file
View File

0
harvester-v3.3.sh → old/harvester-v3.3.sh Executable file → Normal file
View File

0
harvester-v3.4.sh → old/harvester-v3.4.sh Executable file → Normal file
View File

869
old/harvester-v3.8.sh Normal file
View File

@@ -0,0 +1,869 @@
#!/bin/bash
# Disk Health Check Script for Harvester OS
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
# Supports consumer and enterprise disk classification
# Created by Adam T. Lau
SCRIPT_NAME=$(basename "$0")
VERSION="3.8"

# Color codes
# tput exits non-zero and writes to stderr when TERM is unset or unknown
# (e.g. under cron or systemd). Silence that and fall back to empty strings so
# the report is still printed, just without colors.
RED=$(tput setaf 1 2>/dev/null) || RED=""
GREEN=$(tput setaf 2 2>/dev/null) || GREEN=""
YELLOW=$(tput setaf 3 2>/dev/null) || YELLOW=""
BLUE=$(tput setaf 4 2>/dev/null) || BLUE=""
CYAN=$(tput setaf 6 2>/dev/null) || CYAN=""
MAGENTA=$(tput setaf 5 2>/dev/null) || MAGENTA=""
NC=$(tput sgr0 2>/dev/null) || NC=""
# Print a message wrapped in a color sequence.
# Arguments: $1 - color escape sequence
#            $2 - message text (backslash escapes are expanded by echo -e)
# Outputs:   color + message + the reset sequence held in NC
print_color() {
  local tint=$1 text=$2
  echo -e "${tint}${text}${NC}"
}
# Report whether a command can be resolved by the shell.
# Arguments: $1 - command name
# Returns:   0 when `command -v` finds it, non-zero otherwise (no output)
command_exists() {
  command -v "$1" &>/dev/null
}
# Stop immediately when smartctl is missing; the checks below all call it.
command_exists smartctl || {
  print_color $RED "Error: smartctl is not installed. Please install smartmontools package."
  exit 1
}
# Known model capacities: model string -> capacity in GB.
# Consulted by get_disk_capacity when the size cannot be read from the device.
declare -A MODEL_CAPACITIES
MODEL_CAPACITIES[ST91000640NS]=1000
MODEL_CAPACITIES[ST2000NM0033]=2000
MODEL_CAPACITIES[ST4000NM0033]=4000
MODEL_CAPACITIES[MB1000GCWCV]=1000
MODEL_CAPACITIES[MB2000GCWDB]=2000
MODEL_CAPACITIES[AL15SEB120N]=1200
MODEL_CAPACITIES[AL15SEB600N]=600
MODEL_CAPACITIES[HUC101212CSS600]=1200
MODEL_CAPACITIES[HUC103012CSS600]=3000
MODEL_CAPACITIES[HUC109090CSS600]=900
MODEL_CAPACITIES[MAX3147RC]=147
MODEL_CAPACITIES[ST3146356SS]=146
MODEL_CAPACITIES[ST3146855SS]=146
MODEL_CAPACITIES[ST33000650SS]=3000
MODEL_CAPACITIES[ST3600057SS]=600
MODEL_CAPACITIES[ST9146803SS]=146
MODEL_CAPACITIES[ST973451SS]=73
MODEL_CAPACITIES[AL13SXB300N]=300
MODEL_CAPACITIES[KPM6XRUG960G]=960
MODEL_CAPACITIES[MZILT3T8HBLS0D3]=3840
MODEL_CAPACITIES[MZILT960HBHQ0D3]=960
# Add more models as encountered
# TBW endurance standards (using lowest numbers)
# Both tables are keyed by capacity tier in GB; values are terabytes written.

# Consumer-class drives
declare -A CONSUMER_TBW
CONSUMER_TBW[250]=150
CONSUMER_TBW[500]=300
CONSUMER_TBW[1000]=600
CONSUMER_TBW[2000]=1200
CONSUMER_TBW[4000]=2400
CONSUMER_TBW[8000]=4800

# Enterprise-class drives
declare -A ENTERPRISE_TBW
ENTERPRISE_TBW[250]=450
ENTERPRISE_TBW[500]=900
ENTERPRISE_TBW[1000]=1800
ENTERPRISE_TBW[2000]=3600
ENTERPRISE_TBW[4000]=7200
ENTERPRISE_TBW[8000]=14400
# Function to get closest capacity tier
#
# Arguments: $1 - capacity in GB; a fractional part is dropped and anything
#                 that is not a non-negative integer is treated as 0
# Outputs:   the smallest tier (250, 500, 1000, 2000, 4000, 8000) that is
#            >= the capacity; 8000 when the capacity exceeds every tier
get_capacity_tier() {
  local capacity_gb=${1%%.*}
  local tier

  [[ "$capacity_gb" =~ ^[0-9]+$ ]] || capacity_gb=0

  for tier in 250 500 1000 2000 4000 8000; do
    # 10# forces base 10 so a leading zero is not parsed as octal.
    if ((10#$capacity_gb <= tier)); then
      echo "$tier"
      return
    fi
  done
  echo "8000"
}
# Reduce a power-on-hours field to its leading integer.
# Arguments: $1 - raw field text (e.g. "12345h+30m")
# Outputs:   the run of digits at the start of the field, or "0" when the
#            field does not begin with a digit
extract_numeric_hours() {
  local raw=$1
  local digits

  # Drop everything from the first non-digit character onward.
  digits=$(printf '%s\n' "$raw" | sed 's/[^0-9].*$//')

  if [[ ! "$digits" =~ ^[0-9]+$ ]]; then
    echo "0"
    return
  fi
  echo "$digits"
}
# Function to get disk type and interface
#
# Classifies a device from the text of `smartctl -i`.
#
# Arguments:
#   $1 - device path
#   $2 - smartctl -d device type; empty or "direct" means no -d option
# Outputs: "disk_type|transport|is_enterprise" on stdout, where is_enterprise
#          is the literal string true or false
get_disk_info() {
  local disk=$1
  local controller=$2

  # main() labels plain devices "direct"; that is a label, not a smartctl
  # device type, so it must not be passed to -d.
  [[ "$controller" == "direct" ]] && controller=""

  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  local info
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)

  local model
  model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//')

  local transport=""
  local disk_type="UNKNOWN"
  local is_enterprise=false

  # Enterprise markers must stand alone (not be embedded in a longer run of
  # letters). Plain substring matching made "EP" hit the month "Sep" in the
  # Local Time line and "DC" hit the "WDC" vendor prefix, which flagged
  # ordinary consumer drives as Enterprise.
  local info_markers='(^|[^[:alpha:]])(ENTERPRISE|EP|SAS|Xeons?|DualPort|PowerLoss|PLP)([^[:alpha:]]|$)'
  local model_markers='(^|[^[:alpha:]])(PRO|EP|DC|ENT|ENTERPRISE)([^[:alpha:]]|$)'

  # Check if it's NVMe
  if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then
    disk_type="NVMe"
    transport="NVMe"
  # Check for SAS
  elif echo "$info" | grep -qi "SAS"; then
    transport="SAS"
    is_enterprise=true
    # Determine if SAS disk is HDD or SSD
    if echo "$info" | grep -qi "Solid State Device\|SSD"; then
      disk_type="SAS SSD"
    elif echo "$info" | grep -qi "Rotation Rate"; then
      disk_type="SAS HDD"
    elif [[ -n "$model" ]] && echo "$model" | grep -qi "SSD\|Solid State"; then
      disk_type="SAS SSD"
    else
      # No SSD hint anywhere: assume a spinning SAS disk.
      disk_type="SAS HDD"
    fi
  # Check for SATA SSD
  elif echo "$info" | grep -qi "Solid State Device\|SSD"; then
    disk_type="SATA SSD"
    transport="SATA"
  # Check for SATA HDD
  elif echo "$info" | grep -qi "Rotation Rate"; then
    disk_type="SATA HDD"
    transport="SATA"
  fi

  # Check for enterprise features
  if echo "$info" | grep -qiE "$info_markers"; then
    is_enterprise=true
  fi

  # Check device type by model name
  if [[ -n "$model" ]]; then
    if echo "$model" | grep -qiE "$model_markers"; then
      is_enterprise=true
    fi
    if echo "$model" | grep -qi "SSD\|Solid State" && [[ "$disk_type" == "UNKNOWN" ]]; then
      disk_type="SSD"
      [[ "$transport" == "" ]] && transport="SATA"
    fi
  fi

  if [[ "$disk_type" == "UNKNOWN" ]]; then
    disk_type="Unknown"
  fi

  echo "$disk_type|$transport|$is_enterprise"
}
# Collect SMART fields for a SAS disk from smartctl output.
#
# Arguments:
#   $1 - device path
#   $2 - smartctl -d device type (optional)
#   $3 - disk type; "SAS HDD" adds mechanical counters, "SAS SSD" adds
#        write/wear fields
# Outputs: one line of 13 '|'-separated fields:
#   power_on_hours|reallocated_sectors|pending_sectors|start_stop_count|
#   load_cycle_count|temperature|model|serial|firmware|total_written|
#   media_wearout|percent_lifetime_used|has_write_data
#   (pending_sectors is never filled in here and is always empty)
get_sas_attributes() {
  local disk=$1 controller=$2 disk_type=$3

  local smart_cmd="smartctl"
  if [[ -n "$controller" ]]; then
    smart_cmd+=" -d $controller"
  fi

  local power_on_hours="" reallocated_sectors="" pending_sectors=""
  local start_stop_count="" load_cycle_count="" total_written=""
  local temperature="" model="" serial="" firmware=""
  local media_wearout="" percent_lifetime_used=""
  local has_write_data=false

  # Extended report first; fall back to the standard report when it is empty.
  local attributes
  attributes=$($smart_cmd -x "$disk" 2>/dev/null)
  if [[ -z "$attributes" ]]; then
    attributes=$($smart_cmd -a "$disk" 2>/dev/null)
  fi

  if [[ -n "$attributes" ]]; then
    # Identity fields
    model=$(grep -i "Product:" <<< "$attributes" | cut -d: -f2 | sed 's/^[ \t]*//')
    if [[ -z "$model" ]]; then
      model=$(grep -i "Device Model:" <<< "$attributes" | cut -d: -f2 | sed 's/^[ \t]*//')
    fi
    serial=$(grep -i "Serial number:" <<< "$attributes" | cut -d: -f2 | sed 's/^[ \t]*//')
    firmware=$(grep -i "Revision:" <<< "$attributes" | cut -d: -f2 | sed 's/^[ \t]*//')

    # Power-on time: prefer the hours:minutes form and keep only the hours.
    local hours_minutes
    hours_minutes=$(grep -i "Accumulated power on time" <<< "$attributes" | grep -oE "[0-9]+:[0-9]+" | head -1)
    if [[ -n "$hours_minutes" ]]; then
      power_on_hours=${hours_minutes%%:*}
    else
      power_on_hours=$(grep -i "Power_On_Hours" <<< "$attributes" | awk '{print $NF}' | head -1)
    fi

    # Temperature
    temperature=$(grep -i "Current Drive Temperature" <<< "$attributes" | grep -oE "[0-9]+" | head -1)
    if [[ -z "$temperature" ]]; then
      temperature=$(grep -i "Temperature_Celsius" <<< "$attributes" | awk '{print $10}' | head -1)
    fi

    # Mechanical counters only apply to spinning disks.
    if [[ "$disk_type" == "SAS HDD" ]]; then
      start_stop_count=$(grep -i "Accumulated start-stop cycles" <<< "$attributes" | grep -oE "[0-9]+" | head -1)
      load_cycle_count=$(grep -i "Accumulated load-unload cycles" <<< "$attributes" | grep -oE "[0-9]+" | head -1)
    fi

    # The grown defect list size is reported as the reallocated-sector count.
    local grown_defects
    grown_defects=$(grep -i "Elements in grown defect list" <<< "$attributes" | grep -oE "[0-9]+" | head -1)
    [[ -n "$grown_defects" ]] && reallocated_sectors="$grown_defects"

    if [[ "$disk_type" == "SAS SSD" ]]; then
      # First write counter that yields a value wins.
      local write_attr
      for write_attr in Total_LBAs_Written Host_Writes_32MiB Lifetime_Writes NAND_Writes; do
        total_written=$(grep -i "$write_attr" <<< "$attributes" | awk '{print $NF}' | head -1)
        [[ -n "$total_written" ]] && break
      done

      if [[ -n "$total_written" && "$total_written" != "0" ]]; then
        has_write_data=true
      fi

      # Wear indicators
      media_wearout=$(grep -i "Media_Wearout_Indicator" <<< "$attributes" | awk '{print $NF}' | head -1)
      percent_lifetime_used=$(grep -i "Percent_Lifetime_Used" <<< "$attributes" | awk '{print $NF}' | head -1)
      if [[ -z "$percent_lifetime_used" ]]; then
        percent_lifetime_used=$(grep -i "Wear_Leveling_Count" <<< "$attributes" | awk '{print $NF}' | head -1)
      fi
    fi
  fi

  echo "$power_on_hours|$reallocated_sectors|$pending_sectors|$start_stop_count|$load_cycle_count|$temperature|$model|$serial|$firmware|$total_written|$media_wearout|$percent_lifetime_used|$has_write_data"
}
# Convert a write counter into whole terabytes written.
#
# Arguments:
#   $1 - disk type (no effect on the result: every type uses the same factor)
#   $2 - raw counter, converted as raw * 32 / 1000000
#   $3 - sector count, converted as sectors * 512 bytes / 10^12
# Outputs: integer TB on stdout; the sector count takes precedence when both
#          are given, and 0 is printed when neither is usable
calculate_tbw() {
  local disk_type=$1 raw_value=$2 sectors=$3
  local tbw=0

  if [[ -n "$sectors" && "$sectors" != "0" ]]; then
    # Sector-based counter
    tbw=$((sectors * 512 / 1000000000000))
  elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then
    # 32-unit based counter
    tbw=$((raw_value * 32 / 1000000))
  fi

  echo "$tbw"
}
# Look up the TBW endurance rating for a drive.
#
# Arguments:
#   $1 - capacity in GB
#   $2 - "true" selects ENTERPRISE_TBW, anything else CONSUMER_TBW
#   $3 - disk type
#   $4 - "false" when no write statistics were found
# Outputs: "N/A" for HDD types, "UNKNOWN" for a SAS SSD without write data,
#          otherwise the table value for the capacity tier
get_estimated_endurance() {
  local capacity_gb=$1 is_enterprise=$2 disk_type=$3 has_write_data=$4

  case "$disk_type" in
    "SATA HDD" | "SAS HDD" | "HDD")
      # HDDs don't have TBW
      echo "N/A"
      return
      ;;
    "SAS SSD")
      # Without write data any figure would be a guess.
      if [[ "$has_write_data" == "false" ]]; then
        echo "UNKNOWN"
        return
      fi
      ;;
  esac

  local tier
  tier=$(get_capacity_tier "$capacity_gb")

  if [[ "$is_enterprise" == "true" ]]; then
    echo "${ENTERPRISE_TBW[$tier]}"
    return
  fi
  echo "${CONSUMER_TBW[$tier]}"
}
# Function to estimate SSD lifespan with TBW remaining
#
# Arguments:
#   $1 - power-on hours
#   $2 - TBW used so far (non-numeric characters and any fraction are dropped)
#   $3 - estimated endurance in TB, or "N/A"
#   $4 - disk type
#   $5 - percent lifetime used (accepted for interface compatibility; unused)
#   $6 - "false" when no write statistics were found
# Outputs: "remaining_percent|remaining_tbw|status" on stdout. Remaining
#          values are clamped at 0 once usage exceeds the rating.
estimate_ssd_lifespan() {
  local power_on_hours=$1
  local tbw_used=$2
  local estimated_endurance=$3
  local disk_type=$4
  local percent_lifetime_used=$5
  local has_write_data=$6

  # For SAS SSDs without write data, be honest about limitations
  if [[ "$disk_type" == "SAS SSD" && "$has_write_data" == "false" ]]; then
    echo "N/A|N/A|Cannot determine - SAS SSD does not expose write statistics"
    return
  fi

  if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then
    echo "Unknown|Unknown|New drive"
    return
  fi

  if [[ "$estimated_endurance" == "N/A" ]]; then
    echo "N/A|N/A|HDD - no endurance rating"
    return
  fi

  # A missing, non-numeric or zero rating would make the percentage below
  # divide by zero, so report it as unknown instead.
  if [[ ! "$estimated_endurance" =~ ^[0-9]+$ ]] || ((10#$estimated_endurance == 0)); then
    echo "Unknown|Unknown|Endurance rating unavailable"
    return
  fi

  local clean_tbw_used
  clean_tbw_used=$(echo "$tbw_used" | sed 's/[^0-9.]//g')
  # Shell arithmetic is integer-only: keep the whole-TB part.
  clean_tbw_used=${clean_tbw_used%%.*}
  [[ -n "$clean_tbw_used" ]] || clean_tbw_used=0

  local endurance=$((10#$estimated_endurance))
  local used=$((10#$clean_tbw_used))

  local tbw_remaining=$((endurance - used))
  ((tbw_remaining < 0)) && tbw_remaining=0

  if [[ $used -gt 0 ]]; then
    local lifespan_used=$((used * 100 / endurance))
    local lifespan_remaining=$((100 - lifespan_used))
    # A drive written past its rating has 0% left, not a negative amount.
    ((lifespan_remaining < 0)) && lifespan_remaining=0

    if [[ $lifespan_used -ge 80 ]]; then
      echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear"
    elif [[ $lifespan_used -ge 50 ]]; then
      echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear"
    else
      echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy"
    fi
  else
    echo "Unknown|${estimated_endurance} TB|New"
  fi
}
# Score HDD risk factors and map the score to a lifespan estimate.
#
# Arguments:
#   $1 - power-on hours    $2 - reallocated sectors   $3 - pending sectors
#   $4 - start/stop count  $5 - load cycle count      $6 - disk type
#   $7 - temperature
# Outputs: "Unknown" when no hours are known, a CRITICAL line when pending
#          sectors exist, otherwise a lifespan band chosen by the total score
estimate_hdd_lifespan() {
  local power_on_hours=$1 reallocated_sectors=$2 pending_sectors=$3
  local start_stop_count=$4 load_cycle_count=$5 disk_type=$6 temperature=$7

  local numeric_hours
  numeric_hours=$(extract_numeric_hours "$power_on_hours")
  if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then
    echo "Unknown"
    return
  fi

  # Pending sectors short-circuit every other consideration.
  if [[ "$pending_sectors" -gt 0 ]]; then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
    return
  fi

  local severity=0

  # Reallocated sectors
  if [[ "$reallocated_sectors" -gt 100 ]]; then
    ((severity += 3))
  elif [[ "$reallocated_sectors" -gt 10 ]]; then
    ((severity += 2))
  elif [[ "$reallocated_sectors" -gt 0 ]]; then
    ((severity += 1))
  fi

  # Temperature (only when a reading is available)
  if [[ -n "$temperature" ]]; then
    if [[ "$temperature" -gt 50 ]]; then
      ((severity += 2))
    elif [[ "$temperature" -gt 40 ]]; then
      ((severity += 1))
    fi
  fi

  # Accumulated running time
  if [[ "$numeric_hours" -gt 50000 ]]; then
    ((severity += 3))
  elif [[ "$numeric_hours" -gt 30000 ]]; then
    ((severity += 2))
  elif [[ "$numeric_hours" -gt 15000 ]]; then
    ((severity += 1))
  fi

  # Mechanical wear counters apply to the two HDD types only.
  case "$disk_type" in
    "SATA HDD" | "SAS HDD")
      if [[ "$start_stop_count" -gt 50000 ]]; then
        ((severity += 2))
      elif [[ "$start_stop_count" -gt 20000 ]]; then
        ((severity += 1))
      fi
      if [[ "$load_cycle_count" -gt 500000 ]]; then
        ((severity += 2))
      elif [[ "$load_cycle_count" -gt 200000 ]]; then
        ((severity += 1))
      fi
      ;;
  esac

  if ((severity >= 5)); then
    echo "${RED}< 6 months${NC} (Multiple risk factors)"
  elif ((severity >= 3)); then
    echo "${YELLOW}6-18 months${NC} (Moderate wear)"
  elif ((severity >= 1)); then
    echo "${YELLOW}1-3 years${NC} (Light wear)"
  else
    echo "${GREEN}> 3 years${NC} (Healthy)"
  fi
}
# Report software RAID (MD) arrays listed in /proc/mdstat.
#
# For each mdN entry that exists as a block device, prints a banner and, when
# mdadm is installed and returns details, the RAID level, state and active
# device count.
check_mdraid() {
  local md_devices=()

  # Collect /dev/mdN names from lines that start with "md<digits>".
  if [[ -f /proc/mdstat ]]; then
    while IFS= read -r line; do
      [[ $line =~ ^md[0-9]+ ]] || continue
      md_devices+=("/dev/${line%% *}")
    done < /proc/mdstat
  fi

  for md in "${md_devices[@]}"; do
    [[ -b "$md" ]] || continue
    print_color $MAGENTA "Found software RAID: $md"

    command_exists mdadm || continue
    local md_info=$(mdadm --detail "$md" 2>/dev/null)
    [[ -n "$md_info" ]] || continue

    echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo ""
  done
}
# Function to get capacity using direct block device methods
#
# Tries, in order: lsblk, fdisk, blockdev, then the MODEL_CAPACITIES table.
#
# Arguments:
#   $1 - device path
#   $2 - smartctl -d device type for the model lookup; empty or "direct"
#        means no -d option
#   $3 - disk type (accepted for interface compatibility; unused)
# Outputs: "capacity_gb|capacity_human" on stdout, e.g. "2000|2.0 TB";
#          "0|Unknown" when every method fails
get_disk_capacity() {
  local disk=$1
  local controller=$2
  local disk_type=$3
  local capacity_gb=0
  local capacity_human="Unknown"
  local bytes=""
  local bytes_re='([0-9][0-9,]*)[[:space:]]+bytes'
  local unit_re='([0-9][0-9,.]*)[[:space:]]*([GT])(i?)B'

  # "direct" is the label main() gives plain devices, not a smartctl type.
  [[ "$controller" == "direct" ]] && controller=""

  # Method 1: Try lsblk first
  if command_exists lsblk; then
    # -d limits output to the disk itself (no partition rows). read keeps the
    # first line only and trims column padding, so the numeric check holds.
    read -r bytes < <(lsblk -b -d -n -o SIZE "$disk" 2>/dev/null)
    if [[ "$bytes" =~ ^[0-9]+$ ]]; then
      capacity_gb=$((10#$bytes / 1000000000))
    fi
  fi

  # Method 2: Try fdisk
  if [[ $capacity_gb -eq 0 ]] && command_exists fdisk; then
    local fdisk_info
    fdisk_info=$(fdisk -l "$disk" 2>/dev/null | grep "Disk $disk")
    if [[ $fdisk_info =~ $bytes_re ]]; then
      # The exact byte count is preferred over the rounded unit figure.
      bytes=${BASH_REMATCH[1]//,/}
      capacity_gb=$((10#$bytes / 1000000000))
    elif [[ $fdisk_info =~ $unit_re ]]; then
      local size=${BASH_REMATCH[1]//,/}
      local gb_per_unit
      case "${BASH_REMATCH[2]}${BASH_REMATCH[3]}" in
        Gi) gb_per_unit=1.073741824 ;;    # GiB -> GB
        Ti) gb_per_unit=1099.511627776 ;; # TiB -> GB
        T) gb_per_unit=1000 ;;
        *) gb_per_unit=1 ;;
      esac
      # The size may be fractional (e.g. 1.82), which $(( )) cannot handle.
      capacity_gb=$(awk -v s="$size" -v f="$gb_per_unit" 'BEGIN { printf "%d", s * f }')
      [[ "$capacity_gb" =~ ^[0-9]+$ ]] || capacity_gb=0
    fi
  fi

  # Method 3: Try blockdev
  if [[ $capacity_gb -eq 0 ]] && command_exists blockdev; then
    local blockdev_size
    blockdev_size=$(blockdev --getsize64 "$disk" 2>/dev/null)
    if [[ "$blockdev_size" =~ ^[0-9]+$ ]]; then
      capacity_gb=$((10#$blockdev_size / 1000000000))
    fi
  fi

  # Method 4: Model-based lookup
  if [[ $capacity_gb -eq 0 ]]; then
    local -a smart_cmd=(smartctl)
    [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")
    local identity model
    identity=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
    model=$(echo "$identity" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//')
    [[ -z "$model" ]] && model=$(echo "$identity" | grep -i "Product:" | cut -d: -f2 | sed 's/^[ \t]*//')
    if [[ -n "$model" && -n "${MODEL_CAPACITIES[$model]}" ]]; then
      capacity_gb="${MODEL_CAPACITIES[$model]}"
    fi
  fi

  # Generate human readable capacity
  if [[ $capacity_gb -ge 1000 ]]; then
    # One decimal place, so 1920 GB reads "1.9 TB" rather than "1 TB".
    capacity_human="$((capacity_gb / 1000)).$((capacity_gb % 1000 / 100)) TB"
  elif [[ $capacity_gb -gt 0 ]]; then
    capacity_human="${capacity_gb} GB"
  else
    capacity_human="Unknown"
  fi

  echo "$capacity_gb|$capacity_human"
}
# Function to check a single disk
#
# Prints a report for one device: identity, capacity, SMART health and a
# type-specific wear / lifespan assessment.
#
# Arguments:
#   $1 - device path handed to smartctl
#   $2 - smartctl -d device type (e.g. megaraid,0); empty or "direct" means
#        no -d option is used
# Outputs: the report on stdout
check_disk() {
  local disk=$1
  local controller=$2

  # main() labels plain devices "direct". That is only a label, not a
  # smartctl device type, so it must never reach "smartctl -d".
  [[ "$controller" == "direct" ]] && controller=""

  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
  echo "=================================================="

  # Get disk information
  local disk_info disk_type transport is_enterprise
  disk_info=$(get_disk_info "$disk" "$controller")
  IFS='|' read -r disk_type transport is_enterprise <<< "$disk_info"

  # Get basic disk information. Declaration and assignment are separate so
  # that $? reflects smartctl rather than the `local` builtin.
  local info info_status health
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  info_status=$?
  health=$("${smart_cmd[@]}" -H "$disk" 2>/dev/null)

  # Check if we can read the disk. Only exit-status bits 0 (bad command line)
  # and 1 (device open failed) mean the device was unreadable; the higher bits
  # describe disk health and must not cause the disk to be skipped.
  if ((info_status & 3)) || [[ -z "$info" ]]; then
    print_color "$YELLOW" "Cannot read disk information. It may be offline, unsupported, or need controller specification."
    echo ""
    return
  fi

  # Extract disk information
  local model serial firmware
  model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//')
  [[ -z "$model" ]] && model=$(echo "$info" | grep -i "Product:" | cut -d: -f2 | sed 's/^[ \t]*//')
  serial=$(echo "$info" | grep -i "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//')
  firmware=$(echo "$info" | grep -i "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//')
  [[ -z "$firmware" ]] && firmware=$(echo "$info" | grep -i "Revision:" | cut -d: -f2 | sed 's/^[ \t]*//')

  # Get capacity
  local capacity_info capacity_gb capacity_human
  capacity_info=$(get_disk_capacity "$disk" "$controller" "$disk_type")
  IFS='|' read -r capacity_gb capacity_human <<< "$capacity_info"

  local health_status
  health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//')
  [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//')
  [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART Health Status" | cut -d: -f2 | sed 's/^[ \t]*//')

  # Extract SMART attributes based on disk type
  local power_on_hours=""
  local reallocated_sectors=""
  local pending_sectors=""
  local start_stop_count=""
  local load_cycle_count=""
  local total_written=""
  local host_writes_32mib=""
  local temperature=""
  local sas_model=""
  local sas_serial=""
  local sas_firmware=""
  local sas_total_written=""
  local media_wearout=""
  local percent_lifetime_used=""
  local has_write_data="false"

  if [[ "$disk_type" == "SAS HDD" || "$disk_type" == "SAS SSD" ]]; then
    local sas_attrs
    sas_attrs=$(get_sas_attributes "$disk" "$controller" "$disk_type")
    power_on_hours=$(echo "$sas_attrs" | cut -d'|' -f1)
    reallocated_sectors=$(echo "$sas_attrs" | cut -d'|' -f2)
    pending_sectors=$(echo "$sas_attrs" | cut -d'|' -f3)
    start_stop_count=$(echo "$sas_attrs" | cut -d'|' -f4)
    load_cycle_count=$(echo "$sas_attrs" | cut -d'|' -f5)
    temperature=$(echo "$sas_attrs" | cut -d'|' -f6)
    sas_model=$(echo "$sas_attrs" | cut -d'|' -f7)
    sas_serial=$(echo "$sas_attrs" | cut -d'|' -f8)
    sas_firmware=$(echo "$sas_attrs" | cut -d'|' -f9)
    sas_total_written=$(echo "$sas_attrs" | cut -d'|' -f10)
    media_wearout=$(echo "$sas_attrs" | cut -d'|' -f11)
    percent_lifetime_used=$(echo "$sas_attrs" | cut -d'|' -f12)
    has_write_data=$(echo "$sas_attrs" | cut -d'|' -f13)

    # Use SAS-extracted data if available
    [[ -n "$sas_model" ]] && model="$sas_model"
    [[ -n "$sas_serial" ]] && serial="$sas_serial"
    [[ -n "$sas_firmware" ]] && firmware="$sas_firmware"
    # NOTE(review): get_sas_attributes may fill this from a non-LBA counter
    # (e.g. Host_Writes_32MiB); it is still converted as sectors below.
    [[ -n "$sas_total_written" ]] && total_written="$sas_total_written"
  else
    # NOTE(review): these patterns match the ATA attribute table; NVMe output
    # presumably uses a different layout and would leave them empty - confirm.
    local attributes
    attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
    if [[ -n "$attributes" ]]; then
      power_on_hours=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}')
      reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}')
      pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}')
      start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}')
      load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}')
      # Keep the two write counters apart: total_written is converted as
      # 512-byte sectors, host_writes_32mib as 32-unit blocks. Storing a
      # Host_Writes_32MiB value in total_written under-reported TBW.
      total_written=$(echo "$attributes" | grep "Total_LBAs_Written" | awk '{print $10}' | head -1)
      host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1)
      temperature=$(echo "$attributes" | grep -i "Temperature_Celsius" | awk '{print $10}')

      # For non-SAS SSDs, we assume write data is available if we found any
      if [[ "$disk_type" == "SATA SSD" || "$disk_type" == "NVMe" ]]; then
        if [[ -n "$total_written" || -n "$host_writes_32mib" ]]; then
          has_write_data="true"
        fi
      fi
    fi
  fi

  # Clean up extracted values
  power_on_hours=$(extract_numeric_hours "$power_on_hours")
  reallocated_sectors=${reallocated_sectors:-0}
  pending_sectors=${pending_sectors:-0}
  start_stop_count=${start_stop_count:-0}
  load_cycle_count=${load_cycle_count:-0}

  # Compare the flag as a string instead of executing it as a command.
  local class_label="Consumer"
  [[ "$is_enterprise" == "true" ]] && class_label="Enterprise"

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Interface: $transport"
  echo "Class: $class_label"
  echo "Capacity: $capacity_human"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"

  # Show temperature if available
  if [[ -n "$temperature" && "$temperature" != "0" ]]; then
    echo "Temperature: ${temperature} C"
  fi

  # Only show Power On Hours if we have a valid value
  if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
    echo "Power On Hours: $power_on_hours"
  else
    echo "Power On Hours: Unknown"
  fi

  # Show wear indicators for SSDs if available
  if [[ "$disk_type" == "SAS SSD" || "$disk_type" == "SATA SSD" ]]; then
    if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then
      echo "Media Wearout: $media_wearout"
    fi
    if [[ -n "$percent_lifetime_used" && "$percent_lifetime_used" != "0" ]]; then
      echo "Lifetime Used: ${percent_lifetime_used}%"
    fi
  fi

  # Disk type specific analysis
  if [[ "$disk_type" == "SATA HDD" || "$disk_type" == "SAS HDD" ]]; then
    echo "Realloc Sectors: $reallocated_sectors"
    echo "Pending Sectors: $pending_sectors"

    # Only show mechanical counters if we have values
    if [[ -n "$start_stop_count" && "$start_stop_count" != "0" ]]; then
      echo "Start/Stop Count: $start_stop_count"
    fi
    if [[ -n "$load_cycle_count" && "$load_cycle_count" != "0" ]]; then
      echo "Load Cycle Count: $load_cycle_count"
    fi

    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "$reallocated_sectors" "$pending_sectors" "$start_stop_count" "$load_cycle_count" "$disk_type" "$temperature")
    echo "Lifespan: $lifespan"

  elif [[ "$disk_type" == "SATA SSD" || "$disk_type" == "SAS SSD" || "$disk_type" == "NVMe" ]]; then
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "")
    fi

    local estimated_endurance
    estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type" "$has_write_data")

    # Handle SAS SSDs without write data specially
    if [[ "$disk_type" == "SAS SSD" && "$estimated_endurance" == "UNKNOWN" ]]; then
      echo "TBW Used: Not available"
      echo "TBW Endurance: Not available (SAS SSD does not expose write statistics)"
      echo "Lifespan: ${GREEN}Healthy${NC} (based on SMART health status)"
    else
      if [[ "$estimated_endurance" != "N/A" ]]; then
        echo "TBW Used: ${tbw_used} TB"
        echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)"

        local lifespan_info lifespan_percent tbw_remaining wear_status
        lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type" "$percent_lifetime_used" "$has_write_data")
        lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1)
        tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2)
        wear_status=$(echo "$lifespan_info" | cut -d'|' -f3)

        echo "TBW Remaining: $tbw_remaining"
        echo "Lifespan: $lifespan_percent ($wear_status)"
      else
        echo "TBW Used: ${tbw_used} TB"
        echo "Lifespan: Unknown (Cannot estimate without usage data)"
      fi
    fi

    if [[ "$disk_type" == "SAS SSD" ]]; then
      echo "Realloc Sectors: $reallocated_sectors"
      echo "Pending Sectors: $pending_sectors"
    fi
  else
    print_color "$YELLOW" "Unknown disk type - limited information available"
    echo "Realloc Sectors: $reallocated_sectors"
    echo "Pending Sectors: $pending_sectors"
  fi

  echo ""
}
# Enumerate candidate disks, both plain devices and disks behind RAID
# controllers.
#
# Outputs: one space-separated line of "device:controller" entries. Plain
#          block devices are listed as "device:direct"; controller-attached
#          disks as "device:type,index" for every type/index (0..31) that
#          smartctl accepts.
detect_raid_disks() {
  local controllers=("megaraid" "cciss" "areca" "3ware" "hpt" "aacraid" "auto")
  local disks=()

  # Plain block devices first
  for disk in /dev/sd[a-z] /dev/sd[a-z][a-z] /dev/nvme[0-9]n[0-9]; do
    [[ -b "$disk" ]] && disks+=("$disk:direct")
  done

  # Probe every controller type at every index; per index, the first base
  # device that smartctl accepts is recorded and the rest are not tried.
  for controller in "${controllers[@]}"; do
    for ((i = 0; i <= 31; i++)); do
      for base_disk in "/dev/sda" "/dev/sg$i" "/dev/sr$i"; do
        smartctl -d "$controller,$i" -i "$base_disk" >/dev/null 2>&1 || continue
        disks+=("$base_disk:$controller,$i")
        break
      done
    done
  done

  echo "${disks[*]}"
}
# Main function
#
# Prints the banner, reports software RAID arrays, builds the list of disks
# (from the arguments, or by auto-detection when none are given) and runs
# check_disk on each one.
#
# Arguments: optional block device paths
# Returns:   exits 1 when no disk could be found
main() {
  print_color "$BLUE" "Disk Health Check Script v$VERSION for Harvester OS"
  print_color "$BLUE" "Created by Adam T. Lau"
  print_color "$BLUE" "===================================================="
  echo ""

  local disks=()
  local disk controller disk_info

  # Check for soft-raid first
  check_mdraid

  # If specific disk provided, check only that disk
  if [[ $# -gt 0 ]]; then
    for disk in "$@"; do
      if [[ -b "$disk" ]]; then
        disks+=("$disk:direct")
      else
        print_color "$RED" "Error: $disk is not a valid block device"
      fi
    done
  else
    # Auto-detect disks
    print_color "$CYAN" "Auto-detecting disks..."
    read -ra disks <<< "$(detect_raid_disks)"
  fi

  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$YELLOW" "No disks found via auto-detection"
    print_color "$CYAN" "Trying direct disk access..."
    for disk in /dev/sda /dev/sdb /dev/sdc /dev/nvme0n1; do
      if [[ -b "$disk" ]]; then
        disks+=("$disk:direct")
      fi
    done
  fi

  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi

  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check each disk
  for disk_info in "${disks[@]}"; do
    IFS=':' read -r disk controller <<< "$disk_info"
    # "direct" only labels a plain device in the list above. It is not a
    # smartctl device type, so hand check_disk an empty controller instead.
    [[ "$controller" == "direct" ]] && controller=""
    check_disk "$disk" "$controller"
  done

  print_color "$BLUE" "Check completed!"
  echo ""
  print_color "$YELLOW" "Note: SAS SSDs often do not expose write statistics through SMART."
  print_color "$YELLOW" " TBW information may not be available for these drives."
  print_color "$YELLOW" " SSD/NVMe TBW endurance may be higher depending on the specific model."
}
# Print the help text: synopsis, examples and supported device classes.
# Globals: SCRIPT_NAME (read)
# Outputs: help text on stdout
usage() {
  printf '%s\n' \
    "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks and RAID arrays" \
    "" \
    "Examples:" \
    " $SCRIPT_NAME # Check all auto-detected disks" \
    " $SCRIPT_NAME /dev/sda # Check specific disk" \
    " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" \
    " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" \
    "" \
    "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID" \
    "Created by Adam T. Lau"
}
# Entry point: handle the help and version flags; anything else is passed to
# main as the list of disks.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  echo "Created by Adam T. Lau"
  exit 0
else
  main "$@"
fi

0
old/alma-v2.4.sh → old/obsolete/alma-v2.4.sh Executable file → Normal file
View File

View File

615
old/obsolete/ubuntu-v2.5.sh Normal file
View File

@@ -0,0 +1,615 @@
#!/bin/bash
# Disk Health Check Script for Ubuntu 24.04
# Enhanced with SAS/PERC H730P controller support
# Checks SSD TBW/lifespan and HDD health status
# Name used in help/version output, and the script version.
SCRIPT_NAME=$(basename "$0")
VERSION="2.5"
# Color codes.
# tput exits non-zero and writes to stderr when it cannot resolve the
# terminal type (e.g. TERM unset under cron). Silence that and fall back to
# empty strings so the script still runs, just without colors.
RED=$(tput setaf 1 2>/dev/null) || RED=""
GREEN=$(tput setaf 2 2>/dev/null) || GREEN=""
YELLOW=$(tput setaf 3 2>/dev/null) || YELLOW=""
BLUE=$(tput setaf 4 2>/dev/null) || BLUE=""
CYAN=$(tput setaf 6 2>/dev/null) || CYAN=""
NC=$(tput sgr0 2>/dev/null) || NC=""
# Print a message wrapped in a color sequence.
#   $1 - color escape sequence placed before the text
#   $2 - text to print (backslash escapes are interpreted by echo -e)
# The global NC (reset sequence) is appended after the text.
print_color() {
    local tint=$1 text=$2
    echo -e "${tint}${text}${NC}"
}
# Succeed (status 0) when $1 resolves to a runnable command, fail otherwise.
# Produces no output.
command_exists() {
    command -v "$1" &>/dev/null
}
# Verify that the external tools this script needs are installed.
# Checks for the smartctl and bc commands; when either is absent, prints the
# package names plus an apt install hint and terminates the script with
# status 1. Returns normally when both are present.
check_dependencies() {
    local -a absent=()

    command_exists smartctl || absent+=("smartmontools")
    command_exists bc || absent+=("bc")

    # Nothing missing: nothing to report.
    (( ${#absent[@]} == 0 )) && return

    print_color "$RED" "Error: Missing required packages: ${absent[*]}"
    echo "Install with: sudo apt update && sudo apt install ${absent[*]}"
    exit 1
}
# Classify how much SMART data smartctl can read from a disk.
#   $1 - device path
#   $2 - optional smartctl device type, passed as "-d <type>"
# Prints exactly one of:
#   no_access      "smartctl -i" returned a failure status
#   not_available  no "SMART support is:" line, or none saying "Available"
#   disabled       support lines present but none saying "Enabled"
#   no_attributes  "smartctl -A" output lacks an ATTRIBUTE_NAME header
#   full_access    all of the above checks passed
test_smart_access() {
    local disk=$1
    local controller=$2
    local smart_cmd="smartctl"
    if [[ -n "$controller" ]]; then
        smart_cmd="$smart_cmd -d $controller"
    fi

    # Can the device be queried at all?
    if ! $smart_cmd -i "$disk" &>/dev/null; then
        echo "no_access"
        return
    fi

    # Collect the identity page and keep only the SMART support lines.
    local identity support
    identity=$($smart_cmd -i "$disk" 2>/dev/null)
    support=$(grep "SMART support is:" <<< "$identity")

    # Case-sensitive on purpose: "Unavailable" must not count as "Available".
    if [[ -z "$support" || "$support" != *Available* ]]; then
        echo "not_available"
        return
    fi
    if [[ "$support" != *Enabled* ]]; then
        echo "disabled"
        return
    fi

    # The attribute table must exist and carry its column header.
    local table
    table=$($smart_cmd -A "$disk" 2>/dev/null)
    if [[ "$table" != *ATTRIBUTE_NAME* ]]; then
        echo "no_attributes"
        return
    fi

    echo "full_access"
}
# Helper: from smartctl text ($1), take the first line matching the
# case-insensitive grep pattern ($2) and print what follows its first colon,
# with leading blanks removed.
_smart_info_field() {
    echo "$1" | grep -i "$2" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1
}

# Helper: from a smartctl attribute table ($1), print the 10th
# whitespace-separated column of the first line matching pattern ($2).
_smart_attr_raw() {
    echo "$1" | grep -i "$2" | awk '{print $10}' | head -1
}

# Collect identity, health and selected SMART attributes for one disk.
#   $1 - device path
#   $2 - optional smartctl device type, passed as "-d <type>"
# Prints a single '|'-separated record:
#   model|serial|capacity|firmware|health|type|power_on_hours|
#   reallocated_sectors|pending_sectors|total_written|host_writes_32mib|media_wearout
# Fields that cannot be found are left empty; type is SSD, HDD or UNKNOWN.
get_disk_info() {
    local disk=$1
    local controller=$2
    local smart_cmd="smartctl"
    [[ -n "$controller" ]] && smart_cmd+=" -d $controller"

    local info attributes health
    info=$($smart_cmd -i "$disk" 2>/dev/null)
    attributes=$($smart_cmd -A "$disk" 2>/dev/null)
    health=$($smart_cmd -H "$disk" 2>/dev/null)

    # Identity fields; several labels are tried per field.
    local model vendor serial capacity firmware health_status
    model=$(_smart_info_field "$info" "Device Model:\|Product:")
    vendor=$(_smart_info_field "$info" "Vendor:")
    [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model"
    serial=$(_smart_info_field "$info" "Serial Number:\|Serial number:")
    # Drop the bracketed human-readable size, keeping only the part before '['.
    capacity=$(_smart_info_field "$info" "User Capacity:\|Total NVM Capacity:" | cut -d'[' -f1)
    firmware=$(_smart_info_field "$info" "Firmware Version:\|Firmware revision:\|Revision:")
    health_status=$(_smart_info_field "$health" "result:\|SMART overall-health")
    [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep -i "SMART overall-health" | awk -F'[' '{print $2}' | cut -d']' -f1)

    # Disk type: explicit SSD marker, then any rotation rate line, then a
    # heuristic for SCSI/SAS output based on common spindle speeds.
    local disk_type="UNKNOWN"
    if echo "$info" | grep -qi "Solid State Device"; then
        disk_type="SSD"
    elif echo "$info" | grep -qi "Rotation Rate"; then
        disk_type="HDD"
    elif echo "$info" | grep -qi "SCSI\|SAS"; then
        if echo "$info" | grep -qi "15000\|10000\|7200"; then
            disk_type="HDD"
        else
            disk_type="SSD"
        fi
    fi

    # Raw attribute values (10th column of the attribute table).
    local power_on_hours reallocated_sectors pending_sectors
    power_on_hours=$(_smart_attr_raw "$attributes" "Power_On_Hours")
    # The raw value may carry a suffix such as "h+56m+07.890s"; callers compare
    # this field as an integer, so keep only the leading run of digits.
    power_on_hours=${power_on_hours%%[!0-9]*}
    reallocated_sectors=$(_smart_attr_raw "$attributes" "Reallocated_Sector_Ct")
    pending_sectors=$(_smart_attr_raw "$attributes" "Current_Pending_Sector")

    # Write counters: the first matching attribute wins, whatever its unit.
    local total_written host_writes_32mib media_wearout
    total_written=$(_smart_attr_raw "$attributes" "Total_LBAs_Written\|Lifetime_Writes_GiB\|Host_Writes_32MiB\|Flash_Writes_GiB")
    host_writes_32mib=$(_smart_attr_raw "$attributes" "Host_Writes_32MiB")
    # Wear indicators
    media_wearout=$(_smart_attr_raw "$attributes" "Media_Wearout_Indicator\|Wear_Leveling_Count\|SSD_Life_Left")

    echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout"
}
# Convert a SMART write counter into decimal terabytes written (TB, 10^12 bytes).
#   $1 - raw counter: GiB for Kingston models, otherwise 32 MiB units
#   $2 - count of 512-byte sectors (LBAs) written
#   $3 - disk model string (only checked for "KINGSTON")
# Precedence: Kingston raw counter, then sector count, then 32 MiB counter.
# Prints the value truncated to two decimals, or "0" when no usable counter
# was supplied. Non-numeric inputs are treated as absent.
calculate_tbw() {
    local raw_value=$1
    local sectors=$2
    local disk_model=$3
    local count bytes_per_unit

    # Reject anything that is not a plain non-negative integer.
    [[ "$raw_value" =~ ^[0-9]+$ ]] || raw_value=""
    [[ "$sectors" =~ ^[0-9]+$ ]] || sectors=""

    if [[ -n "$raw_value" && "$raw_value" != "0" ]] && echo "$disk_model" | grep -qi "KINGSTON"; then
        # Kingston reports Lifetime_Writes_GiB / Flash_Writes_GiB: 1 GiB = 2^30 bytes.
        count=$raw_value
        bytes_per_unit=1073741824
    elif [[ -n "$sectors" && "$sectors" != "0" ]]; then
        count=$sectors
        bytes_per_unit=512
    elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then
        # Host_Writes_32MiB: one unit is 32 * 2^20 bytes.
        count=$raw_value
        bytes_per_unit=33554432
    else
        echo "0"
        return
    fi

    # Use awk for the arithmetic so large counters cannot overflow shell
    # integers; int() truncates to hundredths of a TB, so the result always
    # has a leading digit and two decimals.
    awk -v n="$count" -v f="$bytes_per_unit" \
        'BEGIN { printf "%.2f\n", int(n * f / 10000000000) / 100 }'
}
# Estimate the rated write endurance (in TB) of an SSD.
#   $1 - disk model string, matched case-insensitively against known families
#   $2 - capacity in GB
# Each family has a list of "minimum-capacity:endurance" tiers, checked from
# largest to smallest, plus a fallback for drives below every tier.
# Prints the endurance figure as a bare integer.
estimate_ssd_endurance() {
    local disk_model=$1
    local capacity_gb=$2
    local -a tiers
    local floor_tb

    if grep -qi "KINGSTON.*SA400" <<< "$disk_model"; then
        # Kingston consumer SSDs
        tiers=(960:300 480:150)
        floor_tb=80
    elif grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD" <<< "$disk_model"; then
        # Enterprise SAS SSDs - very high endurance
        tiers=(1000:10000 600:6000 400:4000)
        floor_tb=2000
    elif grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL" <<< "$disk_model"; then
        # Enterprise SATA/NVMe SSDs
        tiers=(1000:1200 480:600 240:300)
        floor_tb=150
    else
        # Consumer SSDs
        tiers=(1000:600 480:300 240:150 120:80)
        floor_tb=40
    fi

    local tier
    for tier in "${tiers[@]}"; do
        if [[ $capacity_gb -ge ${tier%%:*} ]]; then
            echo "${tier##*:}"
            return
        fi
    done
    echo "$floor_tb"
}
# Summarise SSD wear as a '|'-separated record:
#   percent|tbw_remaining|status|source
#   $1 - power-on hours   $2 - TB written so far   $3 - model
#   $4 - capacity in GB   $5 - media wearout value (optional)
# Behaviour:
#   - power-on hours empty or 0: prints "Unknown||Unknown||Unknown".
#   - a non-zero media wearout value is used directly as the percentage
#     (source "media_wearout"): <=10 critical, <=30 high, <=70 moderate,
#     otherwise healthy. The same thresholds apply to every model.
#   - otherwise, if TB written > 0, the percentage is derived from the
#     estimated endurance (source "tbw"): >=80% used high, >=50% moderate.
#   - otherwise reports the estimated endurance with source "estimated".
# Colored fields are wrapped in the global RED/YELLOW/GREEN and NC sequences.
estimate_ssd_lifespan() {
    local power_on_hours=$1
    local tbw_used=$2
    local disk_model=$3
    local capacity_gb=$4
    local media_wearout=$5

    if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then
        echo "Unknown||Unknown||Unknown"
        return
    fi

    local endurance_tb left_tb
    endurance_tb=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")
    left_tb=$(echo "scale=2; $endurance_tb - $tbw_used" | bc 2>/dev/null || echo "0")

    local paint verdict

    # Preferred source: the drive's own wear indicator.
    if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then
        if [[ $media_wearout -le 10 ]]; then
            paint=$RED
            verdict="Critical wear"
        elif [[ $media_wearout -le 30 ]]; then
            paint=$YELLOW
            verdict="High wear"
        elif [[ $media_wearout -le 70 ]]; then
            paint=$YELLOW
            verdict="Moderate wear"
        else
            paint=$GREEN
            verdict="Healthy"
        fi
        echo "${paint}${media_wearout}%${NC}|${paint}${left_tb} TB${NC}|${paint}${verdict}${NC}|media_wearout"
        return
    fi

    # Fallback: compare TB written against the estimated endurance.
    if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
        local used_pct left_pct
        used_pct=$(echo "scale=1; $tbw_used * 100 / $endurance_tb" | bc 2>/dev/null || echo "0")
        left_pct=$(echo "scale=1; 100 - $used_pct" | bc 2>/dev/null || echo "100")
        if [[ $(echo "$used_pct >= 80" | bc 2>/dev/null) -eq 1 ]]; then
            paint=$RED
            verdict="High wear"
        elif [[ $(echo "$used_pct >= 50" | bc 2>/dev/null) -eq 1 ]]; then
            paint=$YELLOW
            verdict="Moderate wear"
        else
            paint=$GREEN
            verdict="Healthy"
        fi
        echo "${paint}${left_pct}%${NC}|${paint}${left_tb} TB${NC}|${paint}${verdict}${NC}|tbw"
    else
        echo "Unknown|${endurance_tb} TB|New|estimated"
    fi
}
# Produce a one-line remaining-life verdict for a hard disk.
#   $1 - power-on hours (empty -> "Unknown")
#   $2 - reallocated sector count (empty counts as 0)
#   $3 - pending sector count (empty counts as 0)
# Checks run from most to least severe: pending sectors, reallocated
# sectors (>100, >10), then accumulated hours (>40000, >25000).
estimate_hdd_lifespan() {
    local hours=$1
    local realloc=${2:-0}
    local pending=${3:-0}

    if [[ -z "$hours" ]]; then
        echo "Unknown"
        return
    fi

    if [[ "$pending" -gt 0 ]]; then
        echo "${RED}CRITICAL${NC} (Pending sectors: $pending)"
        return
    fi
    if [[ "$realloc" -gt 100 ]]; then
        echo "${RED}< 6 months${NC} (High reallocated sectors: $realloc)"
        return
    fi
    if [[ "$realloc" -gt 10 ]]; then
        echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $realloc)"
        return
    fi
    if [[ "$hours" -gt 40000 ]]; then
        echo "${YELLOW}1-2 years${NC} (High usage: $hours hours)"
        return
    fi
    if [[ "$hours" -gt 25000 ]]; then
        echo "${GREEN}2-3 years${NC} (Moderate usage: $hours hours)"
        return
    fi
    echo "${GREEN}> 3 years${NC} (Low usage: $hours hours)"
}
# Report the health of a single disk on stdout.
#   $1 - device path
#   $2 - optional smartctl device type (e.g. "megaraid,0"); empty for direct access
# Steps:
#   1. classify SMART access via test_smart_access and stop with an
#      explanation when the disk cannot be read;
#   2. fetch the '|'-separated record from get_disk_info and print the
#      identity fields;
#   3. print SSD wear figures or an HDD lifespan estimate depending on the
#      detected disk type.
check_disk() {
    local disk=$1
    local controller=$2
    # Fields filled by the read below; kept local so they do not leak into
    # the caller's scope.
    local model serial capacity firmware health_status disk_type
    local power_on_hours reallocated_sectors pending_sectors
    local total_written host_writes_32mib media_wearout

    print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
    echo "=================================================="

    # Test SMART access level
    local access_level
    access_level=$(test_smart_access "$disk" "$controller")
    case $access_level in
        "no_access")
            print_color "$RED" "ERROR: Cannot access disk through controller"
            echo "Possible reasons:"
            echo " - Controller doesn't support SMART passthrough"
            echo " - Disk is part of a hardware RAID array"
            echo " - Insufficient permissions (try running as root)"
            echo " - Controller busy or offline"
            echo ""
            return
            ;;
        "not_available")
            print_color "$YELLOW" "SMART not available on this disk"
            echo "This disk does not support SMART monitoring"
            echo ""
            return
            ;;
        "disabled")
            print_color "$YELLOW" "SMART is disabled on this disk"
            echo "SMART is available but currently disabled"
            echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk"
            echo ""
            return
            ;;
        "no_attributes")
            print_color "$YELLOW" "WARNING: Cannot read SMART attributes"
            echo "This is common with hardware RAID controllers like PERC H730P"
            echo "Try checking through the RAID management interface"
            echo ""
            return
            ;;
        "limited_attributes")
            print_color "$YELLOW" "NOTE: Limited SMART data available"
            echo "Controller is filtering some SMART attributes"
            ;;
    esac

    # Get disk information
    local disk_info
    disk_info=$(get_disk_info "$disk" "$controller")
    IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info"

    # Display basic information
    echo "Model: ${model:-Unknown}"
    echo "Serial: ${serial:-Unknown}"
    echo "Type: $disk_type"
    echo "Capacity: ${capacity:-Unknown}"
    echo "Firmware: ${firmware:-Unknown}"
    echo "Health: ${health_status:-Unknown}"

    # Only show power on hours if available
    if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
        echo "Power On Hours: $power_on_hours"
    else
        echo "Power On Hours: Unknown"
    fi

    # Disk type specific analysis
    if [[ "$disk_type" == "SSD" ]]; then
        local tbw_used=0
        if [[ -n "$total_written" && "$total_written" != "0" ]]; then
            tbw_used=$(calculate_tbw "" "$total_written" "$model")
        elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
            tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model")
        fi
        if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
            echo "TBW Used: ${tbw_used} TB"
        fi

        # Estimate capacity (in GB) for the endurance calculation.
        local capacity_gb=0
        if echo "$capacity" | grep -qi "GB"; then
            capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. -f1)
        elif echo "$capacity" | grep -qi "TB"; then
            capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. -f1)
        else
            # get_disk_info strips the bracketed "[480 GB]" suffix, so the
            # field normally holds only a byte count such as
            # "480,103,981,056 bytes". Drop the unit and separators and
            # convert bytes to decimal GB.
            local capacity_bytes=${capacity%%bytes*}
            capacity_bytes=${capacity_bytes//[!0-9]/}
            if [[ -n "$capacity_bytes" ]]; then
                # 10# forces base 10 so a leading zero is not read as octal.
                capacity_gb=$(( 10#$capacity_bytes / 1000000000 ))
            fi
        fi

        local lifespan_info
        lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout")
        local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1)
        local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2)
        local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3)
        local wear_source=$(echo "$lifespan_info" | cut -d'|' -f4)
        if [[ "$wear_source" != "media_wearout" && $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
            echo "TBW Remaining: $tbw_remaining"
        fi
        echo "Lifespan: $lifespan_percent ($wear_status)"

        # Show wear source if available
        if [[ "$wear_source" == "media_wearout" ]]; then
            echo "Wear Source: Media Wearout Indicator"
        elif [[ "$wear_source" == "tbw" ]]; then
            echo "Wear Source: TBW Calculation"
        elif [[ "$wear_source" == "estimated" ]]; then
            echo "Wear Source: Estimated Endurance"
        fi
    elif [[ "$disk_type" == "HDD" ]]; then
        if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then
            echo "Realloc Sectors: $reallocated_sectors"
        fi
        if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then
            echo "Pending Sectors: $pending_sectors"
        fi
        local lifespan
        lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
        echo "Lifespan: $lifespan"
    else
        print_color "$YELLOW" "Limited information available for this disk type"
        echo "This is normal for hardware RAID configurations like PERC H730P"
        echo "For detailed SAS drive information, use controller management tools"
    fi
    echo ""
}
# List whole-disk block devices (no partitions) as a space-separated line.
# Candidate names are produced by shell globs and kept only if they are block
# devices. Compared with single-character globs, the patterns below also cover
# two-letter sd/vd/xvd names (sdaa, ...) and two-digit NVMe controller or
# namespace numbers (nvme10n1, nvme0n12, ...). Partition nodes such as sda1 or
# nvme0n1p1 do not match any pattern.
# NOTE(review): /dev/sgN nodes are usually character devices, so the -b test
# probably never admits them - confirm before relying on sg detection.
detect_disks() {
    local disks=()
    local candidate

    for candidate in \
        /dev/sd[a-z] /dev/sd[a-z][a-z] \
        /dev/nvme[0-9]n[0-9] /dev/nvme[0-9]n[0-9][0-9] \
        /dev/nvme[0-9][0-9]n[0-9] /dev/nvme[0-9][0-9]n[0-9][0-9] \
        /dev/sg[0-9] \
        /dev/vd[a-z] /dev/vd[a-z][a-z] \
        /dev/xvd[a-z] /dev/xvd[a-z][a-z]; do
        # An unmatched glob stays as the literal pattern and fails this test.
        if [[ -b "$candidate" ]]; then
            disks+=("$candidate")
        fi
    done

    echo "${disks[@]}"
}
# Probe for disks reachable through hardware RAID pass-through.
# For every controller type and every slot number 0..31, the probe nodes are
# tried in order; the first node for which "smartctl -d <type>,<slot> -i"
# succeeds is recorded as "<node>:<type>,<slot>".
# Prints all recorded entries as one space-separated line.
detect_raid_controllers() {
    local -a driver_types=("megaraid" "cciss" "areca" "3ware" "hpt")
    local -a probe_nodes=("/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1")
    local -a found=()
    local driver slot node

    for driver in "${driver_types[@]}"; do
        for (( slot = 0; slot <= 31; slot++ )); do
            for node in "${probe_nodes[@]}"; do
                # Skip nodes that are not block devices on this machine.
                [[ -b "$node" ]] || continue
                if smartctl -d "$driver,$slot" -i "$node" &>/dev/null; then
                    found+=("$node:$driver,$slot")
                    # One hit per slot is enough; move on to the next slot.
                    break
                fi
            done
        done
    done

    echo "${found[@]}"
}
# Program body.
#   $@ - optional list of device paths to check
# With arguments, only those paths that are block devices are checked; with
# none, the results of detect_disks and detect_raid_controllers are combined.
# Exits with status 1 when no disk is left to check. Entries of the form
# "<device>:<controller>" are passed to check_disk as two arguments, plain
# device paths as one.
main() {
    print_color "$BLUE" "Ubuntu 24.04 Disk Health Check Script v$VERSION"
    print_color "$BLUE" "Enhanced with PERC H730P and SAS Support"
    print_color "$BLUE" "============================================"
    echo ""

    check_dependencies

    local disks=()
    local requested entry

    if (( $# > 0 )); then
        # Explicit device list from the command line.
        for requested in "$@"; do
            if [[ -b "$requested" ]]; then
                disks+=("$requested")
            else
                print_color "$RED" "Error: $requested is not a valid block device"
            fi
        done
    else
        # Auto-detection: plain disks first, then RAID pass-through entries.
        print_color "$CYAN" "Auto-detecting disks (excluding partitions)..."
        local direct_disks=()
        read -ra direct_disks <<< "$(detect_disks)"
        print_color "$CYAN" "Scanning for RAID controllers..."
        local raid_disks=()
        read -ra raid_disks <<< "$(detect_raid_controllers)"
        disks=("${direct_disks[@]}" "${raid_disks[@]}")
    fi

    if (( ${#disks[@]} == 0 )); then
        print_color "$RED" "No disks found or accessible"
        echo "Try running as root or specifying disk paths manually"
        exit 1
    fi

    print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
    echo ""

    # Check if running as root, warn if not
    if [[ $EUID -ne 0 ]]; then
        print_color "$YELLOW" "Warning: Not running as root."
        print_color "$YELLOW" "Some disks/controllers may show limited information."
        echo "For complete results, run as: sudo $0"
        echo ""
    fi

    # Check each disk
    for entry in "${disks[@]}"; do
        case "$entry" in
            *:*)
                # "<device>:<controller>" - split at the first colon.
                check_disk "${entry%%:*}" "${entry#*:}"
                ;;
            *)
                check_disk "$entry"
                ;;
        esac
    done

    print_color "$BLUE" "Check completed!"
    echo ""
    print_color "$CYAN" "Note: For PERC H730P controllers with SAS drives:"
    print_color "$CYAN" " - Install 'storcli' for detailed controller information"
    print_color "$CYAN" " - Use 'smartctl -d sat /dev/sgX' to try direct access"
    print_color "$CYAN" " - Hardware RAID controllers often limit SMART data access"
    echo ""
    print_color "$CYAN" "Ubuntu-specific tips:"
    print_color "$CYAN" " - Use 'lsblk' to see all available block devices"
    print_color "$CYAN" " - Use 'lshw -class disk' for detailed disk information"
}
# Print the command-line help text to stdout.
# Reads the global SCRIPT_NAME for the program name shown in each example.
usage() {
    printf '%s\n' \
        "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
        "" \
        "If no disks specified, auto-detects all available disks" \
        "" \
        "Examples:" \
        " $SCRIPT_NAME # Check all auto-detected disks" \
        " sudo $SCRIPT_NAME # Check all disks (as root)" \
        " $SCRIPT_NAME /dev/sda # Check specific disk" \
        " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" \
        " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" \
        " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks"
}
# Entry point: dispatch on the first argument.
#   -h / --help     print usage and exit 0
#   -v / --version  print the version line and exit 0
#   anything else   hand every argument to main
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
    usage
    exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
    echo "$SCRIPT_NAME version $VERSION"
    exit 0
else
    main "$@"
fi

650
old/obsolete/ubuntu-v2.6.sh Normal file
View File

@@ -0,0 +1,650 @@
#!/bin/bash
# Disk Health Check Script for Ubuntu 24.04
# Enhanced with SAS/PERC H730P controller support
# Checks SSD TBW/lifespan and HDD health status
# Name used in help/version output, and the script version.
SCRIPT_NAME=$(basename "$0")
VERSION="2.6"
# Color codes.
# tput exits non-zero and writes to stderr when it cannot resolve the
# terminal type (e.g. TERM unset under cron). Silence that and fall back to
# empty strings so the script still runs, just without colors.
RED=$(tput setaf 1 2>/dev/null) || RED=""
GREEN=$(tput setaf 2 2>/dev/null) || GREEN=""
YELLOW=$(tput setaf 3 2>/dev/null) || YELLOW=""
BLUE=$(tput setaf 4 2>/dev/null) || BLUE=""
CYAN=$(tput setaf 6 2>/dev/null) || CYAN=""
NC=$(tput sgr0 2>/dev/null) || NC=""
# Emit one line of text in the requested color.
#   $1 - color escape sequence written before the text
#   $2 - the text itself (echo -e interprets backslash escapes in it)
# The global NC reset sequence is written after the text.
print_color() {
    local prefix=$1
    local body=$2
    echo -e "${prefix}${body}${NC}"
}
# Return 0 when the command named by $1 can be resolved, non-zero otherwise.
# All output of the lookup is discarded.
command_exists() {
    command -v "$1" &>/dev/null
}
# Ensure the smartctl and bc commands are available.
# If either is absent, prints the corresponding package names together with
# an apt install hint and exits the script with status 1; otherwise returns.
check_dependencies() {
    local -a needed=()
    local probe

    # Each entry is "<command>:<package providing it>".
    for probe in "smartctl:smartmontools" "bc:bc"; do
        if ! command_exists "${probe%%:*}"; then
            needed+=("${probe##*:}")
        fi
    done

    if (( ${#needed[@]} > 0 )); then
        print_color "$RED" "Error: Missing required packages: ${needed[*]}"
        echo "Install with: sudo apt update && sudo apt install ${needed[*]}"
        exit 1
    fi
}
# Classify how much SMART data smartctl can read from a disk.
#   $1 - device path
#   $2 - optional smartctl device type, passed as "-d <type>"
# Prints exactly one of:
#   no_access      "smartctl -i" returned a failure status
#   full_access    NVMe device whose "smartctl -H" succeeds, or a SATA/SAS
#                  device that passed every check below
#   no_attributes  NVMe device whose "smartctl -H" fails, or "smartctl -A"
#                  output without an ATTRIBUTE_NAME header
#   not_available  no "SMART support is:" line, or none saying "Available"
#   disabled       support lines present but none saying "Enabled"
test_smart_access() {
    local disk=$1
    local controller=$2
    local smart_cmd="smartctl"
    if [[ -n "$controller" ]]; then
        smart_cmd="$smart_cmd -d $controller"
    fi

    # Can the device be queried at all?
    if ! $smart_cmd -i "$disk" &>/dev/null; then
        echo "no_access"
        return
    fi

    local identity
    identity=$($smart_cmd -i "$disk" 2>/dev/null)

    # Devices whose identity page mentions NVMe are judged by the health
    # query alone.
    if grep -qi "NVMe" <<< "$identity"; then
        if $smart_cmd -H "$disk" &>/dev/null; then
            echo "full_access"
        else
            echo "no_attributes"
        fi
        return
    fi

    # SATA/SAS: inspect the "SMART support is:" lines.
    local support
    support=$(grep "SMART support is:" <<< "$identity")

    # Case-sensitive on purpose: "Unavailable" must not count as "Available".
    if [[ -z "$support" || "$support" != *Available* ]]; then
        echo "not_available"
        return
    fi
    if [[ "$support" != *Enabled* ]]; then
        echo "disabled"
        return
    fi

    # The attribute table must exist and carry its column header.
    local table
    table=$($smart_cmd -A "$disk" 2>/dev/null)
    if [[ "$table" != *ATTRIBUTE_NAME* ]]; then
        echo "no_attributes"
        return
    fi

    echo "full_access"
}
# Helper: from smartctl text ($1), take the first line matching the
# case-insensitive grep pattern ($2) and print what follows its first colon,
# with leading blanks removed.
_smart_info_field() {
    echo "$1" | grep -i "$2" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1
}

# Helper: from a smartctl attribute listing ($1), print the 10th
# whitespace-separated column of the first line matching pattern ($2).
_smart_attr_raw() {
    echo "$1" | grep -i "$2" | awk '{print $10}' | head -1
}

# Collect identity, health and selected SMART attributes for one disk.
#   $1 - device path
#   $2 - optional smartctl device type, passed as "-d <type>"
# Prints a single '|'-separated record:
#   model|serial|capacity|firmware|health|type|power_on_hours|
#   reallocated_sectors|pending_sectors|total_written|host_writes_32mib|media_wearout
# Fields that cannot be found are left empty; type is SSD, HDD or UNKNOWN.
# NOTE(review): the attribute helpers read column 10, which fits the ATA
# attribute table; NVMe "Label: value" lines appear to have fewer columns, so
# the NVMe patterns below probably yield empty values - confirm against real
# smartctl output.
get_disk_info() {
    local disk=$1
    local controller=$2
    local smart_cmd="smartctl"
    [[ -n "$controller" ]] && smart_cmd+=" -d $controller"

    local info attributes health
    info=$($smart_cmd -i "$disk" 2>/dev/null)
    attributes=$($smart_cmd -A "$disk" 2>/dev/null)
    health=$($smart_cmd -H "$disk" 2>/dev/null)

    # Identity fields; several labels are tried per field.
    local model vendor serial capacity firmware health_status
    model=$(_smart_info_field "$info" "Device Model:\|Product:\|Model Number:")
    vendor=$(_smart_info_field "$info" "Vendor:")
    [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model"
    serial=$(_smart_info_field "$info" "Serial Number:\|Serial number:")
    # Drop the bracketed human-readable size, keeping only the part before '['.
    capacity=$(_smart_info_field "$info" "User Capacity:\|Total NVM Capacity:\|Namespace 1 Size/Capacity:" | cut -d'[' -f1)
    firmware=$(_smart_info_field "$info" "Firmware Version:\|Firmware revision:\|Revision:")
    health_status=$(_smart_info_field "$health" "result:\|SMART overall-health\|Health Status:")
    [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep -i "SMART overall-health" | awk -F'[' '{print $2}' | cut -d']' -f1)

    # Disk type: SSD/NVMe marker, then any rotation rate line, then a
    # heuristic for SCSI/SAS output based on common spindle speeds.
    local disk_type="UNKNOWN"
    if echo "$info" | grep -qi "Solid State Device\|NVMe"; then
        disk_type="SSD"
    elif echo "$info" | grep -qi "Rotation Rate"; then
        disk_type="HDD"
    elif echo "$info" | grep -qi "SCSI\|SAS"; then
        if echo "$info" | grep -qi "15000\|10000\|7200"; then
            disk_type="HDD"
        else
            disk_type="SSD"
        fi
    fi

    # Raw attribute values (10th column of the attribute table).
    local power_on_hours reallocated_sectors pending_sectors
    power_on_hours=$(_smart_attr_raw "$attributes" "Power_On_Hours")
    # The raw value may look like "1234h+56m+07.890s". Deleting every
    # non-digit would glue hours, minutes and seconds into one bogus number,
    # so drop thousands separators and keep only the leading run of digits.
    power_on_hours=${power_on_hours//,/}
    power_on_hours=${power_on_hours%%[!0-9]*}
    reallocated_sectors=$(_smart_attr_raw "$attributes" "Reallocated_Sector_Ct")
    pending_sectors=$(_smart_attr_raw "$attributes" "Current_Pending_Sector")

    # Write counters: the first matching attribute wins, whatever its unit.
    local total_written host_writes_32mib media_wearout
    total_written=$(_smart_attr_raw "$attributes" "Total_LBAs_Written\|Lifetime_Writes_GiB\|Host_Writes_32MiB\|Flash_Writes_GiB\|Data Units Written")
    host_writes_32mib=$(_smart_attr_raw "$attributes" "Host_Writes_32MiB")
    # Wear indicators
    media_wearout=$(_smart_attr_raw "$attributes" "Media_Wearout_Indicator\|Wear_Leveling_Count\|SSD_Life_Left\|Percentage Used\|Available Spare")

    echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout"
}
# Convert a SMART/NVMe write counter into decimal terabytes written
# (TB, 10^12 bytes).
#   $1 - raw counter: GiB for Kingston models, NVMe data units when $4 names
#        "Data Units Written", otherwise 32 MiB units
#   $2 - count of 512-byte sectors (LBAs) written
#   $3 - disk model string (only checked for "KINGSTON")
#   $4 - name of the attribute the raw counter came from (optional)
# Precedence: Kingston raw counter, NVMe data units, sector count, 32 MiB
# counter. Prints the value truncated to two decimals, or "0" when no usable
# counter was supplied. Thousands separators are removed; anything else
# non-numeric is treated as absent.
calculate_tbw() {
    local raw_value=$1
    local sectors=$2
    local disk_model=$3
    local attribute_name=$4
    local count bytes_per_unit

    # Normalise inputs: strip "1,234,567"-style separators, then require a
    # plain non-negative integer.
    raw_value=${raw_value//,/}
    sectors=${sectors//,/}
    [[ "$raw_value" =~ ^[0-9]+$ ]] || raw_value=""
    [[ "$sectors" =~ ^[0-9]+$ ]] || sectors=""

    local have_raw=""
    [[ -n "$raw_value" && "$raw_value" != "0" ]] && have_raw=1

    if [[ -n "$have_raw" ]] && echo "$disk_model" | grep -qi "KINGSTON"; then
        # Kingston reports Lifetime_Writes_GiB / Flash_Writes_GiB: 1 GiB = 2^30 bytes.
        count=$raw_value
        bytes_per_unit=1073741824
    elif [[ -n "$have_raw" ]] && echo "$attribute_name" | grep -qi "Data Units Written"; then
        # One NVMe data unit is 1000 blocks of 512 bytes = 512,000 bytes.
        count=$raw_value
        bytes_per_unit=512000
    elif [[ -n "$sectors" && "$sectors" != "0" ]]; then
        count=$sectors
        bytes_per_unit=512
    elif [[ -n "$have_raw" ]]; then
        # Host_Writes_32MiB: one unit is 32 * 2^20 bytes.
        count=$raw_value
        bytes_per_unit=33554432
    else
        echo "0"
        return
    fi

    # Use awk for the arithmetic so large counters cannot overflow shell
    # integers; int() truncates to hundredths of a TB, so the result always
    # has a leading digit and two decimals.
    awk -v n="$count" -v f="$bytes_per_unit" \
        'BEGIN { printf "%.2f\n", int(n * f / 10000000000) / 100 }'
}
# Estimate the rated write endurance (in TB) of an SSD.
#   $1 - disk model string, matched case-insensitively against known families
#   $2 - capacity in GB
# Families are tested in this order: Kingston SA400, models containing
# "NVMe", SAS SSDs, enterprise vendors, then everything else. Each family has
# "minimum-capacity:endurance" tiers checked from largest to smallest and a
# fallback for drives below every tier.
# Prints the endurance figure as a bare integer.
estimate_ssd_endurance() {
    local disk_model=$1
    local capacity_gb=$2
    local -a tiers
    local floor_tb

    if grep -qi "KINGSTON.*SA400" <<< "$disk_model"; then
        # Kingston consumer SSDs
        tiers=(960:300 480:150)
        floor_tb=80
    elif grep -qi "NVMe" <<< "$disk_model"; then
        # NVMe SSDs
        tiers=(2000:1200 1000:600 500:300)
        floor_tb=150
    elif grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD" <<< "$disk_model"; then
        # Enterprise SAS SSDs
        tiers=(1000:10000 600:6000 400:4000)
        floor_tb=2000
    elif grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL" <<< "$disk_model"; then
        # Enterprise SATA/NVMe SSDs
        tiers=(1000:1200 480:600 240:300)
        floor_tb=150
    else
        # Consumer SSDs
        tiers=(1000:600 480:300 240:150 120:80)
        floor_tb=40
    fi

    local tier
    for tier in "${tiers[@]}"; do
        if [[ $capacity_gb -ge ${tier%%:*} ]]; then
            echo "${tier##*:}"
            return
        fi
    done
    echo "$floor_tb"
}
# Summarise SSD wear as a '|'-separated record:
#   percent|tbw_remaining|status|source
#   $1 - power-on hours   $2 - TB written so far   $3 - model
#   $4 - capacity in GB   $5 - media wearout value (optional)
# Behaviour:
#   - power-on hours empty or 0: prints "Unknown||Unknown||Unknown".
#   - a non-zero media wearout value is used directly as the percentage
#     (source "media_wearout"): <=10 critical, <=30 high, <=70 moderate,
#     otherwise healthy. The same thresholds apply to every model.
#   - otherwise, if TB written > 0, the percentage is derived from the
#     estimated endurance (source "tbw"): >=80% used high, >=50% moderate.
#   - otherwise reports the estimated endurance with source "estimated".
# Colored fields are wrapped in the global RED/YELLOW/GREEN and NC sequences.
estimate_ssd_lifespan() {
    local power_on_hours=$1
    local tbw_used=$2
    local disk_model=$3
    local capacity_gb=$4
    local media_wearout=$5

    if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then
        echo "Unknown||Unknown||Unknown"
        return
    fi

    local rated_tb remaining_tb
    rated_tb=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")
    remaining_tb=$(echo "scale=2; $rated_tb - $tbw_used" | bc 2>/dev/null || echo "0")

    local shade status

    # Preferred source: the drive's own wear indicator.
    if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then
        if [[ $media_wearout -le 10 ]]; then
            shade=$RED
            status="Critical wear"
        elif [[ $media_wearout -le 30 ]]; then
            shade=$YELLOW
            status="High wear"
        elif [[ $media_wearout -le 70 ]]; then
            shade=$YELLOW
            status="Moderate wear"
        else
            shade=$GREEN
            status="Healthy"
        fi
        echo "${shade}${media_wearout}%${NC}|${shade}${remaining_tb} TB${NC}|${shade}${status}${NC}|media_wearout"
        return
    fi

    # Fallback: compare TB written against the estimated endurance.
    if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
        local spent_pct remaining_pct
        spent_pct=$(echo "scale=1; $tbw_used * 100 / $rated_tb" | bc 2>/dev/null || echo "0")
        remaining_pct=$(echo "scale=1; 100 - $spent_pct" | bc 2>/dev/null || echo "100")
        if [[ $(echo "$spent_pct >= 80" | bc 2>/dev/null) -eq 1 ]]; then
            shade=$RED
            status="High wear"
        elif [[ $(echo "$spent_pct >= 50" | bc 2>/dev/null) -eq 1 ]]; then
            shade=$YELLOW
            status="Moderate wear"
        else
            shade=$GREEN
            status="Healthy"
        fi
        echo "${shade}${remaining_pct}%${NC}|${shade}${remaining_tb} TB${NC}|${shade}${status}${NC}|tbw"
    else
        echo "Unknown|${rated_tb} TB|New|estimated"
    fi
}
# Produce a one-line remaining-life verdict for a hard disk.
#   $1 - power-on hours; only the leading digits are used, so a value such
#        as "1234h+56m" counts as 1234. Empty, non-numeric or 0 -> "Unknown".
#   $2 - reallocated sector count (empty counts as 0)
#   $3 - pending sector count (empty counts as 0)
# Checks run from most to least severe: pending sectors, reallocated
# sectors (>100, >10), then accumulated hours (>40000, >25000).
estimate_hdd_lifespan() {
    local power_on_hours=$1
    local realloc=${2:-0}
    local pending=${3:-0}

    # Cut the value at the first non-digit character.
    local hours
    hours=$(echo "$power_on_hours" | sed 's/[^0-9].*//')
    [[ -n "$hours" ]] || hours=0

    if [[ "$hours" -eq 0 ]]; then
        echo "Unknown"
        return
    fi

    if [[ "$pending" -gt 0 ]]; then
        echo "${RED}CRITICAL${NC} (Pending sectors: $pending)"
        return
    fi
    if [[ "$realloc" -gt 100 ]]; then
        echo "${RED}< 6 months${NC} (High reallocated sectors: $realloc)"
        return
    fi
    if [[ "$realloc" -gt 10 ]]; then
        echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $realloc)"
        return
    fi
    if [[ "$hours" -gt 40000 ]]; then
        echo "${YELLOW}1-2 years${NC} (High usage: $hours hours)"
        return
    fi
    if [[ "$hours" -gt 25000 ]]; then
        echo "${GREEN}2-3 years${NC} (Moderate usage: $hours hours)"
        return
    fi
    echo "${GREEN}> 3 years${NC} (Low usage: $hours hours)"
}
# Function to check a single disk with enhanced error handling
#
# Arguments:
#   $1 - device path handed to smartctl (e.g. /dev/sda)
#   $2 - optional smartctl device type (e.g. "megaraid,3"); empty = direct access
# Outputs:
#   A human-readable report for the disk on stdout. Returns early, after an
#   explanatory message, when test_smart_access reports that SMART data cannot
#   be read.
check_disk() {
    local disk=$1
    local controller=$2

    # smartctl options shared by every query for this disk, so that RAID
    # members are addressed through their controller and not the base device.
    local smart_opts=()
    [[ -n "$controller" ]] && smart_opts+=(-d "$controller")

    print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
    echo "=================================================="

    # Test SMART access level
    local access_level
    access_level=$(test_smart_access "$disk" "$controller")
    case $access_level in
        "no_access")
            print_color "$RED" "ERROR: Cannot access disk through controller"
            echo "Possible reasons:"
            echo " - Controller doesn't support SMART passthrough"
            echo " - Disk is part of a hardware RAID array"
            echo " - Insufficient permissions (try running as root)"
            echo " - Controller busy or offline"
            echo ""
            return
            ;;
        "not_available")
            print_color "$YELLOW" "SMART not available on this disk"
            echo "This disk does not support SMART monitoring"
            echo ""
            return
            ;;
        "disabled")
            print_color "$YELLOW" "SMART is disabled on this disk"
            echo "SMART is available but currently disabled"
            echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk"
            echo ""
            return
            ;;
        "no_attributes")
            print_color "$YELLOW" "WARNING: Cannot read SMART attributes"
            echo "This is common with hardware RAID controllers like PERC H730P"
            echo "Try checking through the RAID management interface"
            echo ""
            return
            ;;
        "limited_attributes")
            # Not fatal: continue with whatever data is readable.
            print_color "$YELLOW" "NOTE: Limited SMART data available"
            echo "Controller is filtering some SMART attributes"
            ;;
    esac

    # Get disk information (one '|'-separated record); the fields are kept
    # local so they do not leak into the caller's scope.
    local disk_info
    disk_info=$(get_disk_info "$disk" "$controller")
    local model serial capacity firmware health_status disk_type power_on_hours
    local reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout
    IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info"

    # Display basic information
    echo "Model: ${model:-Unknown}"
    echo "Serial: ${serial:-Unknown}"
    echo "Type: $disk_type"
    echo "Capacity: ${capacity:-Unknown}"
    echo "Firmware: ${firmware:-Unknown}"
    echo "Health: ${health_status:-Unknown}"

    # Only show power on hours if available
    if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
        echo "Power On Hours: $power_on_hours"
    else
        echo "Power On Hours: Unknown"
    fi

    # Disk type specific analysis
    if [[ "$disk_type" == "SSD" ]]; then
        # Get the actual attribute name for TBW calculation, going through the
        # controller when one was given.
        local attributes
        attributes=$(smartctl "${smart_opts[@]}" -A "$disk" 2>/dev/null)
        local tbw_attribute_name
        tbw_attribute_name=$(echo "$attributes" | grep -i "Lifetime_Writes_GiB\|Flash_Writes_GiB\|Data Units Written" | head -1 | awk '{print $2}')

        local tbw_used=0
        if [[ -n "$total_written" && "$total_written" != "0" ]]; then
            tbw_used=$(calculate_tbw "" "$total_written" "$model" "$tbw_attribute_name")
        elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
            tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model" "$tbw_attribute_name")
        fi

        # Always show TBW information for SSDs
        echo "TBW Used: ${tbw_used} TB"

        # Estimate capacity for endurance calculation
        local capacity_gb=0
        if echo "$capacity" | grep -qi "GB"; then
            capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. -f1)
        elif echo "$capacity" | grep -qi "TB"; then
            capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. -f1)
        fi

        # estimate_ssd_lifespan answers with "percent|remaining|status|source"
        local lifespan_info
        lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout")
        local lifespan_percent tbw_remaining wear_status wear_source
        lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1)
        tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2)
        wear_status=$(echo "$lifespan_info" | cut -d'|' -f3)
        wear_source=$(echo "$lifespan_info" | cut -d'|' -f4)
        echo "TBW Remaining: $tbw_remaining"
        echo "Lifespan: $lifespan_percent ($wear_status)"

        # Show wear source if available
        if [[ "$wear_source" == "media_wearout" ]]; then
            echo "Wear Source: Media Wearout Indicator"
        elif [[ "$wear_source" == "tbw" ]]; then
            echo "Wear Source: TBW Calculation"
        elif [[ "$wear_source" == "estimated" ]]; then
            echo "Wear Source: Estimated Endurance"
        fi
    elif [[ "$disk_type" == "HDD" ]]; then
        if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then
            echo "Realloc Sectors: $reallocated_sectors"
        fi
        if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then
            echo "Pending Sectors: $pending_sectors"
        fi
        local lifespan
        lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
        echo "Lifespan: $lifespan"
    else
        print_color "$YELLOW" "Limited information available for this disk type"
        echo "This is normal for hardware RAID configurations like PERC H730P"
        echo "For detailed SAS drive information, use controller management tools"
    fi
    echo ""
}
# Function to detect whole-disk block devices (the globs do not match partitions)
#
# Outputs:
#   All matching block devices on one line, separated by spaces, in probe
#   order: SATA/SAS (sd?), NVMe namespaces, SCSI generic (sg?), virtio (vd?)
#   and Xen (xvd?). An unmatched glob stays literal and is dropped by the
#   block-device test.
# NOTE(review): /dev/sg* nodes are normally character devices, so the -b test
# probably never accepts them - confirm whether they are wanted at all.
detect_disks() {
    local found=()
    for disk in /dev/sd[a-z] /dev/nvme[0-9]n[0-9] /dev/sg[0-9] /dev/vd[a-z] /dev/xvd[a-z]; do
        [[ -b "$disk" ]] && found+=("$disk")
    done
    echo "${found[@]}"
}
# Function to detect disks that sit behind a RAID controller
#
# For every supported smartctl device type and every slot number 0..31 the
# fixed list of base devices is probed; the first base device that answers
# "smartctl -d TYPE,SLOT -i" is recorded and the next slot is tried.
# Outputs:
#   Space-separated "DEVICE:TYPE,SLOT" entries on one line (empty line if none).
detect_raid_controllers() {
    local driver_types=("megaraid" "cciss" "areca" "3ware" "hpt")
    local probe_nodes=("/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1")
    local hits=()
    for controller in "${driver_types[@]}"; do
        for i in {0..31}; do
            for base_disk in "${probe_nodes[@]}"; do
                # Skip nodes that do not exist as block devices
                [[ -b "$base_disk" ]] || continue
                # Skip nodes that do not answer for this type/slot
                smartctl -d "$controller,$i" -i "$base_disk" &>/dev/null || continue
                hits+=("$base_disk:$controller,$i")
                break
            done
        done
    done
    echo "${hits[@]}"
}
# Main function
#
# Arguments:
#   Optional block-device paths. Without arguments the disks are auto-detected
#   (direct devices first, then "DEVICE:TYPE,SLOT" RAID members).
# Behaviour:
#   Prints the banner, verifies dependencies, runs check_disk for every target
#   and finishes with usage hints. Exits with status 1 when no target remains.
main() {
    print_color "$BLUE" "Ubuntu 24.04 Disk Health Check Script v$VERSION"
    print_color "$BLUE" "Enhanced with PERC H730P and SAS Support"
    print_color "$BLUE" "============================================"
    echo ""
    check_dependencies

    local targets=()
    if (( $# == 0 )); then
        # Auto-detect disks
        print_color "$CYAN" "Auto-detecting disks (excluding partitions)..."
        local plain_devices=()
        read -ra plain_devices <<< "$(detect_disks)"
        print_color "$CYAN" "Scanning for RAID controllers..."
        local raid_entries=()
        read -ra raid_entries <<< "$(detect_raid_controllers)"
        # Combine both lists
        targets=("${plain_devices[@]}" "${raid_entries[@]}")
    else
        # Specific disks were requested: keep only real block devices
        for disk in "$@"; do
            if [[ ! -b "$disk" ]]; then
                print_color "$RED" "Error: $disk is not a valid block device"
                continue
            fi
            targets+=("$disk")
        done
    fi

    if (( ${#targets[@]} == 0 )); then
        print_color "$RED" "No disks found or accessible"
        echo "Try running as root or specifying disk paths manually"
        exit 1
    fi
    print_color "$GREEN" "Found ${#targets[@]} disk(s) to check"
    echo ""

    # Warn when not running as root
    if (( EUID != 0 )); then
        print_color "$YELLOW" "Warning: Not running as root."
        print_color "$YELLOW" "Some disks/controllers may show limited information."
        echo "For complete results, run as: sudo $0"
        echo ""
    fi

    # Check each disk; an entry containing ':' carries its controller type
    for disk_info in "${targets[@]}"; do
        case "$disk_info" in
            *:*)
                IFS=':' read -r disk controller <<< "$disk_info"
                check_disk "$disk" "$controller"
                ;;
            *)
                check_disk "$disk_info"
                ;;
        esac
    done

    print_color "$BLUE" "Check completed!"
    echo ""
    local hint
    for hint in \
        "Note: For PERC H730P controllers with SAS drives:" \
        " - Install 'storcli' for detailed controller information" \
        " - Use 'smartctl -d sat /dev/sgX' to try direct access" \
        " - Hardware RAID controllers often limit SMART data access"; do
        print_color "$CYAN" "$hint"
    done
    echo ""
    for hint in \
        "Ubuntu-specific tips:" \
        " - Use 'lsblk' to see all available block devices" \
        " - Use 'lshw -class disk' for detailed disk information"; do
        print_color "$CYAN" "$hint"
    done
}
# Usage information
#
# Prints the command synopsis and example invocations to stdout; the script
# name is taken from $SCRIPT_NAME.
usage() {
    printf '%s\n' \
        "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
        "" \
        "If no disks specified, auto-detects all available disks" \
        "" \
        "Examples:" \
        " $SCRIPT_NAME # Check all auto-detected disks" \
        " sudo $SCRIPT_NAME # Check all disks (as root)" \
        " $SCRIPT_NAME /dev/sda # Check specific disk" \
        " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" \
        " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" \
        " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks"
}
# Parse command line arguments
# Only the first argument is inspected for an option; anything else (including
# no argument at all) is forwarded unchanged to main.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
    usage
    exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
    echo "$SCRIPT_NAME version $VERSION"
    exit 0
else
    main "$@"
fi

690
old/obsolete/ubuntu-v2.7.sh Normal file
View File

@@ -0,0 +1,690 @@
#!/bin/bash
# Disk Health Check Script for Ubuntu 24.04
# Enhanced with SAS/PERC H730P controller support
# Checks SSD TBW/lifespan and HDD health status
# Name used in usage/version output; parameter expansion instead of basename
# so a leading "-" in $0 cannot be mistaken for an option.
SCRIPT_NAME=${0##*/}
VERSION="2.7"
# Color codes
# tput errors (missing binary, TERM unset or unknown, e.g. under cron) are
# discarded; the variables are then empty and the output is simply uncoloured.
RED=$(tput setaf 1 2>/dev/null)
GREEN=$(tput setaf 2 2>/dev/null)
YELLOW=$(tput setaf 3 2>/dev/null)
BLUE=$(tput setaf 4 2>/dev/null)
CYAN=$(tput setaf 6 2>/dev/null)
NC=$(tput sgr0 2>/dev/null)
# Function to print colored output
#
# Arguments:
#   $1 - colour escape sequence placed in front of the text
#   $2 - text to print (backslash escapes are interpreted by echo -e)
# The global $NC reset sequence is appended after the text.
print_color() {
    local tint="$1" text="$2"
    echo -e "${tint}${text}${NC}"
}
# Check if command exists
#
# Arguments: $1 - command name to look up
# Returns:   the status of "command -v" (0 when the name resolves); prints nothing.
command_exists() {
    command -v "$1" &>/dev/null
}
# Check dependencies
#
# Verifies that smartctl and bc are available. When any is missing, prints the
# packages to install (smartmontools / bc) and exits the script with status 1.
check_dependencies() {
    local absent=()
    local pair
    # Each entry is "command:package providing it"
    for pair in "smartctl:smartmontools" "bc:bc"; do
        command_exists "${pair%%:*}" || absent+=("${pair#*:}")
    done

    if (( ${#absent[@]} == 0 )); then
        return 0
    fi
    print_color "$RED" "Error: Missing required packages: ${absent[*]}"
    echo "Install with: sudo apt update && sudo apt install ${absent[*]}"
    exit 1
}
# Function to test SMART access
#
# Arguments:
#   $1 - device path
#   $2 - optional smartctl device type, passed as "-d TYPE"
# Outputs:
#   "full_access" when "smartctl -i" succeeds for the device, else "no_access".
test_smart_access() {
    local disk=$1
    local controller=$2
    local probe=(smartctl)
    if [[ -n "$controller" ]]; then
        probe+=(-d "$controller")
    fi

    # Test basic SMART access
    if "${probe[@]}" -i "$disk" &>/dev/null; then
        echo "full_access"
    else
        echo "no_access"
    fi
}
# Helper: print the numeric value of a "Label:   1,234 [632 GB]" style line of
# NVMe smartctl output, without thousands separators or a trailing "%".
# Prints nothing when the label is not present.
_nvme_health_value() {
    local report=$1
    local label=$2
    local line value _rest
    line=$(grep -i -m1 "^${label}:" <<< "$report") || return 0
    # First whitespace-delimited token after the colon is the value
    read -r value _rest <<< "${line#*:}"
    value=${value//,/}
    echo "${value%\%}"
}

# Function to get disk information with enhanced SAS and NVMe support
#
# Arguments:
#   $1 - device path
#   $2 - optional smartctl device type (passed as "-d TYPE")
# Outputs:
#   One '|'-separated record on stdout:
#   model|serial|capacity|firmware|health|type|power_on_hours|
#   reallocated_sectors|pending_sectors|total_written|host_writes_32mib|media_wearout
#   Fields that cannot be read are left empty. "type" is SSD, HDD or UNKNOWN.
#   For NVMe devices total_written is the "Data Units Written" counter and
#   media_wearout is 100 minus "Percentage Used" (never below 0).
get_disk_info() {
    local disk=$1
    local controller=$2
    local smart_cmd=(smartctl)
    [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

    local info attributes health
    info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
    attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
    health=$("${smart_cmd[@]}" -H "$disk" 2>/dev/null)

    # For NVMe drives, use -x for extended information
    local is_nvme=0
    if grep -qi "NVMe" <<< "$info"; then
        is_nvme=1
        attributes=$("${smart_cmd[@]}" -x "$disk" 2>/dev/null)
    fi

    # Extract information with multiple fallbacks
    local model vendor serial capacity firmware health_status
    model=$(echo "$info" | grep -i "Device Model:\|Product:\|Model Number:" | cut -d: -f2 | sed 's/^[[:space:]]*//' | head -1)
    vendor=$(echo "$info" | grep -i "Vendor:" | cut -d: -f2 | sed 's/^[[:space:]]*//' | head -1)
    [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model"
    serial=$(echo "$info" | grep -i "Serial Number:\|Serial number:" | cut -d: -f2 | sed 's/^[[:space:]]*//' | head -1)
    # Capacity keeps only the part in front of the bracketed short form
    capacity=$(echo "$info" | grep -i "User Capacity:\|Total NVM Capacity:\|Namespace 1 Size/Capacity:" | cut -d: -f2 | sed 's/^[[:space:]]*//' | cut -d'[' -f1 | head -1)
    firmware=$(echo "$info" | grep -i "Firmware Version:\|Firmware revision:\|Revision:" | cut -d: -f2 | sed 's/^[[:space:]]*//' | head -1)
    health_status=$(echo "$health" | grep -i "result:\|SMART overall-health\|Health Status:" | cut -d: -f2 | sed 's/^[[:space:]]*//' | head -1)
    [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep -i "SMART overall-health" | awk -F'[' '{print $2}' | cut -d']' -f1)

    # Get disk type
    local disk_type="UNKNOWN"
    if echo "$info" | grep -qi "Solid State Device\|NVMe"; then
        disk_type="SSD"
    elif echo "$info" | grep -qi "Rotation Rate"; then
        disk_type="HDD"
    elif echo "$info" | grep -qi "SCSI\|SAS"; then
        if echo "$info" | grep -qi "15000\|10000\|7200"; then
            disk_type="HDD"
        else
            disk_type="SSD"
        fi
    fi

    # Power-on hours: ATA attribute table first, then the "Label: value" form
    local power_on_hours=""
    if echo "$attributes" | grep -qi "Power_On_Hours"; then
        power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours" | head -1 | awk '{print $10}')
    elif echo "$attributes" | grep -qi "Power On Hours"; then
        power_on_hours=$(_nvme_health_value "$attributes" "Power On Hours")
    fi
    # Clean power_on_hours to remove non-numeric characters for HDDs
    if [[ "$disk_type" == "HDD" ]]; then
        power_on_hours=${power_on_hours%%[!0-9]*}
    fi

    local reallocated_sectors pending_sectors
    reallocated_sectors=$(echo "$attributes" | grep -i "Reallocated_Sector_Ct" | awk '{print $10}' | head -1)
    pending_sectors=$(echo "$attributes" | grep -i "Current_Pending_Sector" | awk '{print $10}' | head -1)

    # Write counters and wear indicators differ per drive family
    local total_written=""
    local host_writes_32mib=""
    local media_wearout=""
    if echo "$model" | grep -qi "KINGSTON.*SA400"; then
        # Kingston SA400 uses Flash_Writes_GiB and Lifetime_Writes_GiB
        total_written=$(echo "$attributes" | grep -i "Flash_Writes_GiB" | awk '{print $NF}' | head -1)
        [[ -z "$total_written" ]] && total_written=$(echo "$attributes" | grep -i "Lifetime_Writes_GiB" | awk '{print $NF}' | head -1)
        media_wearout=$(echo "$attributes" | grep -i "SSD_Life_Left" | awk '{print $NF}' | head -1)
    elif (( is_nvme )); then
        # NVMe lines look like "Data Units Written:  2,345,678 [1.20 TB]" and
        # "Percentage Used:  3%", so the last field is not the value.
        total_written=$(_nvme_health_value "$attributes" "Data Units Written")
        local pct_used
        pct_used=$(_nvme_health_value "$attributes" "Percentage Used")
        if [[ "$pct_used" =~ ^[0-9]+$ ]]; then
            # Convert percentage used to percentage remaining; the counter may
            # exceed 100, so clamp at 0.
            media_wearout=$(( 100 - 10#$pct_used ))
            (( media_wearout < 0 )) && media_wearout=0
        else
            media_wearout=$(_nvme_health_value "$attributes" "Available Spare")
        fi
    else
        # Standard SATA SSDs
        total_written=$(echo "$attributes" | grep -i "Total_LBAs_Written" | awk '{print $10}' | head -1)
        host_writes_32mib=$(echo "$attributes" | grep -i "Host_Writes_32MiB" | awk '{print $10}' | head -1)
        media_wearout=$(echo "$attributes" | grep -i "Media_Wearout_Indicator\|Wear_Leveling_Count" | awk '{print $10}' | head -1)
    fi

    echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout"
}
# Function to calculate TBW (decimal terabytes written) for an SSD
#
# Arguments:
#   $1 - raw counter value (GiB, NVMe data units or 32MiB units, see below)
#   $2 - sector count (Total_LBAs_Written, counted as 512-byte sectors)
#   $3 - disk model string
#   $4 - name of the SMART attribute the counter came from (may be empty)
# Outputs:
#   TBW with two decimals as printed by bc (values below 1 have no leading
#   zero), or "0" when no usable counter was supplied.
# Unit selection, first match wins:
#   Kingston SA400 model                       -> $1 is GiB
#   model contains "NVMe" or $4 names "Units"  -> $1 is NVMe data units,
#                                                 i.e. 1000 x 512 bytes each
#   $2 given                                   -> 512-byte sectors
#   $4 is Total_LBAs_Written                   -> $1 is 512-byte sectors
#   otherwise                                  -> $1 is Host_Writes_32MiB
calculate_tbw() {
    # Thousands separators are tolerated; anything that is still not a plain
    # non-negative integer (e.g. "GB]") is treated as missing.
    local raw_value=${1//,/}
    local sectors=${2//,/}
    local disk_model=$3
    local attribute_name=$4
    local integer_re='^[0-9]+$'
    [[ "$raw_value" =~ $integer_re ]] || raw_value=""
    [[ "$sectors" =~ $integer_re ]] || sectors=""

    local have_raw=0
    [[ -n "$raw_value" && "$raw_value" != "0" ]] && have_raw=1

    local count="" unit_bytes=0
    if (( have_raw )) && grep -qi "KINGSTON.*SA400" <<< "$disk_model"; then
        # Flash_Writes_GiB / Lifetime_Writes_GiB: 1 GiB = 2^30 bytes
        count=$raw_value
        unit_bytes=1073741824
    elif (( have_raw )) && { grep -qi "NVMe" <<< "$disk_model" || [[ "$attribute_name" == *[Uu]nits* ]]; }; then
        # Data Units Written is reported in thousands of 512-byte units
        count=$raw_value
        unit_bytes=512000
    elif [[ -n "$sectors" && "$sectors" != "0" ]]; then
        # Standard SATA SSDs with Total_LBAs_Written
        count=$sectors
        unit_bytes=512
    elif (( have_raw )) && [[ "$attribute_name" == "Total_LBAs_Written" ]]; then
        # Sector counter handed over in the raw-value slot
        count=$raw_value
        unit_bytes=512
    elif (( have_raw )); then
        # Host_Writes_32MiB: 32 MiB = 32 * 2^20 bytes
        count=$raw_value
        unit_bytes=33554432
    else
        echo "0"
        return
    fi

    local tbw
    tbw=$(echo "scale=2; $count * $unit_bytes / 1000000000000" | bc 2>/dev/null)
    echo "${tbw:-0}"
}
# Function to estimate SSD endurance based on model and capacity
#
# Arguments:
#   $1 - disk model string (matched case-insensitively against family patterns)
#   $2 - capacity in GB
# Outputs:
#   Estimated endurance in TB. Each family has capacity tiers written as
#   "minimum_capacity_gb:endurance_tb" (largest first) plus a fallback value
#   for drives below the smallest tier.
estimate_ssd_endurance() {
    local disk_model=$1
    local capacity_gb=$2
    local tiers=()
    local fallback

    if grep -qi "KINGSTON.*SA400" <<< "$disk_model"; then
        # Kingston consumer SSDs
        tiers=(960:300 480:150)
        fallback=80
    elif grep -qi "KINGSTON.*SA2000" <<< "$disk_model"; then
        # Kingston NVMe SSDs
        tiers=(2000:800 1000:400 500:200)
        fallback=100
    elif grep -qi "NVMe" <<< "$disk_model"; then
        # Other NVMe SSDs (1.2PB for 2TB and above)
        tiers=(2000:1200 1000:600 500:300)
        fallback=150
    elif grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD" <<< "$disk_model"; then
        # Enterprise SAS SSDs (10PB / 6PB / 4PB / 2PB)
        tiers=(1000:10000 600:6000 400:4000)
        fallback=2000
    elif grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL" <<< "$disk_model"; then
        # Enterprise SATA/NVMe SSDs
        tiers=(1000:1200 480:600 240:300)
        fallback=150
    else
        # Consumer SSDs
        tiers=(1000:600 480:300 240:150 120:80)
        fallback=40
    fi

    # First tier whose minimum capacity is reached wins
    local tier
    for tier in "${tiers[@]}"; do
        if [[ $capacity_gb -ge ${tier%%:*} ]]; then
            echo "${tier#*:}"
            return
        fi
    done
    echo "$fallback"
}
# Function to estimate SSD lifespan with TBW remaining
#
# Arguments:
#   $1 - power-on hours (thousands separators and trailing text are ignored)
#   $2 - TBW used so far, in TB (non-numeric counts as 0)
#   $3 - disk model string (forwarded to estimate_ssd_endurance)
#   $4 - capacity in GB (forwarded to estimate_ssd_endurance)
#   $5 - wear indicator, compared against the thresholds below as percent of
#        life remaining; used only when it is a non-zero integer
# Outputs:
#   One record "lifespan|tbw_remaining|status|source" on stdout, where source
#   is media_wearout, tbw, estimated, or empty when nothing can be estimated.
#   The first three fields are wrapped in colour sequences when a verdict exists.
estimate_ssd_lifespan() {
    local power_on_hours=$1
    local tbw_used=$2
    local disk_model=$3
    local capacity_gb=$4
    local media_wearout=$5

    # Always four fields, so the caller's cut -f1..-f4 stays aligned
    local unknown_record="Unknown|Unknown|Unknown|"

    local hours=${power_on_hours//,/}
    hours=${hours%%[!0-9]*}
    if [[ -z "$hours" ]] || (( 10#$hours == 0 )); then
        echo "$unknown_record"
        return
    fi

    # Guard the bc expressions below against empty or garbled input
    local decimal_re='^[0-9]*\.?[0-9]+$'
    [[ "$tbw_used" =~ $decimal_re ]] || tbw_used=0

    local estimated_endurance
    estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")
    # Handle case where estimated_endurance might be empty
    if [[ ! "$estimated_endurance" =~ ^[0-9]+$ ]] || (( 10#$estimated_endurance == 0 )); then
        echo "$unknown_record"
        return
    fi

    local tbw_remaining
    tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null)
    tbw_remaining=${tbw_remaining:-0}

    # If we have a usable wear indicator, prefer it over the TBW estimate
    if [[ "$media_wearout" =~ ^[0-9]+$ ]] && (( 10#$media_wearout != 0 )); then
        local level=$((10#$media_wearout))
        local tone status
        if (( level <= 10 )); then
            tone=$RED; status="Critical wear"
        elif (( level <= 30 )); then
            tone=$YELLOW; status="High wear"
        elif (( level <= 70 )); then
            tone=$YELLOW; status="Moderate wear"
        else
            tone=$GREEN; status="Healthy"
        fi
        echo "${tone}${media_wearout}%${NC}|${tone}${tbw_remaining} TB${NC}|${tone}${status}${NC}|media_wearout"
        return
    fi

    if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
        local lifespan_used lifespan_remaining
        lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null)
        lifespan_used=${lifespan_used:-0}
        lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null)
        lifespan_remaining=${lifespan_remaining:-100}
        if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then
            echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}High wear${NC}|tbw"
        elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then
            echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|tbw"
        else
            echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|tbw"
        fi
    else
        # No write counter: report the full estimated endurance
        echo "Unknown|${estimated_endurance} TB|New|estimated"
    fi
}
# Function to estimate HDD lifespan from sector health and power-on hours
#
# Arguments:
#   $1 - power-on hours; only the leading digits are used ("12345h+30m" -> 12345)
#   $2 - reallocated sector count (empty or non-numeric is treated as 0)
#   $3 - pending sector count (empty or non-numeric is treated as 0)
# Outputs:
#   One line on stdout: a colour-wrapped estimate plus the reason, or "Unknown"
#   when the sector counters are unremarkable and the hours are missing/zero.
estimate_hdd_lifespan() {
    local power_on_hours=$1
    local reallocated_sectors=$2
    local pending_sectors=$3

    # Keep the leading digit run of every input and force base 10, so values
    # such as "12345h+30m" or "0009" cannot break the numeric comparisons.
    local clean_hours=${power_on_hours%%[!0-9]*}
    reallocated_sectors=${reallocated_sectors%%[!0-9]*}
    pending_sectors=${pending_sectors%%[!0-9]*}
    clean_hours=$((10#${clean_hours:-0}))
    reallocated_sectors=$((10#${reallocated_sectors:-0}))
    pending_sectors=$((10#${pending_sectors:-0}))

    # Sector problems are reported even when the power-on hours are unknown;
    # a missing hour counter must not hide a failing disk.
    if (( pending_sectors > 0 )); then
        echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
    elif (( reallocated_sectors > 100 )); then
        echo "${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)"
    elif (( reallocated_sectors > 10 )); then
        echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)"
    elif (( clean_hours == 0 )); then
        echo "Unknown"
    elif (( clean_hours > 40000 )); then
        echo "${YELLOW}1-2 years${NC} (High usage: $clean_hours hours)"
    elif (( clean_hours > 25000 )); then
        echo "${GREEN}2-3 years${NC} (Moderate usage: $clean_hours hours)"
    else
        echo "${GREEN}> 3 years${NC} (Low usage: $clean_hours hours)"
    fi
}
# Function to check a single disk with enhanced error handling
#
# Arguments:
#   $1 - device path handed to smartctl (e.g. /dev/sda)
#   $2 - optional smartctl device type (e.g. "megaraid,3"); empty = direct access
# Outputs:
#   A human-readable report for the disk on stdout. Returns early, after an
#   explanatory message, when test_smart_access reports that SMART data cannot
#   be read.
check_disk() {
    local disk=$1
    local controller=$2

    # smartctl options shared by every query for this disk, so that RAID
    # members are addressed through their controller and not the base device.
    local smart_opts=()
    [[ -n "$controller" ]] && smart_opts+=(-d "$controller")

    print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
    echo "=================================================="

    # Test SMART access level
    local access_level
    access_level=$(test_smart_access "$disk" "$controller")
    case $access_level in
        "no_access")
            print_color "$RED" "ERROR: Cannot access disk through controller"
            echo "Possible reasons:"
            echo " - Controller doesn't support SMART passthrough"
            echo " - Disk is part of a hardware RAID array"
            echo " - Insufficient permissions (try running as root)"
            echo " - Controller busy or offline"
            echo ""
            return
            ;;
        "not_available")
            print_color "$YELLOW" "SMART not available on this disk"
            echo "This disk does not support SMART monitoring"
            echo ""
            return
            ;;
        "disabled")
            print_color "$YELLOW" "SMART is disabled on this disk"
            echo "SMART is available but currently disabled"
            echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk"
            echo ""
            return
            ;;
        "no_attributes")
            print_color "$YELLOW" "WARNING: Cannot read SMART attributes"
            echo "This is common with hardware RAID controllers like PERC H730P"
            echo "Try checking through the RAID management interface"
            echo ""
            return
            ;;
        "limited_attributes")
            # Not fatal: continue with whatever data is readable.
            print_color "$YELLOW" "NOTE: Limited SMART data available"
            echo "Controller is filtering some SMART attributes"
            ;;
    esac

    # Get disk information (one '|'-separated record); the fields are kept
    # local so they do not leak into the caller's scope.
    local disk_info
    disk_info=$(get_disk_info "$disk" "$controller")
    local model serial capacity firmware health_status disk_type power_on_hours
    local reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout
    IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info"

    # Display basic information
    echo "Model: ${model:-Unknown}"
    echo "Serial: ${serial:-Unknown}"
    echo "Type: $disk_type"
    echo "Capacity: ${capacity:-Unknown}"
    echo "Firmware: ${firmware:-Unknown}"
    echo "Health: ${health_status:-Unknown}"

    # Only show power on hours if available
    if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
        echo "Power On Hours: $power_on_hours"
    else
        echo "Power On Hours: Unknown"
    fi

    # Disk type specific analysis
    if [[ "$disk_type" == "SSD" ]]; then
        # Get the actual attribute name for TBW calculation, going through the
        # controller when one was given.
        local attributes=""
        if echo "$model" | grep -qi "NVMe"; then
            attributes=$(smartctl "${smart_opts[@]}" -x "$disk" 2>/dev/null)
        else
            attributes=$(smartctl "${smart_opts[@]}" -A "$disk" 2>/dev/null)
        fi
        local tbw_attribute_name
        tbw_attribute_name=$(echo "$attributes" | grep -i "Lifetime_Writes_GiB\|Flash_Writes_GiB\|Data Units Written\|Total_LBAs_Written" | head -1 | awk '{print $2}')

        # get_disk_info fills total_written with a GiB counter for Kingston
        # SA400, with NVMe data units for NVMe, and with Total_LBAs_Written
        # (a sector count) for everything else. Only the last case belongs in
        # calculate_tbw's sector argument.
        local written_is_lba=1
        if grep -qi "KINGSTON.*SA400" <<< "$model" || grep -qi "Data Units Written" <<< "$attributes"; then
            written_is_lba=0
        fi

        local tbw_used=0
        if [[ -n "$total_written" && "$total_written" != "0" ]]; then
            if (( written_is_lba )); then
                tbw_used=$(calculate_tbw "" "$total_written" "$model" "$tbw_attribute_name")
            else
                tbw_used=$(calculate_tbw "$total_written" "" "$model" "$tbw_attribute_name")
            fi
        elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
            tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model" "$tbw_attribute_name")
        fi

        # Always show TBW information for SSDs
        echo "TBW Used: ${tbw_used} TB"

        # Estimate capacity for endurance calculation
        local capacity_gb=0
        if echo "$capacity" | grep -qi "GB"; then
            capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. -f1)
        elif echo "$capacity" | grep -qi "TB"; then
            capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. -f1)
        else
            # Raw byte count such as "500,107,862,016 bytes": drop the
            # thousands separators first, otherwise only "500" is parsed.
            local capacity_bytes
            capacity_bytes=$(echo "$capacity" | tr -d ',' | grep -oE '[0-9]+' | head -1)
            capacity_gb=$(( 10#${capacity_bytes:-0} / 1000000000 ))
        fi

        # estimate_ssd_lifespan answers with "percent|remaining|status|source"
        local lifespan_info
        lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout")
        local lifespan_percent tbw_remaining wear_status wear_source
        lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1)
        tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2)
        wear_status=$(echo "$lifespan_info" | cut -d'|' -f3)
        wear_source=$(echo "$lifespan_info" | cut -d'|' -f4)
        echo "TBW Remaining: $tbw_remaining"
        echo "Lifespan: $lifespan_percent ($wear_status)"

        # Show wear source if available
        if [[ "$wear_source" == "media_wearout" ]]; then
            echo "Wear Source: Media Wearout Indicator"
        elif [[ "$wear_source" == "tbw" ]]; then
            echo "Wear Source: TBW Calculation"
        elif [[ "$wear_source" == "estimated" ]]; then
            echo "Wear Source: Estimated Endurance"
        fi
    elif [[ "$disk_type" == "HDD" ]]; then
        if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then
            echo "Realloc Sectors: $reallocated_sectors"
        fi
        if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then
            echo "Pending Sectors: $pending_sectors"
        fi
        local lifespan
        lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
        echo "Lifespan: $lifespan"
    else
        print_color "$YELLOW" "Limited information available for this disk type"
        echo "This is normal for hardware RAID configurations like PERC H730P"
        echo "For detailed SAS drive information, use controller management tools"
    fi
    echo ""
}
# Function to detect whole-disk block devices (the globs do not match partitions)
#
# Outputs:
#   All matching block devices on one line, separated by spaces, in probe
#   order: SATA/SAS (sd?), NVMe namespaces, SCSI generic (sg?), virtio (vd?)
#   and Xen (xvd?). An unmatched glob stays literal and is dropped by the
#   block-device test.
# NOTE(review): /dev/sg* nodes are normally character devices, so the -b test
# probably never accepts them - confirm whether they are wanted at all.
detect_disks() {
    local found=()
    for disk in /dev/sd[a-z] /dev/nvme[0-9]n[0-9] /dev/sg[0-9] /dev/vd[a-z] /dev/xvd[a-z]; do
        [[ -b "$disk" ]] && found+=("$disk")
    done
    echo "${found[@]}"
}
# Function to detect disks that sit behind a RAID controller
#
# For every supported smartctl device type and every slot number 0..31 the
# fixed list of base devices is probed; the first base device that answers
# "smartctl -d TYPE,SLOT -i" is recorded and the next slot is tried.
# Outputs:
#   Space-separated "DEVICE:TYPE,SLOT" entries on one line (empty line if none).
detect_raid_controllers() {
    local driver_types=("megaraid" "cciss" "areca" "3ware" "hpt")
    local probe_nodes=("/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1")
    local hits=()
    for controller in "${driver_types[@]}"; do
        for i in {0..31}; do
            for base_disk in "${probe_nodes[@]}"; do
                # Skip nodes that do not exist as block devices
                [[ -b "$base_disk" ]] || continue
                # Skip nodes that do not answer for this type/slot
                smartctl -d "$controller,$i" -i "$base_disk" &>/dev/null || continue
                hits+=("$base_disk:$controller,$i")
                break
            done
        done
    done
    echo "${hits[@]}"
}
# Main function
#
# Arguments:
#   Optional block-device paths. Without arguments the disks are auto-detected
#   (direct devices first, then "DEVICE:TYPE,SLOT" RAID members).
# Behaviour:
#   Prints the banner, verifies dependencies, runs check_disk for every target
#   and finishes with usage hints. Exits with status 1 when no target remains.
main() {
    print_color "$BLUE" "Ubuntu 24.04 Disk Health Check Script v$VERSION"
    print_color "$BLUE" "Enhanced with PERC H730P and SAS Support"
    print_color "$BLUE" "============================================"
    echo ""
    check_dependencies

    local targets=()
    if (( $# == 0 )); then
        # Auto-detect disks
        print_color "$CYAN" "Auto-detecting disks (excluding partitions)..."
        local plain_devices=()
        read -ra plain_devices <<< "$(detect_disks)"
        print_color "$CYAN" "Scanning for RAID controllers..."
        local raid_entries=()
        read -ra raid_entries <<< "$(detect_raid_controllers)"
        # Combine both lists
        targets=("${plain_devices[@]}" "${raid_entries[@]}")
    else
        # Specific disks were requested: keep only real block devices
        for disk in "$@"; do
            if [[ ! -b "$disk" ]]; then
                print_color "$RED" "Error: $disk is not a valid block device"
                continue
            fi
            targets+=("$disk")
        done
    fi

    if (( ${#targets[@]} == 0 )); then
        print_color "$RED" "No disks found or accessible"
        echo "Try running as root or specifying disk paths manually"
        exit 1
    fi
    print_color "$GREEN" "Found ${#targets[@]} disk(s) to check"
    echo ""

    # Warn when not running as root
    if (( EUID != 0 )); then
        print_color "$YELLOW" "Warning: Not running as root."
        print_color "$YELLOW" "Some disks/controllers may show limited information."
        echo "For complete results, run as: sudo $0"
        echo ""
    fi

    # Check each disk; an entry containing ':' carries its controller type
    for disk_info in "${targets[@]}"; do
        case "$disk_info" in
            *:*)
                IFS=':' read -r disk controller <<< "$disk_info"
                check_disk "$disk" "$controller"
                ;;
            *)
                check_disk "$disk_info"
                ;;
        esac
    done

    print_color "$BLUE" "Check completed!"
    echo ""
    local hint
    for hint in \
        "Note: For PERC H730P controllers with SAS drives:" \
        " - Install 'storcli' for detailed controller information" \
        " - Use 'smartctl -d sat /dev/sgX' to try direct access" \
        " - Hardware RAID controllers often limit SMART data access"; do
        print_color "$CYAN" "$hint"
    done
    echo ""
    for hint in \
        "Ubuntu-specific tips:" \
        " - Use 'lsblk' to see all available block devices" \
        " - Use 'lshw -class disk' for detailed disk information"; do
        print_color "$CYAN" "$hint"
    done
}
# Usage information
#
# Prints the command synopsis and example invocations to stdout; the script
# name is taken from $SCRIPT_NAME.
usage() {
    printf '%s\n' \
        "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
        "" \
        "If no disks specified, auto-detects all available disks" \
        "" \
        "Examples:" \
        " $SCRIPT_NAME # Check all auto-detected disks" \
        " sudo $SCRIPT_NAME # Check all disks (as root)" \
        " $SCRIPT_NAME /dev/sda # Check specific disk" \
        " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" \
        " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" \
        " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks"
}
# Parse command line arguments
# Only the first argument is inspected for an option; anything else (including
# no argument at all) is forwarded unchanged to main.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
    usage
    exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
    echo "$SCRIPT_NAME version $VERSION"
    exit 0
else
    main "$@"
fi

0
old/ubuntu-v2.8.sh → old/obsolete/ubuntu-v2.8.sh Executable file → Normal file
View File

0
old/ubuntu-v3.0.sh → old/obsolete/ubuntu-v3.0.sh Executable file → Normal file
View File

0
ubuntu-v2.3.sh → old/ubuntu-v2.3.sh Executable file → Normal file
View File

0
ubuntu-v2.4.sh → old/ubuntu-v2.4.sh Executable file → Normal file
View File

View File

@@ -1,8 +1,8 @@
#!/bin/bash
# Disk Health Check Script for Ubuntu 24.04
# Enhanced with SAS/PERC H730P controller support
# Checks SSD TBW/lifespan and HDD health status
# Disk Health Check Script for Ubuntu
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
# Supports consumer and enterprise disk classification
SCRIPT_NAME=$(basename "$0")
VERSION="2.5"
@@ -13,6 +13,7 @@ GREEN=$(tput setaf 2)
YELLOW=$(tput setaf 3)
BLUE=$(tput setaf 4)
CYAN=$(tput setaf 6)
MAGENTA=$(tput setaf 5)
NC=$(tput sgr0)
# Function to print colored output
@@ -46,190 +47,153 @@ check_dependencies() {
fi
}
# Function to test SMART access and report how much data can be read
# Arguments:
#   $1 - device path handed to smartctl
#   $2 - optional smartctl device type for -d (e.g. "megaraid,0"); may be empty
# Outputs (stdout), exactly one word:
#   no_access     - "smartctl -i" exited non-zero
#   not_available - no "SMART support is:" line, or none of them says "Available"
#   disabled      - SMART is available but no line says "Enabled"
#   no_attributes - "smartctl -A" printed no ATTRIBUTE_NAME table
#   full_access   - identity and attribute table are both readable
test_smart_access() {
  local disk=$1
  local controller=$2

  # Build the command as an array so the controller value is passed to
  # smartctl as a single argument, never re-split or glob-expanded.
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  # One invocation serves both as the access probe (exit status) and as the
  # source of the identity text parsed below.
  local smart_info
  if ! smart_info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null); then
    echo "no_access"
    return
  fi

  # Keep only the "SMART support is:" lines.
  local support_lines
  support_lines=$(grep "SMART support is:" <<< "$smart_info")

  # Case-sensitive on purpose: "Unavailable" must not count as "Available".
  if [[ -z "$support_lines" || "$support_lines" != *Available* ]]; then
    echo "not_available"
    return
  fi

  if [[ "$support_lines" != *Enabled* ]]; then
    echo "disabled"
    return
  fi

  # Test attribute reading: the table header is the marker for usable data.
  local attributes
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  if [[ "$attributes" != *ATTRIBUTE_NAME* ]]; then
    echo "no_attributes"
    return
  fi

  echo "full_access"
}
# TBW endurance standards (using lowest numbers)
# Consumer-class lookup table: key is the capacity tier in GB, value is the
# endurance figure reported to the user in TB.
declare -A CONSUMER_TBW
CONSUMER_TBW[250]=150
CONSUMER_TBW[500]=300
CONSUMER_TBW[1000]=600
CONSUMER_TBW[2000]=1200
CONSUMER_TBW[4000]=2400
CONSUMER_TBW[8000]=4800
# Function to get disk information with enhanced SAS support
# Runs smartctl -i / -A / -H once each and condenses the result into one line.
# Arguments:
#   $1 - device path handed to smartctl
#   $2 - optional smartctl device type for -d (e.g. "megaraid,0"); may be empty
# Outputs (stdout): a single '|'-separated record, fields in this order:
#   model|serial|capacity|firmware|health_status|disk_type|power_on_hours|
#   reallocated_sectors|pending_sectors|total_written|host_writes_32mib|media_wearout
#   Fields that cannot be found are left empty; disk_type falls back to UNKNOWN.
get_disk_info() {
  local disk=$1
  local controller=$2

  # Array form keeps the controller value a single argument (no re-splitting).
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  local info attributes health
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  health=$("${smart_cmd[@]}" -H "$disk" 2>/dev/null)

  # Extract information with multiple fallbacks: ATA prints "Device Model:",
  # NVMe prints "Model Number:", SCSI/SAS prints "Vendor:" + "Product:".
  local model vendor serial capacity firmware health_status
  model=$(echo "$info" | grep -i "Device Model:\|Model Number:\|Product:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  vendor=$(echo "$info" | grep -i "Vendor:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model"
  serial=$(echo "$info" | grep -i "Serial Number:\|Serial number:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  # Capacity keeps only the part before the bracketed human-readable size.
  capacity=$(echo "$info" | grep -i "User Capacity:\|Total NVM Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1 | head -1)
  firmware=$(echo "$info" | grep -i "Firmware Version:\|Firmware revision:\|Revision:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)

  # ATA/NVMe report "... test result: PASSED"; SCSI/SAS devices report
  # "SMART Health Status: OK", which must be matched explicitly.
  health_status=$(echo "$health" | grep -i "result:\|SMART overall-health\|SMART Health Status:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep -i "SMART overall-health" | awk -F'[' '{print $2}' | cut -d']' -f1)

  # Get disk type with SAS support
  local disk_type="UNKNOWN"
  if echo "$info" | grep -qi "Solid State Device"; then
    disk_type="SSD"
  elif echo "$info" | grep -qi "Rotation Rate"; then
    disk_type="HDD"
  elif echo "$info" | grep -qi "SCSI\|SAS"; then
    # No rotation-rate line: guess from a typical spindle speed appearing
    # anywhere in the identity text, otherwise assume flash.
    if echo "$info" | grep -qi "15000\|10000\|7200"; then
      disk_type="HDD"
    else
      disk_type="SSD"
    fi
  fi

  # Extract SMART attributes; column 10 of the attribute table is RAW_VALUE.
  local power_on_hours reallocated_sectors pending_sectors
  power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours" | awk '{print $10}' | head -1)
  reallocated_sectors=$(echo "$attributes" | grep -i "Reallocated_Sector_Ct" | awk '{print $10}' | head -1)
  pending_sectors=$(echo "$attributes" | grep -i "Current_Pending_Sector" | awk '{print $10}' | head -1)

  # For Kingston and other SSDs with different attribute names
  local total_written host_writes_32mib
  total_written=$(echo "$attributes" | grep -i "Total_LBAs_Written\|Lifetime_Writes_GiB\|Host_Writes_32MiB\|Flash_Writes_GiB" | awk '{print $10}' | head -1)
  host_writes_32mib=$(echo "$attributes" | grep -i "Host_Writes_32MiB" | awk '{print $10}' | head -1)

  # For wear leveling indicators
  local media_wearout
  media_wearout=$(echo "$attributes" | grep -i "Media_Wearout_Indicator\|Wear_Leveling_Count\|SSD_Life_Left" | awk '{print $10}' | head -1)

  echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout"
}
# Enterprise-class lookup table: key is the capacity tier in GB, value is the
# endurance figure reported to the user in TB.
declare -A ENTERPRISE_TBW
ENTERPRISE_TBW[250]=450
ENTERPRISE_TBW[500]=900
ENTERPRISE_TBW[1000]=1800
ENTERPRISE_TBW[2000]=3600
ENTERPRISE_TBW[4000]=7200
ENTERPRISE_TBW[8000]=14400
# Function to calculate TBW for SSD - ENHANCED FOR KINGSTON
calculate_tbw() {
local raw_value=$1
local sectors=$2
local disk_model=$3
# Function to get closest capacity tier
get_capacity_tier() {
local capacity_gb=$1
local tiers=("250" "500" "1000" "2000" "4000" "8000")
# Kingston SSDs use Lifetime_Writes_GiB and Flash_Writes_GiB
if echo "$disk_model" | grep -qi "KINGSTON"; then
if [[ -n "$raw_value" && "$raw_value" != "0" ]]; then
# Convert from GiB to TB
local tbw=$(echo "scale=2; $raw_value / 1000" | bc 2>/dev/null || echo "0")
echo "$tbw"
for tier in "${tiers[@]}"; do
if [[ $capacity_gb -le $tier ]]; then
echo $tier
return
fi
done
# For larger than 8TB, use proportional scaling from 4TB
echo "8000"
}
# Function to convert bytes to human readable
bytes_to_human() {
local bytes=$1
if [[ $bytes -ge 1099511627776 ]]; then
echo "$(echo "scale=2; $bytes / 1099511627776" | bc 2>/dev/null || echo "0") TB"
elif [[ $bytes -ge 1073741824 ]]; then
echo "$(echo "scale=2; $bytes / 1073741824" | bc 2>/dev/null || echo "0") GB"
elif [[ $bytes -ge 1048576 ]]; then
echo "$(echo "scale=2; $bytes / 1048576" | bc 2>/dev/null || echo "0") MB"
else
echo "$bytes bytes"
fi
if [[ -n "$sectors" && "$sectors" != "0" ]]; then
local bytes=$((sectors * 512))
local tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0")
echo "$tbw"
elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then
local tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0")
echo "$tbw"
}
# Function to extract numeric hours from power_on_hours field
# Arguments: $1 - raw power-on-hours text (e.g. "12345" or "12345h+30m+12.345s")
# Outputs:   the leading run of digits, or "0" when the text does not start
#            with a digit.
extract_numeric_hours() {
  local power_on_hours=$1
  local leading_digits

  # Cut the value at the first character that is not a digit.
  leading_digits=$(sed 's/[^0-9].*$//' <<< "$power_on_hours")

  if [[ ! "$leading_digits" =~ ^[0-9]+$ ]]; then
    echo "0"
    return
  fi
  echo "$leading_digits"
}
# Function to estimate SSD endurance based on model and capacity
estimate_ssd_endurance() {
local disk_model=$1
local capacity_gb=$2
# Function to get disk type and interface
get_disk_info() {
local disk=$1
# Kingston consumer SSDs
if echo "$disk_model" | grep -qi "KINGSTON.*SA400"; then
if [[ $capacity_gb -ge 960 ]]; then
echo "300" # 300TB for 960GB Kingston SA400
elif [[ $capacity_gb -ge 480 ]]; then
echo "150" # 150TB for 480GB Kingston
else
echo "80" # 80TB for smaller Kingston
local info=$(smartctl -i "$disk" 2>/dev/null)
local transport=""
local disk_type="UNKNOWN"
local is_enterprise=false
# Check if it's NVMe
if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then
disk_type="NVMe"
transport="NVMe"
# Check for SAS
elif echo "$info" | grep -qi "SAS"; then
disk_type="SAS"
transport="SAS"
is_enterprise=true
# Check for SATA SSD
elif echo "$info" | grep -qi "Solid State Device"; then
disk_type="SSD"
transport="SATA"
# Check for SATA HDD
elif echo "$info" | grep -qi "Rotation Rate"; then
disk_type="HDD"
transport="SATA"
fi
# Check for enterprise features
if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then
is_enterprise=true
fi
# Check device type by model name
local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//')
if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then
if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then
is_enterprise=true
fi
# SAS SSDs typically have very high endurance
elif echo "$disk_model" | grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD"; then
# Enterprise SAS SSDs - very high endurance
if [[ $capacity_gb -ge 1000 ]]; then
echo "10000" # 10PB for 1TB+ enterprise SAS SSD
elif [[ $capacity_gb -ge 600 ]]; then
echo "6000" # 6PB for 600GB enterprise SAS SSD
elif [[ $capacity_gb -ge 400 ]]; then
echo "4000" # 4PB for 400GB enterprise SAS SSD
fi
echo "$disk_type|$transport|$is_enterprise"
}
# Function to calculate TBW for SSD/NVMe
calculate_tbw() {
local disk_type=$1
local raw_value=$2
local sectors=$3
local tbw=0
if [[ -n "$sectors" && "$sectors" != "0" ]]; then
# Calculate from sectors (most common for SATA SSDs)
local bytes=$((sectors * 512))
tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0")
elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then
if [[ "$disk_type" == "NVMe" ]]; then
# NVMe: raw value is in 32MB units
tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0")
else
echo "2000" # 2PB for smaller enterprise SAS SSD
fi
elif echo "$disk_model" | grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL"; then
# Enterprise SATA/NVMe SSDs
if [[ $capacity_gb -ge 1000 ]]; then
echo "1200" # 1.2PB for 1TB enterprise
elif [[ $capacity_gb -ge 480 ]]; then
echo "600" # 600TB for 480GB enterprise
elif [[ $capacity_gb -ge 240 ]]; then
echo "300" # 300TB for 240GB enterprise
else
echo "150" # 150TB for smaller enterprise
# SATA SSD: various manufacturers
tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0")
fi
fi
echo "$tbw"
}
# Function to get estimated endurance
get_estimated_endurance() {
local capacity_gb=$1
local is_enterprise=$2
local disk_type=$3
# HDDs don't have TBW
if [[ "$disk_type" == "HDD" ]]; then
echo "N/A"
return
fi
local capacity_tier=$(get_capacity_tier "$capacity_gb")
if [[ "$is_enterprise" == "true" ]]; then
echo "${ENTERPRISE_TBW[$capacity_tier]}"
else
# Consumer SSDs
if [[ $capacity_gb -ge 1000 ]]; then
echo "600" # 600TB for 1TB consumer
elif [[ $capacity_gb -ge 480 ]]; then
echo "300" # 300TB for 480GB consumer
elif [[ $capacity_gb -ge 240 ]]; then
echo "150" # 150TB for 240GB consumer
elif [[ $capacity_gb -ge 120 ]]; then
echo "80" # 80TB for 120GB consumer
else
echo "40" # 40TB for smaller drives
fi
echo "${CONSUMER_TBW[$capacity_tier]}"
fi
}
@@ -237,60 +201,40 @@ estimate_ssd_endurance() {
estimate_ssd_lifespan() {
local power_on_hours=$1
local tbw_used=$2
local disk_model=$3
local capacity_gb=$4
local media_wearout=$5
local estimated_endurance=$3
local disk_type=$4
if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then
echo "Unknown||Unknown||Unknown"
echo "Unknown||Unknown|New"
return
fi
local estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")
local tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0")
# If we have media wearout indicator, use it for more accurate estimation
if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then
# For Kingston, SSD_Life_Left is already a percentage
if echo "$disk_model" | grep -qi "KINGSTON"; then
if [[ $media_wearout -le 10 ]]; then
echo "${RED}${media_wearout}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout"
elif [[ $media_wearout -le 30 ]]; then
echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout"
elif [[ $media_wearout -le 70 ]]; then
echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout"
else
echo "${GREEN}${media_wearout}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout"
fi
else
# For other drives, media_wearout might be countdown from 100
local wear_percent=$media_wearout
if [[ $media_wearout -le 10 ]]; then
echo "${RED}${wear_percent}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout"
elif [[ $media_wearout -le 30 ]]; then
echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout"
elif [[ $media_wearout -le 70 ]]; then
echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout"
else
echo "${GREEN}${wear_percent}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout"
fi
fi
if [[ "$estimated_endurance" == "N/A" ]]; then
echo "N/A|N/A|N/A|HDD"
return
fi
if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
local lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0")
# Handle the case where tbw_used might have formatting issues
local clean_tbw_used=$(echo "$tbw_used" | sed 's/[^0-9.]//g')
if [[ -z "$clean_tbw_used" ]]; then
clean_tbw_used=0
fi
local tbw_remaining=$(echo "scale=2; $estimated_endurance - $clean_tbw_used" | bc 2>/dev/null || echo "$estimated_endurance")
if [[ $(echo "$clean_tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
local lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0")
local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100")
if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then
echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}High wear${NC}|tbw"
echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type"
elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then
echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|tbw"
echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type"
else
echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|tbw"
echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type"
fi
else
echo "Unknown|${estimated_endurance} TB|New|estimated"
echo "Unknown|${estimated_endurance} TB|New|$disk_type"
fi
}
@@ -299,185 +243,344 @@ estimate_hdd_lifespan() {
local power_on_hours=$1
local reallocated_sectors=$2
local pending_sectors=$3
local start_stop_count=$4
local load_cycle_count=$5
local disk_type=$6
if [[ -z "$power_on_hours" ]]; then
# Extract numeric hours only
local numeric_hours=$(extract_numeric_hours "$power_on_hours")
if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then
echo "Unknown"
return
fi
power_on_hours=${power_on_hours:-0}
reallocated_sectors=${reallocated_sectors:-0}
pending_sectors=${pending_sectors:-0}
local severity=0
# Critical issues
if [[ "$pending_sectors" -gt 0 ]]; then
echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
return
elif [[ "$reallocated_sectors" -gt 100 ]]; then
echo "${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)"
severity=$((severity + 3))
elif [[ "$reallocated_sectors" -gt 10 ]]; then
echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)"
elif [[ "$power_on_hours" -gt 40000 ]]; then
echo "${YELLOW}1-2 years${NC} (High usage: $power_on_hours hours)"
elif [[ "$power_on_hours" -gt 25000 ]]; then
echo "${GREEN}2-3 years${NC} (Moderate usage: $power_on_hours hours)"
severity=$((severity + 2))
elif [[ "$reallocated_sectors" -gt 0 ]]; then
severity=$((severity + 1))
fi
# Usage-based assessment
if [[ "$numeric_hours" -gt 50000 ]]; then
severity=$((severity + 3))
elif [[ "$numeric_hours" -gt 30000 ]]; then
severity=$((severity + 2))
elif [[ "$numeric_hours" -gt 15000 ]]; then
severity=$((severity + 1))
fi
# Mechanical wear (for HDDs)
if [[ "$disk_type" == "HDD" ]]; then
if [[ "$start_stop_count" -gt 50000 ]]; then
severity=$((severity + 2))
elif [[ "$start_stop_count" -gt 20000 ]]; then
severity=$((severity + 1))
fi
if [[ "$load_cycle_count" -gt 500000 ]]; then
severity=$((severity + 2))
elif [[ "$load_cycle_count" -gt 200000 ]]; then
severity=$((severity + 1))
fi
fi
if [[ $severity -ge 5 ]]; then
echo "${RED}< 6 months${NC} (Multiple risk factors)"
elif [[ $severity -ge 3 ]]; then
echo "${YELLOW}6-18 months${NC} (Moderate wear)"
elif [[ $severity -ge 1 ]]; then
echo "${YELLOW}1-3 years${NC} (Light wear)"
else
echo "${GREEN}> 3 years${NC} (Low usage: $power_on_hours hours)"
echo "${GREEN}> 3 years${NC} (Healthy)"
fi
}
# Function to check a single disk with enhanced error handling
# Function to check soft-raid (MDRAID)
# Reads /proc/mdstat, and for every line starting with "md<digits>" whose
# /dev node is a block device prints a banner plus, when mdadm is installed
# and returns details, the RAID level, state and active device count.
# Globals:   MAGENTA (read)
# Calls:     print_color, command_exists (defined elsewhere in this script)
# Arguments: none
# Outputs:   human-readable report on stdout; nothing if no arrays are found
check_mdraid() {
  local -a md_devices=()
  # Loop variables are declared local so they do not leak into the caller.
  local line md md_info

  if [[ -f /proc/mdstat ]]; then
    while IFS= read -r line; do
      if [[ $line =~ ^md[0-9]+ ]]; then
        # First whitespace-delimited word of the line is the array name.
        md_devices+=("/dev/${line%% *}")
      fi
    done < /proc/mdstat
  fi

  for md in "${md_devices[@]}"; do
    [[ -b "$md" ]] || continue
    print_color $MAGENTA "Found software RAID: $md"

    command_exists mdadm || continue
    md_info=$(mdadm --detail "$md" 2>/dev/null)
    [[ -n "$md_info" ]] || continue

    echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')"
    # head -1: several lines of the detail output contain the word "State";
    # only the first match is reported.
    echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo ""
  done
}
# Function to extract capacity in GB from various formats
# Arguments: $1 - capacity text, e.g. "500,107,862,016 bytes", "[1.82 TB]",
#                 "[500.1 GB]", "1.82TB" or "500.1GB"
# Outputs:   whole decimal gigabytes (1 TB = 1000 GB, fractions truncated),
#            or "0" when no supported pattern is found.
# Patterns are tried in this priority order: bracketed TB, bracketed GB,
# byte count, bare TB, bare GB.
extract_capacity_gb() {
  local capacity=$1
  local capacity_gb=0
  local size="" unit=""

  # The regexes live in variables and use [[:space:]]: a backslash-s written
  # inline in [[ =~ ]] is treated by bash as a literal "s", so patterns such
  # as "[1.82 TB]" or "N bytes" would never match.
  local re_bracket_tb='\[([0-9,.]+)[[:space:]]*[Tt][Bb]'
  local re_bracket_gb='\[([0-9,.]+)[[:space:]]*[Gg][Bb]'
  local re_bytes='([0-9,]+)[[:space:]]+bytes'
  local re_plain_tb='([0-9,.]+)[[:space:]]*[Tt][Bb]'
  local re_plain_gb='([0-9,.]+)[[:space:]]*[Gg][Bb]'
  local re_number='^([0-9]*)\.?([0-9]*)$'

  if [[ $capacity =~ $re_bracket_tb ]]; then
    # Pattern: [1.82 TB]
    size=${BASH_REMATCH[1]}; unit="TB"
  elif [[ $capacity =~ $re_bracket_gb ]]; then
    # Pattern: [500.1 GB]
    size=${BASH_REMATCH[1]}; unit="GB"
  elif [[ $capacity =~ $re_bytes ]]; then
    # Pattern: 500,107,862,016 bytes
    size=${BASH_REMATCH[1]}; unit="B"
  elif [[ $capacity =~ $re_plain_tb ]]; then
    # Pattern: 1.82TB
    size=${BASH_REMATCH[1]}; unit="TB"
  elif [[ $capacity =~ $re_plain_gb ]]; then
    # Pattern: 500.1GB
    size=${BASH_REMATCH[1]}; unit="GB"
  fi

  # Drop thousands separators, then require a plain decimal number with at
  # least one digit; anything else (e.g. "1.2.3") is treated as unparseable.
  size=${size//,/}
  if [[ "$size" == *[0-9]* && "$size" =~ $re_number ]]; then
    local whole=${BASH_REMATCH[1]:-0}
    # Right-pad the fraction so its first three digits are thousandths.
    local frac="${BASH_REMATCH[2]}000"
    # 10# forces base 10 so leading zeros are not read as octal.
    case "$unit" in
      TB) capacity_gb=$(( 10#$whole * 1000 + 10#${frac:0:3} )) ;;
      GB) capacity_gb=$(( 10#$whole )) ;;
      B)  capacity_gb=$(( 10#$whole / 1000000000 )) ;;
    esac
  fi

  # Ensure we have a valid number
  if (( capacity_gb > 0 )); then
    echo "$capacity_gb"
  else
    echo "0"
  fi
}
# Function to get NVMe capacity using smartctl
# Arguments: $1 - device path handed to "smartctl -i"
# Outputs:   the text after the colon of the first field that yields a value,
#            tried in order: "Total NVM Capacity", "Namespace 1 Size/Capacity",
#            then "User Capacity" (cut at the first '['); empty if none match.
get_nvme_capacity() {
  local disk=$1
  local identify field
  local found=""

  identify=$(smartctl -i "$disk" 2>/dev/null)

  # Try to get capacity from different fields
  for field in "Total NVM Capacity" "Namespace 1 Size/Capacity"; do
    found=$(echo "$identify" | grep -i "$field" | cut -d: -f2 | sed 's/^[ \t]*//')
    if [[ -n "$found" ]]; then
      break
    fi
  done

  # Last resort: the ATA-style field, without its bracketed suffix.
  if [[ -z "$found" ]]; then
    found=$(echo "$identify" | grep -i "User Capacity" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
  fi

  echo "$found"
}
# Function to get human readable capacity
# Arguments:
#   $1 - raw capacity text (accepted for interface compatibility; not used)
#   $2 - capacity in whole decimal gigabytes
# Outputs: "<X.YZ> TB" (truncated to two decimals) for 1000 GB and above,
#          otherwise "<N> GB".
get_human_capacity() {
  local capacity=$1
  local capacity_gb=$2

  if [[ $capacity_gb -ge 1000 ]]; then
    # Integer arithmetic gives the same truncated two-decimal result without
    # depending on an external calculator that could fail and print " TB".
    printf '%d.%02d TB\n' "$(( capacity_gb / 1000 ))" "$(( (capacity_gb % 1000) / 10 ))"
  else
    echo "${capacity_gb} GB"
  fi
}
# Function to check a single disk
check_disk() {
local disk=$1
local controller=$2
print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})"
print_color $CYAN "Checking disk: $disk"
echo "=================================================="
# Test SMART access level
local access_level=$(test_smart_access "$disk" "$controller")
case $access_level in
"no_access")
print_color $RED "ERROR: Cannot access disk through controller"
echo "Possible reasons:"
echo " - Controller doesn't support SMART passthrough"
echo " - Disk is part of a hardware RAID array"
echo " - Insufficient permissions (try running as root)"
echo " - Controller busy or offline"
echo ""
return
;;
"not_available")
print_color $YELLOW "SMART not available on this disk"
echo "This disk does not support SMART monitoring"
echo ""
return
;;
"disabled")
print_color $YELLOW "SMART is disabled on this disk"
echo "SMART is available but currently disabled"
echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk"
echo ""
return
;;
"no_attributes")
print_color $YELLOW "WARNING: Cannot read SMART attributes"
echo "This is common with hardware RAID controllers like PERC H730P"
echo "Try checking through the RAID management interface"
echo ""
return
;;
"limited_attributes")
print_color $YELLOW "NOTE: Limited SMART data available"
echo "Controller is filtering some SMART attributes"
;;
esac
# Check if disk exists and is accessible
if [[ ! -b "$disk" ]]; then
print_color $RED "Error: $disk is not a valid block device"
echo ""
return
fi
# Get disk information
local disk_info=$(get_disk_info "$disk" "$controller")
IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info"
local disk_info=$(get_disk_info "$disk")
local disk_type=$(echo "$disk_info" | cut -d'|' -f1)
local transport=$(echo "$disk_info" | cut -d'|' -f2)
local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3)
# Get basic disk information
local info=$(smartctl -i "$disk" 2>/dev/null)
local health=$(smartctl -H "$disk" 2>/dev/null)
local attributes=$(smartctl -A "$disk" 2>/dev/null)
# Check if smartctl command succeeded
if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then
print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access."
echo ""
return
fi
# Extract disk information
local model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//')
[[ -z "$model" ]] && model=$(echo "$info" | grep "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//')
local serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//')
local capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
local firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//')
# For NVMe disks, try to get capacity from different fields
if [[ "$disk_type" == "NVMe" ]]; then
local nvme_capacity=$(get_nvme_capacity "$disk")
if [[ -n "$nvme_capacity" ]]; then
capacity="$nvme_capacity"
fi
fi
# Extract capacity in GB and human readable format
local capacity_gb=$(extract_capacity_gb "$capacity")
local capacity_human=$(get_human_capacity "$capacity" "$capacity_gb")
# If capacity extraction failed, try alternative method
if [[ "$capacity_gb" -eq 0 ]]; then
# Try to get capacity from model name or other methods
if [[ "$disk_type" == "NVMe" && "$model" =~ 500[Gg] ]]; then
capacity_gb=500
capacity_human="500 GB"
elif [[ "$model" =~ 960[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then
capacity_gb=1000
capacity_human="1 TB"
elif [[ "$model" =~ 2[Tt] ]]; then
capacity_gb=2000
capacity_human="2 TB"
elif [[ "$model" =~ 500[Gg] ]]; then
capacity_gb=500
capacity_human="500 GB"
elif [[ "$model" =~ 250[Gg] ]]; then
capacity_gb=250
capacity_human="250 GB"
fi
fi
local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//')
[[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//')
# Extract SMART attributes
local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1)
local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw")
local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}' | head -1)
local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1)
local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}' | head -1)
local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1)
local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1)
local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}' | head -1)
# For NVMe disks using smartctl extended attributes
if [[ "$disk_type" == "NVMe" ]]; then
local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null)
# Extract data units written for NVMe
local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $4}' | tr -d ',')
if [[ -n "$data_units_written" ]]; then
# Convert data units to sectors (1 data unit = 1000 sectors for NVMe)
total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null)
else
# Try alternative field
data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',')
if [[ -n "$data_units_written" ]]; then
total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null)
fi
fi
# Get power on hours for NVMe
local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}')
if [[ -n "$nvme_power_hours" ]]; then
power_on_hours="$nvme_power_hours"
fi
fi
# Display basic information
echo "Model: ${model:-Unknown}"
echo "Serial: ${serial:-Unknown}"
echo "Type: $disk_type"
echo "Capacity: ${capacity:-Unknown}"
echo "Interface: $transport"
echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")"
echo "Capacity: $capacity_human"
echo "Firmware: ${firmware:-Unknown}"
echo "Health: ${health_status:-Unknown}"
# Only show power on hours if available
if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
echo "Power On Hours: $power_on_hours"
else
echo "Power On Hours: Unknown"
fi
echo "Power On Hours: ${power_on_hours:-Unknown}"
# Disk type specific analysis
if [[ "$disk_type" == "SSD" ]]; then
if [[ "$disk_type" == "HDD" ]]; then
echo "Realloc Sectors: ${reallocated_sectors:-0}"
echo "Pending Sectors: ${pending_sectors:-0}"
[[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}"
[[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}"
local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type")
echo "Lifespan: $lifespan"
elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then
local tbw_used=0
if [[ -n "$total_written" && "$total_written" != "0" ]]; then
tbw_used=$(calculate_tbw "" "$total_written" "$model")
tbw_used=$(calculate_tbw "$disk_type" "" "$total_written")
elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model")
tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "")
fi
if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
echo "TBW Used: ${tbw_used} TB"
fi
local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type")
# Estimate capacity for endurance calculation
local capacity_gb=0
if echo "$capacity" | grep -qi "GB"; then
capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. -f1)
elif echo "$capacity" | grep -qi "TB"; then
capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. -f1)
fi
echo "TBW Used: ${tbw_used} TB"
echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)"
local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout")
local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type")
local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1)
local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2)
local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3)
local wear_source=$(echo "$lifespan_info" | cut -d'|' -f4)
if [[ "$wear_source" != "media_wearout" && $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
if [[ "$estimated_endurance" != "N/A" ]]; then
echo "TBW Remaining: $tbw_remaining"
echo "Lifespan: $lifespan_percent ($wear_status)"
fi
echo "Lifespan: $lifespan_percent ($wear_status)"
# Show wear source if available
if [[ "$wear_source" == "media_wearout" ]]; then
echo "Wear Source: Media Wearout Indicator"
elif [[ "$wear_source" == "tbw" ]]; then
echo "Wear Source: TBW Calculation"
elif [[ "$wear_source" == "estimated" ]]; then
echo "Wear Source: Estimated Endurance"
# Show mechanical attributes for SAS drives that might be SSDs
if [[ "$disk_type" == "SAS" ]]; then
echo "Realloc Sectors: ${reallocated_sectors:-0}"
echo "Pending Sectors: ${pending_sectors:-0}"
fi
elif [[ "$disk_type" == "HDD" ]]; then
if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then
echo "Realloc Sectors: $reallocated_sectors"
fi
if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then
echo "Pending Sectors: $pending_sectors"
fi
local lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
echo "Lifespan: $lifespan"
else
print_color $YELLOW "Limited information available for this disk type"
echo "This is normal for hardware RAID configurations like PERC H730P"
echo "For detailed SAS drive information, use controller management tools"
print_color $YELLOW "Unknown disk type - limited information available"
fi
echo ""
}
# Function to detect all disks with enhanced SAS support (no partitions) - FIXED
# Function to detect all disks
detect_disks() {
local disks=()
# Check for SATA/SAS disks - only main devices, no partitions
for disk in /dev/sd[a-z]; do
# Check for SATA/SAS disks
for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do
if [[ -b "$disk" ]]; then
disks+=("$disk")
fi
done
# Check for NVMe disks - only main devices, no partitions
# Check for NVMe disks (base devices only, no partitions)
for disk in /dev/nvme[0-9]n[0-9]; do
if [[ -b "$disk" ]]; then
disks+=("$disk")
fi
done
# Check for SAS disks via SCSI generic - only main devices
for disk in /dev/sg[0-9]; do
if [[ -b "$disk" ]]; then
disks+=("$disk")
fi
done
# Check for other disk types - only main devices
# Check for other disk types
for disk in /dev/vd[a-z] /dev/xvd[a-z]; do
if [[ -b "$disk" ]]; then
disks+=("$disk")
@@ -487,40 +590,19 @@ detect_disks() {
echo "${disks[@]}"
}
# Function to detect RAID controllers (Ubuntu specific)
# Probes every controller type / slot 0-31 combination with
# "smartctl -d <type>,<slot> -i" against a fixed list of candidate devices.
# For each combination the first candidate that smartctl accepts is recorded.
# Arguments: none
# Outputs:   space-separated "device:type,slot" entries on one line
#            (an empty line when nothing responds).
detect_raid_controllers() {
  local -a controllers=("megaraid" "cciss" "areca" "3ware" "hpt")
  local -a candidates=("/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1")
  local -a present=()
  local -a raid_disks=()
  # Loop variables are local so they do not overwrite globals in the caller.
  local controller slot base_disk

  # Device presence does not change between probes, so test it once.
  # SCSI generic nodes (/dev/sg*) are character devices, so both node types
  # are accepted; a block-only test would always reject them.
  for base_disk in "${candidates[@]}"; do
    if [[ -b "$base_disk" || -c "$base_disk" ]]; then
      present+=("$base_disk")
    fi
  done

  # Check for RAID controllers
  if [[ ${#present[@]} -gt 0 ]]; then
    for controller in "${controllers[@]}"; do
      for slot in {0..31}; do
        # Try different disk devices for each controller slot
        for base_disk in "${present[@]}"; do
          if smartctl -d "$controller,$slot" -i "$base_disk" &>/dev/null; then
            raid_disks+=("$base_disk:$controller,$slot")
            break
          fi
        done
      done
    done
  fi

  echo "${raid_disks[@]}"
}
# Main function - FIXED
# Main function
main() {
print_color $BLUE "Ubuntu 24.04 Disk Health Check Script v$VERSION"
print_color $BLUE "Enhanced with PERC H730P and SAS Support"
print_color $BLUE "============================================"
print_color $BLUE "Disk Health Check Script v$VERSION for Ubuntu"
print_color $BLUE "=============================================="
echo ""
check_dependencies
local disks=()
# Check for soft-raid first
check_mdraid
# If specific disk provided, check only that disk
if [[ $# -gt 0 ]]; then
for disk in "$@"; do
@@ -531,17 +613,9 @@ main() {
fi
done
else
# Auto-detect disks - FIXED: don't mix output with disk detection
print_color $CYAN "Auto-detecting disks (excluding partitions)..."
local direct_disks=()
read -ra direct_disks <<< "$(detect_disks)"
print_color $CYAN "Scanning for RAID controllers..."
local raid_disks=()
read -ra raid_disks <<< "$(detect_raid_controllers)"
# Combine both lists
disks=("${direct_disks[@]}" "${raid_disks[@]}")
# Auto-detect disks
print_color $CYAN "Auto-detecting disks..."
read -ra disks <<< "$(detect_disks)"
fi
if [[ ${#disks[@]} -eq 0 ]]; then
@@ -555,33 +629,20 @@ main() {
# Check if running as root, warn if not
if [[ $EUID -ne 0 ]]; then
print_color $YELLOW "Warning: Not running as root."
print_color $YELLOW "Some disks/controllers may show limited information."
print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible."
echo "For complete results, run as: sudo $0"
echo ""
fi
# Check each disk
for disk_info in "${disks[@]}"; do
# Check if this is a RAID disk (has controller specified)
if [[ "$disk_info" == *":"* ]]; then
IFS=':' read -r disk controller <<< "$disk_info"
check_disk "$disk" "$controller"
else
check_disk "$disk_info"
fi
for disk in "${disks[@]}"; do
check_disk "$disk"
done
print_color $BLUE "Check completed!"
echo ""
print_color $CYAN "Note: For PERC H730P controllers with SAS drives:"
print_color $CYAN " - Install 'storcli' for detailed controller information"
print_color $CYAN " - Use 'smartctl -d sat /dev/sgX' to try direct access"
print_color $CYAN " - Hardware RAID controllers often limit SMART data access"
echo ""
print_color $CYAN "Ubuntu-specific tips:"
print_color $CYAN " - Use 'lsblk' to see all available block devices"
print_color $CYAN " - Use 'lshw -class disk' for detailed disk information"
print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers."
print_color $YELLOW " Actual endurance for your specific drive model may be higher."
}
# Usage information
@@ -594,9 +655,10 @@ usage() {
echo " $SCRIPT_NAME # Check all auto-detected disks"
echo " sudo $SCRIPT_NAME # Check all disks (as root)"
echo " $SCRIPT_NAME /dev/sda # Check specific disk"
echo " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk"
echo " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly"
echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk"
echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks"
echo ""
echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID"
}
# Parse command line arguments

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,759 +0,0 @@
#!/bin/bash
# Disk Health Check Script for Ubuntu
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
# Supports consumer and enterprise disk classification
# Name used in usage/version output, and the script version string.
SCRIPT_NAME=$(basename "$0")
VERSION="2.6"
# Color codes
# tput fails (and prints to stderr) when TERM is unset or unknown, e.g. under
# cron or systemd. Its diagnostics are discarded so the variables simply end
# up empty and output is uncolored in that case.
RED=$(tput setaf 1 2>/dev/null)
GREEN=$(tput setaf 2 2>/dev/null)
YELLOW=$(tput setaf 3 2>/dev/null)
BLUE=$(tput setaf 4 2>/dev/null)
CYAN=$(tput setaf 6 2>/dev/null)
MAGENTA=$(tput setaf 5 2>/dev/null)
# Resets all attributes; appended after every colored message.
NC=$(tput sgr0 2>/dev/null)
# Function to print colored output
print_color() {
  # Print a message wrapped in a color escape and the global reset code NC.
  # Arguments: $1 - color escape sequence, $2 - message text
  # echo -e is used, so backslash escapes inside the message are interpreted.
  local tint=$1 text=$2
  echo -e "${tint}${text}${NC}"
}
# Check if command exists
command_exists() {
  # Succeeds when $1 resolves to something the shell can run; prints nothing.
  command -v "$1" &>/dev/null
}
# Check dependencies
check_dependencies() {
  # Verify that smartctl and bc are available.
  # When either is missing, prints the package names plus an apt install hint
  # and terminates the script with exit status 1.
  local needed=()

  command_exists smartctl || needed+=("smartmontools")
  command_exists bc || needed+=("bc")

  if [[ ${#needed[@]} -eq 0 ]]; then
    return 0
  fi

  print_color $RED "Error: Missing required packages: ${needed[*]}"
  echo "Install with: sudo apt update && sudo apt install ${needed[*]}"
  exit 1
}
# TBW endurance standards (using lowest numbers)
# Lookup tables keyed by capacity tier in GB (the tier strings produced by
# get_capacity_tier); each value is the endurance figure, in TB written, that
# get_estimated_endurance reports for that tier.
# Consumer entries work out to 0.6 TB written per GB of capacity.
declare -A CONSUMER_TBW=(
["250"]=150
["500"]=300
["1000"]=600
["2000"]=1200
["4000"]=2400
["8000"]=4800
)
# Enterprise entries are three times the consumer figure for the same tier
# (1.8 TB written per GB of capacity).
declare -A ENTERPRISE_TBW=(
["250"]=450
["500"]=900
["1000"]=1800
["2000"]=3600
["4000"]=7200
["8000"]=14400
)
# Function to get closest capacity tier
get_capacity_tier() {
  # Map a capacity in GB onto the smallest TBW table tier that can hold it.
  # Arguments: $1 - capacity in GB (integer)
  # Outputs:   one of 250, 500, 1000, 2000, 4000, 8000
  local capacity_gb=$1

  for tier in 250 500 1000 2000 4000 8000; do
    [[ $capacity_gb -le $tier ]] || continue
    echo $tier
    return
  done

  # Anything above the largest tier is clamped to it (no scaling is applied).
  echo "8000"
}
# Function to convert bytes to human readable
bytes_to_human() {
  # Render a byte count with a binary-scaled unit (1024-based TB/GB/MB),
  # two decimals via bc; counts below 1 MiB are printed as plain bytes.
  # If bc fails the numeric part falls back to "0".
  local bytes=$1
  local divisor unit

  if [[ $bytes -ge 1099511627776 ]]; then
    divisor=1099511627776
    unit="TB"
  elif [[ $bytes -ge 1073741824 ]]; then
    divisor=1073741824
    unit="GB"
  elif [[ $bytes -ge 1048576 ]]; then
    divisor=1048576
    unit="MB"
  else
    echo "$bytes bytes"
    return
  fi

  local scaled
  scaled=$(echo "scale=2; $bytes / $divisor" | bc 2>/dev/null || echo "0")
  echo "$scaled $unit"
}
# Function to extract numeric hours from power_on_hours field
extract_numeric_hours() {
  # Reduce a raw power-on-hours field to its leading run of digits.
  # Arguments: $1 - raw value, possibly with trailing text
  # Outputs:   the leading integer, or 0 when the value does not start with one
  local power_on_hours=$1
  local leading_digits

  # Drop everything from the first non-digit character onwards.
  leading_digits=$(sed 's/[^0-9].*$//' <<< "$power_on_hours")

  if ! [[ "$leading_digits" =~ ^[0-9]+$ ]]; then
    echo "0"
    return
  fi
  echo "$leading_digits"
}
# Function to get disk type and interface
get_disk_info() {
  # Classify a disk from `smartctl -i` output.
  # Arguments: $1 - device path
  # Outputs:   "<type>|<transport>|<is_enterprise>" where type is one of
  #            NVMe, SAS, SSD, HDD or UNKNOWN and is_enterprise is true/false.
  local disk=$1
  local info
  info=$(smartctl -i "$disk" 2>/dev/null)
  local transport=""
  local disk_type="UNKNOWN"
  local is_enterprise=false

  # Enterprise markers must not be embedded in a longer alphabetic word.
  # Plain substring matching flagged every "WDC ..." model (contains "DC") and
  # any report whose "Local Time" line mentions "Sep" (contains "EP").
  local info_markers='(^|[^[:alpha:]])(ENTERPRISE|EP|SAS|Xeon|Xeons|DualPort|PowerLoss|PLP)([^[:alpha:]]|$)'
  local model_markers='(^|[^[:alpha:]])(PRO|EP|DC|ENT|ENTERPRISE)([^[:alpha:]]|$)'

  # Check if it's NVMe
  if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then
    disk_type="NVMe"
    transport="NVMe"
  # Check for SAS
  elif echo "$info" | grep -qi "SAS"; then
    disk_type="SAS"
    transport="SAS"
    is_enterprise=true
  # Check for SATA SSD (must precede the HDD test: SSDs also print a
  # "Rotation Rate" line, with the value "Solid State Device")
  elif echo "$info" | grep -qi "Solid State Device"; then
    disk_type="SSD"
    transport="SATA"
  # Check for SATA HDD
  elif echo "$info" | grep -qi "Rotation Rate"; then
    disk_type="HDD"
    transport="SATA"
  fi

  # Enterprise keywords anywhere in the identity report
  if echo "$info" | grep -Eqi "$info_markers"; then
    is_enterprise=true
  fi

  # Enterprise keywords in the model name ("Device Model:" line only)
  local model
  model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//')
  if echo "$model" | grep -Eqi "$model_markers"; then
    is_enterprise=true
  fi

  echo "$disk_type|$transport|$is_enterprise"
}
# Function to calculate TBW for SSD/NVMe
calculate_tbw() {
  # Convert a write counter into terabytes written (decimal TB, 2 decimals).
  # Arguments: $1 - disk type (accepted, but does not change the math)
  #            $2 - raw counter treated as 32 MB units
  #            $3 - count of 512-byte sectors written (wins over $2)
  # Outputs:   TB written, or 0 when neither counter is usable / bc fails
  local disk_type=$1
  local raw_value=$2
  local sectors=$3
  local tbw=0

  if [[ -z "$sectors" || "$sectors" == "0" ]]; then
    # No sector count: fall back to the 32 MB-unit counter, if any.
    if [[ -n "$raw_value" && "$raw_value" != "0" ]]; then
      tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0")
    fi
  else
    local bytes=$((sectors * 512))
    tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0")
  fi

  echo "$tbw"
}
# Function to get estimated endurance
get_estimated_endurance() {
  # Look up the endurance figure (TB written) for a drive.
  # Arguments: $1 - capacity in GB, $2 - "true" for enterprise class,
  #            $3 - disk type
  # Outputs:   "N/A" for HDDs, otherwise the ENTERPRISE_TBW / CONSUMER_TBW
  #            entry for the drive's capacity tier
  local capacity_gb=$1
  local is_enterprise=$2
  local disk_type=$3

  # HDDs don't have TBW
  if [[ "$disk_type" == "HDD" ]]; then
    echo "N/A"
    return
  fi

  local tier
  tier=$(get_capacity_tier "$capacity_gb")

  case "$is_enterprise" in
    true) echo "${ENTERPRISE_TBW[$tier]}" ;;
    *) echo "${CONSUMER_TBW[$tier]}" ;;
  esac
}
# Function to estimate SSD lifespan with TBW remaining
estimate_ssd_lifespan() {
  # Estimate remaining SSD life from TB written versus rated endurance.
  # Arguments: $1 - power-on hours, $2 - TB written so far,
  #            $3 - rated endurance in TB (or "N/A"), $4 - disk type
  # Outputs:   a '|'-separated record: remaining percent | remaining TB |
  #            wear label | disk type (percent/TB fields carry color codes)
  local power_on_hours=$1
  local tbw_used=$2
  local estimated_endurance=$3
  local disk_type=$4

  if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then
    echo "Unknown||Unknown|New"
    return
  fi

  if [[ "$estimated_endurance" == "N/A" ]]; then
    echo "N/A|N/A|N/A|HDD"
    return
  fi

  # Keep only digits and dots of the written figure; empty means zero.
  local written
  written=$(sed 's/[^0-9.]//g' <<< "$tbw_used")
  written=${written:-0}

  local left
  left=$(echo "scale=2; $estimated_endurance - $written" | bc 2>/dev/null || echo "$estimated_endurance")

  # Nothing written yet (or not comparable): report the full rating.
  if ! [[ $(echo "$written > 0" | bc 2>/dev/null) -eq 1 ]]; then
    echo "Unknown|${estimated_endurance} TB|New|$disk_type"
    return
  fi

  local used_pct remaining_pct shade verdict
  used_pct=$(echo "scale=1; $written * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0")
  remaining_pct=$(echo "scale=1; 100 - $used_pct" | bc 2>/dev/null || echo "100")

  # 80% and 50% consumed are the red / yellow thresholds.
  if [[ $(echo "$used_pct >= 80" | bc 2>/dev/null) -eq 1 ]]; then
    shade=$RED
    verdict="High wear"
  elif [[ $(echo "$used_pct >= 50" | bc 2>/dev/null) -eq 1 ]]; then
    shade=$YELLOW
    verdict="Moderate wear"
  else
    shade=$GREEN
    verdict="Healthy"
  fi

  echo "${shade}${remaining_pct}%${NC}|${shade}${left} TB${NC}|${verdict}|$disk_type"
}
# Function to estimate HDD lifespan
estimate_hdd_lifespan() {
  # Score drive wear from SMART counters and print a lifespan bucket.
  # Arguments: $1 - power-on hours (raw), $2 - reallocated sectors,
  #            $3 - pending sectors, $4 - start/stop count,
  #            $5 - load cycle count, $6 - disk type
  # Outputs:   "Unknown", a CRITICAL notice, or one of four lifespan buckets
  local power_on_hours=$1
  local reallocated_sectors=$2
  local pending_sectors=$3
  local start_stop_count=$4
  local load_cycle_count=$5
  local disk_type=$6

  local numeric_hours
  numeric_hours=$(extract_numeric_hours "$power_on_hours")
  if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then
    echo "Unknown"
    return
  fi

  # Any pending sector is reported as critical straight away.
  if [[ "$pending_sectors" -gt 0 ]]; then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
    return
  fi

  local severity=0

  # Reallocated sectors: 1..10 -> +1, 11..100 -> +2, above 100 -> +3
  if [[ "$reallocated_sectors" -gt 100 ]]; then
    ((severity += 3))
  elif [[ "$reallocated_sectors" -gt 10 ]]; then
    ((severity += 2))
  elif [[ "$reallocated_sectors" -gt 0 ]]; then
    ((severity += 1))
  fi

  # Running hours: above 15000 -> +1, above 30000 -> +2, above 50000 -> +3
  if [[ "$numeric_hours" -gt 50000 ]]; then
    ((severity += 3))
  elif [[ "$numeric_hours" -gt 30000 ]]; then
    ((severity += 2))
  elif [[ "$numeric_hours" -gt 15000 ]]; then
    ((severity += 1))
  fi

  # Mechanical counters only count for drives typed as HDD.
  if [[ "$disk_type" == "HDD" ]]; then
    if [[ "$start_stop_count" -gt 50000 ]]; then
      ((severity += 2))
    elif [[ "$start_stop_count" -gt 20000 ]]; then
      ((severity += 1))
    fi
    if [[ "$load_cycle_count" -gt 500000 ]]; then
      ((severity += 2))
    elif [[ "$load_cycle_count" -gt 200000 ]]; then
      ((severity += 1))
    fi
  fi

  if [[ $severity -lt 1 ]]; then
    echo "${GREEN}> 3 years${NC} (Healthy)"
  elif [[ $severity -lt 3 ]]; then
    echo "${YELLOW}1-3 years${NC} (Light wear)"
  elif [[ $severity -lt 5 ]]; then
    echo "${YELLOW}6-18 months${NC} (Moderate wear)"
  else
    echo "${RED}< 6 months${NC} (Multiple risk factors)"
  fi
}
# Function to check soft-raid (MDRAID)
check_mdraid() {
  # Report Linux software RAID arrays listed in /proc/mdstat.
  # For every md device that exists as a block device a banner is printed;
  # when mdadm is installed and returns details, the RAID level, state and
  # active device count follow.
  local md_devices=()

  # Pass 1: collect /dev/mdN names from lines that start with "md<digits>".
  if [[ -f /proc/mdstat ]]; then
    while IFS= read -r line; do
      if [[ $line =~ ^md[0-9]+ ]]; then
        md_devices+=("/dev/${line%% *}")
      fi
    done < /proc/mdstat
  fi

  # Pass 2: describe each array.
  for md in "${md_devices[@]}"; do
    [[ -b "$md" ]] || continue
    print_color $MAGENTA "Found software RAID: $md"

    command_exists mdadm || continue
    local md_info=$(mdadm --detail "$md" 2>/dev/null)
    [[ -n "$md_info" ]] || continue

    local raid_level raid_state raid_members
    raid_level=$(grep "Raid Level" <<< "$md_info" | cut -d: -f2 | sed 's/^[ \t]*//')
    raid_state=$(grep "State" <<< "$md_info" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')
    raid_members=$(grep "Active Devices" <<< "$md_info" | cut -d: -f2 | sed 's/^[ \t]*//')
    echo "RAID Level: $raid_level"
    echo "State: $raid_state"
    echo "Devices: $raid_members"
    echo ""
  done
}
# Function to extract capacity in GB from various formats - IMPROVED FOR HDD/SAS
extract_capacity_gb() {
  # Parse a capacity string as printed by smartctl into whole decimal GB.
  # Arguments: $1 - text such as "500,107,862,016 bytes",
  #                 "1,000,204,886,016 [1.00 TB]", "[500.1 GB]", "1.82TB"
  #                 or "2000398934016B"
  # Outputs:   capacity in GB (1 TB = 1000 GB, 1 GB = 10^9 bytes, truncated),
  #            or 0 when nothing could be parsed
  local capacity=$1
  local capacity_gb=0
  local size="" unit=""

  # The patterns live in variables and use [[:space:]]: a "\s" written
  # directly inside [[ =~ ]] is taken by Bash as a literal "s", so the
  # whitespace between the number and its unit never matched.
  local re_bracketed='\[([0-9,.]+)[[:space:]]*([TtGg])[Bb]'
  local re_bytes='([0-9,]+)[[:space:]]+bytes'
  local re_suffixed='([0-9,.]+)[[:space:]]*([TtGg])[Bb]'
  local re_plain_b='([0-9,]+)[[:space:]]*[Bb]'

  # Same precedence as before: bracketed TB/GB, "<n> bytes", bare TB/GB, "<n>B".
  if [[ $capacity =~ $re_bracketed ]]; then
    size=${BASH_REMATCH[1]}
    unit=${BASH_REMATCH[2]}
  elif [[ $capacity =~ $re_bytes ]]; then
    size=${BASH_REMATCH[1]}
    unit="B"
  elif [[ $capacity =~ $re_suffixed ]]; then
    size=${BASH_REMATCH[1]}
    unit=${BASH_REMATCH[2]}
  elif [[ $capacity =~ $re_plain_b ]]; then
    size=${BASH_REMATCH[1]}
    unit="B"
  fi

  # Drop thousands separators, then split into integer and fractional digits.
  size=${size//,/}
  local whole=${size%%.*}
  local frac=""
  if [[ $size == *.* ]]; then
    frac=${size#*.}
    frac=${frac%%.*}
  fi
  whole=${whole:-0}

  # Integer math only (10# avoids octal on leading zeros); the length guards
  # keep the values inside 64-bit shell arithmetic.
  case "$unit" in
    [Tt])
      if [[ ${#whole} -le 15 ]]; then
        frac="${frac}000"
        capacity_gb=$((10#$whole * 1000 + 10#${frac:0:3}))
      fi
      ;;
    [Gg])
      if [[ ${#whole} -le 15 ]]; then
        capacity_gb=$((10#$whole))
      fi
      ;;
    B)
      if [[ ${#whole} -le 18 ]]; then
        capacity_gb=$((10#$whole / 1000000000))
      fi
      ;;
  esac

  echo "$capacity_gb"
}
# Function to get NVMe capacity using smartctl
get_nvme_capacity() {
  # Pull the capacity text for an NVMe device out of `smartctl -i`.
  # Fields are tried in order: "Total NVM Capacity",
  # "Namespace 1 Size/Capacity", then "User Capacity" (cut before any "[").
  # Outputs: the raw field text, or an empty line when none is present
  local disk=$1
  local nvme_info
  nvme_info=$(smartctl -i "$disk" 2>/dev/null)
  local capacity=""
  local label

  for label in "Total NVM Capacity" "Namespace 1 Size/Capacity"; do
    capacity=$(grep -i "$label" <<< "$nvme_info" | cut -d: -f2 | sed 's/^[ \t]*//')
    if [[ -n "$capacity" ]]; then
      break
    fi
  done

  if [[ -z "$capacity" ]]; then
    capacity=$(grep -i "User Capacity" <<< "$nvme_info" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
  fi

  echo "$capacity"
}
# Function to get human readable capacity
get_human_capacity() {
  # Format a GB figure for display: values of 1000 GB and up are shown in TB
  # with two decimals (computed by bc), smaller ones as plain GB.
  local capacity_gb=$1

  if ! [[ $capacity_gb -ge 1000 ]]; then
    echo "${capacity_gb} GB"
    return
  fi

  local in_tb
  in_tb=$(echo "scale=2; $capacity_gb / 1000" | bc)
  echo "$in_tb TB"
}
# Function to extract capacity from model name for HDD/SAS drives
extract_capacity_from_model() {
  # Guess a drive's capacity in GB from a size token in its model name.
  # Arguments: $1 - model string, $2 - disk type (accepted for interface
  #            compatibility; not used)
  # Outputs:   capacity in GB, or 0 when the name carries no size token
  local model=$1
  local disk_type=$2

  # "<digits>TB" / "<digits>GB" in any letter case. The digits must not be
  # preceded by another digit, so the whole number is read: "12TB" is 12000,
  # not the 2000 an unanchored "2TB" test produces.
  local re_sized='(^|[^0-9])([0-9]+)([TtGg])[Bb]'
  # The common sizes are also accepted with a bare "G" suffix (e.g. "2000G"),
  # again only as a complete number ("2500G" is not "500G").
  local re_bare_g='(^|[^0-9])(250|500|1000|2000|4000|8000)[Gg]'

  if [[ $model =~ $re_sized ]] && [[ ${#BASH_REMATCH[2]} -le 15 ]]; then
    # 10# keeps leading zeros from being read as octal.
    local amount=$((10#${BASH_REMATCH[2]}))
    case "${BASH_REMATCH[3]}" in
      [Tt]) echo "$((amount * 1000))" ;;
      *) echo "$amount" ;;
    esac
  elif [[ $model =~ $re_bare_g ]]; then
    echo "${BASH_REMATCH[2]}"
  else
    echo "0"
  fi
}
# Function to check a single disk
check_disk() {
  # Print a health report for one disk: identity, capacity, SMART health and
  # a type-specific wear / lifespan estimate.
  # Arguments: $1 - block device path
  # Outputs:   the report on stdout; returns early with a message when the
  #            path is not a block device or smartctl cannot read it
  local disk=$1

  print_color $CYAN "Checking disk: $disk"
  echo "=================================================="

  # Check if disk exists and is accessible
  if [[ ! -b "$disk" ]]; then
    print_color $RED "Error: $disk is not a valid block device"
    echo ""
    return
  fi

  # Get disk information
  local disk_info=$(get_disk_info "$disk")
  local disk_type=$(echo "$disk_info" | cut -d'|' -f1)
  local transport=$(echo "$disk_info" | cut -d'|' -f2)
  local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3)

  # Get basic disk information. Declaration and assignment are split so that
  # $? really is smartctl's status ("local x=$(...)" always yields 0).
  local info
  info=$(smartctl -i "$disk" 2>/dev/null)
  local info_status=$?

  # smartctl's exit status is a bit mask: bit 0 = command line did not parse,
  # bit 1 = device open failed. Either one means there is nothing to report.
  if ((info_status & 3)) || [[ -z "$info" ]]; then
    print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access."
    echo ""
    return
  fi

  local health=$(smartctl -H "$disk" 2>/dev/null)
  local attributes=$(smartctl -A "$disk" 2>/dev/null)

  # Extract disk information
  local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//')
  [[ -z "$model" ]] && model=$(echo "$info" | grep -i "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//')
  local serial=$(echo "$info" | grep -i "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//')
  local capacity=$(echo "$info" | grep -i "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
  local firmware=$(echo "$info" | grep -i "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//')

  # For NVMe disks, try to get capacity from different fields
  if [[ "$disk_type" == "NVMe" ]]; then
    local nvme_capacity=$(get_nvme_capacity "$disk")
    if [[ -n "$nvme_capacity" ]]; then
      capacity="$nvme_capacity"
    fi
  fi

  # Extract capacity in GB and human readable format
  local capacity_gb=$(extract_capacity_gb "$capacity")
  local capacity_human=""

  # If capacity extraction failed, try alternative methods
  if [[ "$capacity_gb" -eq 0 ]]; then
    # Try to get capacity from model name (especially for HDD/SAS)
    local model_capacity=$(extract_capacity_from_model "$model" "$disk_type")
    if [[ "$model_capacity" -gt 0 ]]; then
      capacity_gb="$model_capacity"
      capacity_human=$(get_human_capacity "$capacity_gb")
    else
      # Final fallback based on disk type and common sizes
      if [[ "$disk_type" == "HDD" ]]; then
        # Common HDD sizes
        if [[ "$model" =~ ST2000 ]]; then
          capacity_gb=2000
        elif [[ "$model" =~ ST1000 ]]; then
          capacity_gb=1000
        elif [[ "$model" =~ ST4000 ]]; then
          capacity_gb=4000
        elif [[ "$model" =~ ST3000 ]]; then
          capacity_gb=3000
        else
          capacity_gb=0
        fi
      elif [[ "$disk_type" == "SSD" ]]; then
        # Common SSD sizes
        if [[ "$model" =~ 960[Gg] ]]; then
          capacity_gb=960
        elif [[ "$model" =~ 480[Gg] ]]; then
          capacity_gb=480
        elif [[ "$model" =~ 240[Gg] ]]; then
          capacity_gb=240
        elif [[ "$model" =~ 120[Gg] ]]; then
          capacity_gb=120
        else
          capacity_gb=0
        fi
      elif [[ "$disk_type" == "NVMe" ]]; then
        # Common NVMe sizes
        if [[ "$model" =~ 500[Gg] ]]; then
          capacity_gb=500
        elif [[ "$model" =~ 1000[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then
          capacity_gb=1000
        elif [[ "$model" =~ 2000[Gg] ]] || [[ "$model" =~ 2[Tt] ]]; then
          capacity_gb=2000
        else
          capacity_gb=0
        fi
      fi
      capacity_human=$(get_human_capacity "$capacity_gb")
    fi
  else
    capacity_human=$(get_human_capacity "$capacity_gb")
  fi

  # If we still don't have capacity, show unknown
  if [[ "$capacity_gb" -eq 0 ]]; then
    capacity_human="Unknown"
  fi

  local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//')
  [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//')

  # Extract SMART attributes (column 10 of `smartctl -A` is the raw value)
  local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1)
  local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw")
  local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}' | head -1)
  local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1)
  # Sector counter only. Host_Writes_32MiB counts 32 MiB units and must not be
  # read as 512-byte sectors; it has its own variable and conversion below.
  local total_written=$(echo "$attributes" | grep "Total_LBAs_Written" | awk '{print $10}' | head -1)
  local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1)
  local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1)
  local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}' | head -1)

  # For NVMe disks using smartctl extended attributes
  if [[ "$disk_type" == "NVMe" ]]; then
    local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null)
    # Extract data units written for NVMe
    local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $4}' | tr -d ',')
    if [[ -n "$data_units_written" ]]; then
      # Convert data units to sectors (1 data unit = 1000 sectors for NVMe)
      total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null)
    else
      # Try alternative field
      data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',')
      if [[ -n "$data_units_written" ]]; then
        total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null)
      fi
    fi
    # Get power on hours for NVMe; thousands separators are stripped so the
    # value can be compared numerically later on.
    local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}' | tr -d ',')
    if [[ -n "$nvme_power_hours" ]]; then
      power_on_hours="$nvme_power_hours"
    fi
  fi

  # Compare the flag as a string instead of executing its value as a command.
  local disk_class="Consumer"
  if [[ "$is_enterprise" == "true" ]]; then
    disk_class="Enterprise"
  fi

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Interface: $transport"
  echo "Class: $disk_class"
  echo "Capacity: $capacity_human"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"
  echo "Power On Hours: ${power_on_hours:-Unknown}"

  # Disk type specific analysis
  if [[ "$disk_type" == "HDD" ]]; then
    echo "Realloc Sectors: ${reallocated_sectors:-0}"
    echo "Pending Sectors: ${pending_sectors:-0}"
    [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}"
    [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}"
    local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type")
    echo "Lifespan: $lifespan"
  elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "")
    fi
    local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type")
    echo "TBW Used: ${tbw_used} TB"
    echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)"
    local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type")
    local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1)
    local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2)
    local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3)
    if [[ "$estimated_endurance" != "N/A" ]]; then
      echo "TBW Remaining: $tbw_remaining"
      echo "Lifespan: $lifespan_percent ($wear_status)"
    fi
    # Show mechanical attributes for SAS drives that might be SSDs
    if [[ "$disk_type" == "SAS" ]]; then
      echo "Realloc Sectors: ${reallocated_sectors:-0}"
      echo "Pending Sectors: ${pending_sectors:-0}"
    fi
  else
    print_color $YELLOW "Unknown disk type - limited information available"
  fi
  echo ""
}
# Function to detect all disks
detect_disks() {
  # List whole-disk block devices present on the system.
  # Globs are visited in this order: SATA/SAS (sdX, sdXX), NVMe namespaces
  # (nvmeNnM, no partitions), then virtio (vdX) and Xen (xvdX) disks.
  # Outputs: the device paths, space separated, on one line
  local disks=()

  for disk in \
    /dev/sd[a-z] /dev/sd[a-z][a-z] \
    /dev/nvme[0-9]n[0-9] \
    /dev/vd[a-z] /dev/xvd[a-z]; do
    # Unmatched globs stay literal and are filtered out by the -b test.
    [[ -b "$disk" ]] && disks+=("$disk")
  done

  echo "${disks[@]}"
}
# Main function
main() {
  # Entry point: print the banner, verify dependencies, report software RAID,
  # then check either the disks given as arguments or every detected disk.
  # Exits with status 1 when no usable disk is found.
  print_color $BLUE "Disk Health Check Script v$VERSION for Ubuntu"
  print_color $BLUE "=============================================="
  echo ""

  check_dependencies

  local disks=()

  # Check for soft-raid first
  check_mdraid

  if [[ $# -eq 0 ]]; then
    # No arguments: auto-detect disks
    print_color $CYAN "Auto-detecting disks..."
    read -ra disks <<< "$(detect_disks)"
  else
    # Explicit disks: keep the block devices, complain about the rest
    for disk in "$@"; do
      if [[ ! -b "$disk" ]]; then
        print_color $RED "Error: $disk is not a valid block device"
        continue
      fi
      disks+=("$disk")
    done
  fi

  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color $RED "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi

  print_color $GREEN "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check if running as root, warn if not
  if [[ $EUID -ne 0 ]]; then
    print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible."
    echo "For complete results, run as: sudo $0"
    echo ""
  fi

  # Check each disk
  for disk in "${disks[@]}"; do
    check_disk "$disk"
  done

  print_color $BLUE "Check completed!"
  echo ""
  print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers."
  print_color $YELLOW " Actual endurance for your specific drive model may be higher."
}
# Usage information
usage() {
  # Print the command-line help text (one printf argument per output line).
  printf '%s\n' \
    "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks" \
    "" \
    "Examples:" \
    " $SCRIPT_NAME # Check all auto-detected disks" \
    " sudo $SCRIPT_NAME # Check all disks (as root)" \
    " $SCRIPT_NAME /dev/sda # Check specific disk" \
    " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" \
    " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" \
    "" \
    "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID"
}
# Parse command line arguments
# Dispatch on the first argument: help and version requests are answered
# directly, anything else (including no argument) is handed to main.
first_arg="${1:-}"
if [[ "$first_arg" == "-h" || "$first_arg" == "--help" ]]; then
  usage
  exit 0
elif [[ "$first_arg" == "-v" || "$first_arg" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi