added v2.5

This commit is contained in:
2025-10-22 04:35:22 +08:00
parent 43c0aec446
commit 5f8ae02d71
15 changed files with 4645 additions and 382 deletions

531
alma-v2.3.sh Executable file
View File

@@ -0,0 +1,531 @@
#!/bin/bash
# Disk Health Check Script for Alma Linux 9
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
# Supports consumer and enterprise disk classification
SCRIPT_NAME=$(basename "$0")
VERSION="2.3"
# Color codes.
# tput exits non-zero and writes a diagnostic when TERM is unset or unknown
# (cron jobs, minimal containers). Swallow that and fall back to empty
# strings so the report is simply uncoloured instead of noisy.
RED=$(tput setaf 1 2>/dev/null || true)
GREEN=$(tput setaf 2 2>/dev/null || true)
YELLOW=$(tput setaf 3 2>/dev/null || true)
BLUE=$(tput setaf 4 2>/dev/null || true)
CYAN=$(tput setaf 6 2>/dev/null || true)
MAGENTA=$(tput setaf 5 2>/dev/null || true)
NC=$(tput sgr0 2>/dev/null || true)
# Emit one line of text wrapped in a colour sequence and the reset code.
#   $1 - colour escape sequence (for example "$RED")
#   $2 - text to print; backslash escapes are interpreted (echo -e)
# Reads the global NC as the reset sequence.
print_color() {
  local -r tint=$1 text=$2
  echo -e "${tint}${text}${NC}"
}
# Succeeds (status 0) when "$1" resolves to something runnable according to
# `command -v`; all output is discarded.
command_exists() {
  command -v "$1" &>/dev/null
}
# Verify that the external tools this script shells out to are installed.
# Prints the missing package names plus an install hint and exits with
# status 1 when at least one is absent; otherwise returns silently.
check_dependencies() {
  local -a absent=()
  local spec
  # Each entry is "<command>:<package that provides it>".
  for spec in "smartctl:smartmontools" "bc:bc"; do
    command_exists "${spec%%:*}" || absent+=("${spec#*:}")
  done
  (( ${#absent[@]} == 0 )) && return
  print_color $RED "Error: Missing required packages: ${absent[*]}"
  echo "Install with: sudo dnf install ${absent[*]}"
  exit 1
}
# Endurance lookup tables: key = capacity tier in GB, value = TB written.
# The figures are the script's own conservative ("lowest") assumptions.
declare -A CONSUMER_TBW=(
  [250]=150 [500]=300 [1000]=600 [2000]=1200 [4000]=2400
)
declare -A ENTERPRISE_TBW=(
  [250]=450 [500]=900 [1000]=1800 [2000]=3600 [4000]=7200
)
# Map a capacity in GB onto the smallest endurance tier that can hold it.
#   $1 - capacity in GB (a fractional part, if any, is ignored)
# Prints one of 250/500/1000/2000/4000. Capacities above the largest tier
# are clamped to it (no scaling is applied); empty or non-numeric input is
# treated as 0 and therefore lands in the smallest tier.
get_capacity_tier() {
  local capacity_gb=${1%%.*}
  local tier
  [[ "$capacity_gb" =~ ^[0-9]+$ ]] || capacity_gb=0
  for tier in 250 500 1000 2000 4000; do
    # 10# forces base 10 so a leading zero is not read as octal.
    if (( 10#$capacity_gb <= tier )); then
      echo "$tier"
      return
    fi
  done
  echo "4000"
}
# Classify a disk from `smartctl -i` output.
#   $1 - device path
# Prints "<type>|<transport>|<is_enterprise>" where type is one of
# NVMe, SAS, SSD, HDD or UNKNOWN and is_enterprise is "true"/"false".
#
# Keywords are matched as whole words (grep -w). Plain substring matching
# flagged almost everything as enterprise: "EP" hit "Sep" in the
# "Local Time is:" line, "DC" hit the "WDC" vendor prefix, and "SAS"/"EP"
# could hit arbitrary serial numbers.
get_disk_info() {
  local disk=$1
  local info
  info=$(smartctl -i "$disk" 2>/dev/null)
  local transport=""
  local disk_type="UNKNOWN"
  local is_enterprise=false

  if [[ "$disk" == /dev/nvme* ]] || grep -qi "NVMe" <<<"$info"; then
    disk_type="NVMe"
    transport="NVMe"
  elif grep -qiw "SAS" <<<"$info"; then
    disk_type="SAS"
    transport="SAS"
    is_enterprise=true
  elif grep -qi "Solid State Device" <<<"$info"; then
    disk_type="SSD"
    transport="SATA"
  elif grep -qi "Rotation Rate" <<<"$info"; then
    disk_type="HDD"
    transport="SATA"
  fi

  # Enterprise hints anywhere in the identify output.
  if grep -qiwE 'ENTERPRISE|EP|SAS|Xeon|Xeons|DualPort|PowerLoss|PLP' <<<"$info"; then
    is_enterprise=true
  fi

  # Enterprise hints in the model string. NVMe devices report
  # "Model Number:" instead of "Device Model:".
  local model
  model=$(grep -iE '^(Device Model|Model Number):' <<<"$info" \
    | head -n 1 | cut -d: -f2- | sed 's/^[ \t]*//')
  if grep -qiwE 'PRO|EP|DC|ENT|ENTERPRISE' <<<"$model"; then
    is_enterprise=true
  fi

  echo "$disk_type|$transport|$is_enterprise"
}
# Convert a raw write counter into terabytes written (decimal TB).
#   $1 - disk type (accepted for interface compatibility; the conversion
#        is the same for every type)
#   $2 - counter expressed in 32 MiB units (e.g. Host_Writes_32MiB)
#   $3 - counter expressed in 512-byte sectors; takes precedence over $2
# Prints the value truncated to two decimals ("0.50", "12.34"), or "0"
# when neither counter is a positive integer.
#
# Uses shell integer arithmetic only, so malformed input can no longer
# abort an arithmetic expansion and values below 1 TB keep their leading
# zero. The 32 MiB unit is converted with its real size (33554432 bytes)
# rather than 32 * 10^6.
calculate_tbw() {
  local disk_type=$1
  local raw_value=$2
  local sectors=$3
  local bytes=0

  if [[ "$sectors" =~ ^[0-9]+$ ]] && (( 10#$sectors > 0 )); then
    bytes=$(( 10#$sectors * 512 ))
  elif [[ "$raw_value" =~ ^[0-9]+$ ]] && (( 10#$raw_value > 0 )); then
    bytes=$(( 10#$raw_value * 33554432 ))
  fi

  if (( bytes <= 0 )); then
    echo "0"
    return
  fi

  # 10^12 bytes per TB; work in hundredths of a TB to get two decimals.
  local hundredths=$(( bytes / 10000000000 ))
  printf '%d.%02d\n' "$(( hundredths / 100 ))" "$(( hundredths % 100 ))"
}
# Look up the assumed endurance (TB written) for a drive.
#   $1 - capacity in GB
#   $2 - "true" selects the enterprise table, anything else the consumer one
#   $3 - disk type; "HDD" short-circuits to "N/A"
# Prints the table value for the tier chosen by get_capacity_tier.
get_estimated_endurance() {
  local capacity_gb=$1 is_enterprise=$2 disk_type=$3

  [[ "$disk_type" != "HDD" ]] || { echo "N/A"; return; }

  local bucket
  bucket=$(get_capacity_tier "$capacity_gb")
  case "$is_enterprise" in
    true) echo "${ENTERPRISE_TBW[$bucket]}" ;;
    *)    echo "${CONSUMER_TBW[$bucket]}" ;;
  esac
}
# Estimate remaining SSD life from TB written versus assumed endurance.
#   $1 - power-on hours (empty or 0 => treated as a new/unknown drive)
#   $2 - TB written so far
#   $3 - endurance in TB, or "N/A"
#   $4 - disk type, echoed back as the last field
# Prints "<remaining %>|<remaining TB>|<status>|<disk type>"; the first two
# fields carry colour codes from RED/YELLOW/GREEN/NC when wear is known.
estimate_ssd_lifespan() {
  local power_on_hours=$1 tbw_used=$2 estimated_endurance=$3 disk_type=$4

  if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then
    echo "Unknown||Unknown|New"
    return
  fi
  if [[ "$estimated_endurance" == "N/A" ]]; then
    echo "N/A|N/A|N/A|HDD"
    return
  fi

  local tb_left
  tb_left=$(bc 2>/dev/null <<<"scale=2; $estimated_endurance - $tbw_used" || echo "0")

  # Nothing written yet (or bc could not tell): report the full budget.
  if ! [[ $(bc 2>/dev/null <<<"$tbw_used > 0") -eq 1 ]]; then
    echo "Unknown|${estimated_endurance} TB|New|$disk_type"
    return
  fi

  local pct_used pct_left tint verdict
  pct_used=$(bc 2>/dev/null <<<"scale=1; $tbw_used * 100 / $estimated_endurance" || echo "0")
  pct_left=$(bc 2>/dev/null <<<"scale=1; 100 - $pct_used" || echo "100")

  if [[ $(bc 2>/dev/null <<<"$pct_used >= 80") -eq 1 ]]; then
    tint=$RED
    verdict="High wear"
  elif [[ $(bc 2>/dev/null <<<"$pct_used >= 50") -eq 1 ]]; then
    tint=$YELLOW
    verdict="Moderate wear"
  else
    tint=$GREEN
    verdict="Healthy"
  fi
  echo "${tint}${pct_left}%${NC}|${tint}${tb_left} TB${NC}|${verdict}|$disk_type"
}
# Heuristic remaining-life estimate for a rotating disk.
#   $1 - power-on hours; only the leading digits are used, so raw values
#        such as "60000h+12m+01.000s" are accepted
#   $2 - reallocated sector count
#   $3 - pending sector count (any value > 0 is reported as CRITICAL)
#   $4 - start/stop count     (only scored when $6 is "HDD")
#   $5 - load cycle count     (only scored when $6 is "HDD")
#   $6 - disk type
# Prints a coloured one-line verdict, or "Unknown" when $1 is empty.
#
# Every counter is normalised to a base-10 integer first (anything else
# becomes 0); previously a non-numeric value made the comparison fail with
# an arithmetic error and the factor was silently skipped.
estimate_hdd_lifespan() {
  local power_on_hours=$1
  local reallocated_sectors=$2
  local pending_sectors=$3
  local start_stop_count=$4
  local load_cycle_count=$5
  local disk_type=$6

  if [[ -z "$power_on_hours" ]]; then
    echo "Unknown"
    return
  fi

  local hours=${power_on_hours%%[!0-9]*}
  local field
  for field in hours reallocated_sectors pending_sectors start_stop_count load_cycle_count; do
    if [[ "${!field}" =~ ^[0-9]+$ ]]; then
      printf -v "$field" '%d' "$(( 10#${!field} ))"
    else
      printf -v "$field" '%d' 0
    fi
  done

  local severity=0

  # Critical issues
  if (( pending_sectors > 0 )); then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
    return
  elif (( reallocated_sectors > 100 )); then
    severity=$((severity + 3))
  elif (( reallocated_sectors > 10 )); then
    severity=$((severity + 2))
  elif (( reallocated_sectors > 0 )); then
    severity=$((severity + 1))
  fi

  # Usage-based assessment
  if (( hours > 50000 )); then
    severity=$((severity + 3))
  elif (( hours > 30000 )); then
    severity=$((severity + 2))
  elif (( hours > 15000 )); then
    severity=$((severity + 1))
  fi

  # Mechanical wear (for HDDs)
  if [[ "$disk_type" == "HDD" ]]; then
    if (( start_stop_count > 50000 )); then
      severity=$((severity + 2))
    elif (( start_stop_count > 20000 )); then
      severity=$((severity + 1))
    fi
    if (( load_cycle_count > 500000 )); then
      severity=$((severity + 2))
    elif (( load_cycle_count > 200000 )); then
      severity=$((severity + 1))
    fi
  fi

  if (( severity >= 5 )); then
    echo "${RED}< 6 months${NC} (Multiple risk factors)"
  elif (( severity >= 3 )); then
    echo "${YELLOW}6-18 months${NC} (Moderate wear)"
  elif (( severity >= 1 )); then
    echo "${YELLOW}1-3 years${NC} (Light wear)"
  else
    echo "${GREEN}> 3 years${NC} (Healthy)"
  fi
}
# Report Linux software RAID (md) arrays listed in /proc/mdstat.
# For each array that exists as a block device, prints its name and, when
# mdadm is available and returns output, its RAID level, state and active
# device count. Takes no arguments; prints nothing when no arrays are found.
check_mdraid() {
  local md_devices=()
  # Loop variables are local so they do not clobber the caller's globals.
  local line md md_info

  if [[ -f /proc/mdstat ]]; then
    while IFS= read -r line; do
      # Array lines start with the device name, e.g. "md0 : active raid1 ...".
      if [[ $line =~ ^md[0-9]+ ]]; then
        md_devices+=("/dev/${line%% *}")
      fi
    done < /proc/mdstat
  fi

  for md in "${md_devices[@]}"; do
    [[ -b "$md" ]] || continue
    print_color "$MAGENTA" "Found software RAID: $md"
    command_exists mdadm || continue
    md_info=$(mdadm --detail "$md" 2>/dev/null)
    [[ -n "$md_info" ]] || continue
    echo "RAID Level: $(grep "Raid Level" <<<"$md_info" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "State: $(grep "State" <<<"$md_info" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "Devices: $(grep "Active Devices" <<<"$md_info" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo ""
  done
}
# Print a health report for one disk.
#   $1 - device path (e.g. /dev/sda, /dev/nvme0n1)
# Calls get_disk_info, calculate_tbw, get_estimated_endurance,
# estimate_ssd_lifespan, estimate_hdd_lifespan and print_color.
#
# Fixes relative to the previous revision:
#  * smartctl's exit status is captured directly; `$?` after
#    `local var=$(...)` is always 0, so the old check could never fire.
#    Only bits 0-1 (bad command line / device open failed) abort the
#    report; the higher bits are documented for failing disks, which still
#    need to be shown.
#  * Capacity in GB is derived from the full byte count. The old parse
#    stopped at the first thousands separator, turning 1 TB into "1".
#  * NVMe "Data Units Written" is read from the numeric field and
#    converted to 512-byte sectors (one unit = 1000 sectors).
#  * Host_Writes_32MiB is no longer passed off as a sector count.
#  * The enterprise flag is compared as a string, not executed.
check_disk() {
  local disk=$1
  print_color "$CYAN" "Checking disk: $disk"
  echo "=================================================="

  # Check if disk exists and is accessible
  if [[ ! -b "$disk" ]]; then
    print_color "$RED" "Error: $disk is not a valid block device"
    echo ""
    return
  fi

  # Get disk information ("type|transport|is_enterprise")
  local disk_info disk_type transport is_enterprise
  disk_info=$(get_disk_info "$disk")
  IFS='|' read -r disk_type transport is_enterprise <<<"$disk_info"

  # Get basic disk information
  local info health attributes smart_rc
  info=$(smartctl -i "$disk" 2>/dev/null)
  smart_rc=$?
  health=$(smartctl -H "$disk" 2>/dev/null)
  attributes=$(smartctl -A "$disk" 2>/dev/null)
  if (( smart_rc & 3 )) || [[ -z "$info" ]]; then
    print_color "$YELLOW" "Cannot read disk information. It may be offline, unsupported, or need root access."
    echo ""
    return
  fi

  # Extract disk information
  local model serial firmware capacity capacity_line
  model=$(grep "Device Model:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//')
  [[ -z "$model" ]] && model=$(grep "Model Number:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//')
  serial=$(grep "Serial Number:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//')
  firmware=$(grep "Firmware Version:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//')
  # NVMe devices report their size under different labels than ATA/SCSI.
  capacity_line=$(grep -E "User Capacity:|Total NVM Capacity:|Namespace 1 Size/Capacity:" <<<"$info" | head -n 1)
  capacity=$(cut -d: -f2 <<<"$capacity_line" | sed 's/^[ \t]*//' | cut -d'[' -f1)

  # Extract capacity in GB from the byte count (thousands separators removed)
  local capacity_gb=0 capacity_bytes
  capacity_bytes=$(tr -cd '0-9' <<<"$capacity")
  [[ -n "$capacity_bytes" ]] && capacity_gb=$(( 10#$capacity_bytes / 1000000000 ))

  # ATA prints "...test result: X"; SCSI/SAS prints "SMART Health Status: X".
  local health_status
  health_status=$(grep -E "result:|SMART Health Status:" <<<"$health" | head -n 1 | cut -d: -f2 | sed 's/^[ \t]*//')

  # Extract SMART attributes (raw value is column 10 of the ATA table)
  local power_on_hours reallocated_sectors pending_sectors total_written
  local host_writes_32mib start_stop_count load_cycle_count
  power_on_hours=$(awk '/Power_On_Hours/ {print $10; exit}' <<<"$attributes")
  reallocated_sectors=$(awk '/Reallocated_Sector_Ct/ {print $10; exit}' <<<"$attributes")
  pending_sectors=$(awk '/Current_Pending_Sector/ {print $10; exit}' <<<"$attributes")
  total_written=$(awk '/Total_LBAs_Written/ {print $10; exit}' <<<"$attributes")
  host_writes_32mib=$(awk '/Host_Writes_32MiB/ {print $10; exit}' <<<"$attributes")
  start_stop_count=$(awk '/Start_Stop_Count/ {print $10; exit}' <<<"$attributes")
  load_cycle_count=$(awk '/Load_Cycle_Count/ {print $10; exit}' <<<"$attributes")

  # For NVMe disks using smartctl
  if [[ "$disk_type" == "NVMe" ]]; then
    local nvme_attributes data_units
    nvme_attributes=$(smartctl -x "$disk" 2>/dev/null)
    power_on_hours=$(awk '/Power On Hours/ {print $4; exit}' <<<"$nvme_attributes" | tr -d ',')
    data_units=$(awk '/Data Units Written/ {print $4; exit}' <<<"$nvme_attributes" | tr -d ',')
    if [[ "$data_units" =~ ^[0-9]+$ ]]; then
      total_written=$(( 10#$data_units * 1000 ))
    else
      total_written=""
      host_writes_32mib=$(awk '/Host_Writes_32MiB/ {print $10; exit}' <<<"$nvme_attributes")
    fi
  fi

  # Some drives report hours as "1234h+56m+07.890s"; keep the hour count.
  power_on_hours=${power_on_hours%%[!0-9]*}

  local disk_class="Consumer"
  [[ "$is_enterprise" == "true" ]] && disk_class="Enterprise"

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Interface: $transport"
  echo "Class: $disk_class"
  echo "Capacity: ${capacity:-Unknown}"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"
  echo "Power On Hours: ${power_on_hours:-Unknown}"

  # Disk type specific analysis
  if [[ "$disk_type" == "HDD" ]]; then
    echo "Realloc Sectors: ${reallocated_sectors:-0}"
    echo "Pending Sectors: ${pending_sectors:-0}"
    [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}"
    [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}"
    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type")
    echo "Lifespan: $lifespan"
  elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "")
    fi
    local estimated_endurance
    estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type")
    echo "TBW Used: ${tbw_used} TB"
    echo "TBW Endurance: ${estimated_endurance} TB"
    local lifespan_info lifespan_percent tbw_remaining wear_status
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type")
    lifespan_percent=$(cut -d'|' -f1 <<<"$lifespan_info")
    tbw_remaining=$(cut -d'|' -f2 <<<"$lifespan_info")
    wear_status=$(cut -d'|' -f3 <<<"$lifespan_info")
    if [[ "$estimated_endurance" != "N/A" ]]; then
      echo "TBW Remaining: $tbw_remaining"
      echo "Lifespan: $lifespan_percent ($wear_status)"
    fi
    # Show mechanical attributes for SAS drives that might be SSDs
    if [[ "$disk_type" == "SAS" ]]; then
      echo "Realloc Sectors: ${reallocated_sectors:-0}"
      echo "Pending Sectors: ${pending_sectors:-0}"
    fi
  else
    print_color "$YELLOW" "Unknown disk type - limited information available"
  fi
  echo ""
}
# List whole-disk block devices found under /dev.
# Prints the device paths space-separated on one line (an empty line when
# none exist). Covers sdX/sdXX, NVMe namespaces, and vdX/xvdX.
detect_disks() {
  local disks=()
  local disk   # kept local so the caller's $disk is not overwritten
  # Anchored so partitions (nvme0n1p1) are excluded, while multi-digit
  # controller/namespace numbers (nvme10n1, nvme0n12) are accepted.
  local nvme_re='^/dev/nvme[0-9]+n[0-9]+$'

  # Check for SATA/SAS disks
  for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do
    [[ -b "$disk" ]] && disks+=("$disk")
  done
  # Check for NVMe disks (base devices only, no partitions)
  for disk in /dev/nvme*n*; do
    [[ "$disk" =~ $nvme_re && -b "$disk" ]] && disks+=("$disk")
  done
  # Check for other disk types
  for disk in /dev/vd[a-z] /dev/xvd[a-z]; do
    [[ -b "$disk" ]] && disks+=("$disk")
  done
  echo "${disks[@]}"
}
# Entry point: print the banner, verify dependencies, report software RAID,
# then check every requested (or auto-detected) disk.
#   $@ - optional device paths; when none are given detect_disks is used
# Exits with status 1 when no usable disk remains.
main() {
  print_color "$BLUE" "Disk Health Check Script v$VERSION for Alma Linux 9"
  print_color "$BLUE" "===================================================="
  echo ""
  check_dependencies

  local disks=()
  local disk   # loop variable kept local so it does not leak to the caller

  # Check for soft-raid first
  check_mdraid

  # If specific disk provided, check only that disk
  if [[ $# -gt 0 ]]; then
    for disk in "$@"; do
      if [[ -b "$disk" ]]; then
        disks+=("$disk")
      else
        print_color "$RED" "Error: $disk is not a valid block device"
      fi
    done
  else
    # Auto-detect disks
    print_color "$CYAN" "Auto-detecting disks..."
    read -ra disks <<< "$(detect_disks)"
  fi

  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi
  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check if running as root, warn if not
  if [[ $EUID -ne 0 ]]; then
    print_color "$YELLOW" "Warning: Not running as root. Some disks may not be accessible."
    echo "For complete results, run as: sudo $0"
    echo ""
  fi

  # Check each disk
  for disk in "${disks[@]}"; do
    check_disk "$disk"
  done
  print_color "$BLUE" "Check completed!"
}
# Print the command-line help text to stdout. Reads the global SCRIPT_NAME.
usage() {
  local self=$SCRIPT_NAME
  printf '%s\n' \
    "Usage: $self [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks" \
    "" \
    "Examples:" \
    " $self # Check all auto-detected disks" \
    " sudo $self # Check all disks (as root)" \
    " $self /dev/sda # Check specific disk" \
    " $self /dev/nvme0n1 # Check specific NVMe disk" \
    " $self /dev/sda /dev/nvme0n1 # Check multiple disks" \
    "" \
    "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID"
}
# Command-line dispatch: help and version flags are handled here and exit
# immediately; anything else is forwarded unchanged to main.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi

677
alma-v2.5.sh Executable file
View File

@@ -0,0 +1,677 @@
#!/bin/bash
# Disk Health Check Script for Alma Linux 9
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
# Supports consumer and enterprise disk classification
SCRIPT_NAME=$(basename "$0")
VERSION="2.5"
# Color codes.
# tput exits non-zero and writes a diagnostic when TERM is unset or unknown
# (cron jobs, minimal containers). Swallow that and fall back to empty
# strings so the report is simply uncoloured instead of noisy.
RED=$(tput setaf 1 2>/dev/null || true)
GREEN=$(tput setaf 2 2>/dev/null || true)
YELLOW=$(tput setaf 3 2>/dev/null || true)
BLUE=$(tput setaf 4 2>/dev/null || true)
CYAN=$(tput setaf 6 2>/dev/null || true)
MAGENTA=$(tput setaf 5 2>/dev/null || true)
NC=$(tput sgr0 2>/dev/null || true)
# Emit one line of text wrapped in a colour sequence and the reset code.
#   $1 - colour escape sequence (for example "$RED")
#   $2 - text to print; backslash escapes are interpreted (echo -e)
# Reads the global NC as the reset sequence.
print_color() {
  local -r tint=$1 text=$2
  echo -e "${tint}${text}${NC}"
}
# Succeeds (status 0) when "$1" resolves to something runnable according to
# `command -v`; all output is discarded.
command_exists() {
  command -v "$1" &>/dev/null
}
# Verify that the external tools this script shells out to are installed.
# Prints the missing package names plus an install hint and exits with
# status 1 when at least one is absent; otherwise returns silently.
check_dependencies() {
  local -a absent=()
  local spec
  # Each entry is "<command>:<package that provides it>".
  for spec in "smartctl:smartmontools" "bc:bc"; do
    command_exists "${spec%%:*}" || absent+=("${spec#*:}")
  done
  (( ${#absent[@]} == 0 )) && return
  print_color $RED "Error: Missing required packages: ${absent[*]}"
  echo "Install with: sudo dnf install ${absent[*]}"
  exit 1
}
# Endurance lookup tables: key = capacity tier in GB, value = TB written.
# The figures are the script's own conservative ("lowest") assumptions.
declare -A CONSUMER_TBW=(
  [250]=150 [500]=300 [1000]=600 [2000]=1200 [4000]=2400 [8000]=4800
)
declare -A ENTERPRISE_TBW=(
  [250]=450 [500]=900 [1000]=1800 [2000]=3600 [4000]=7200 [8000]=14400
)
# Map a capacity in GB onto the smallest endurance tier that can hold it.
#   $1 - capacity in GB (a fractional part, if any, is ignored)
# Prints one of 250/500/1000/2000/4000/8000. Capacities above the largest
# tier are clamped to it (no scaling is applied); empty or non-numeric
# input is treated as 0 and therefore lands in the smallest tier.
get_capacity_tier() {
  local capacity_gb=${1%%.*}
  local tier
  [[ "$capacity_gb" =~ ^[0-9]+$ ]] || capacity_gb=0
  for tier in 250 500 1000 2000 4000 8000; do
    # 10# forces base 10 so a leading zero is not read as octal.
    if (( 10#$capacity_gb <= tier )); then
      echo "$tier"
      return
    fi
  done
  echo "8000"
}
# Render a byte count with a unit suffix.
#   $1 - number of bytes
# Values of at least 2^40 / 2^30 / 2^20 are divided by that power of two
# and printed with two decimals as "TB" / "GB" / "MB"; smaller values are
# printed verbatim as "<n> bytes".
bytes_to_human() {
  local bytes=$1
  local divisor unit
  if [[ $bytes -ge 1099511627776 ]]; then
    divisor=1099511627776
    unit="TB"
  elif [[ $bytes -ge 1073741824 ]]; then
    divisor=1073741824
    unit="GB"
  elif [[ $bytes -ge 1048576 ]]; then
    divisor=1048576
    unit="MB"
  else
    echo "$bytes bytes"
    return
  fi
  local scaled
  scaled=$(bc 2>/dev/null <<<"scale=2; $bytes / $divisor" || echo "0")
  echo "$scaled $unit"
}
# Reduce a power-on-hours field to its leading run of digits.
#   $1 - raw field, e.g. "1234" or "1234h+56m+07.890s"
# Prints the leading digits, or "0" when the field does not start with one.
extract_numeric_hours() {
  local power_on_hours=$1
  # Drop everything from the first non-digit character onwards.
  local leading=${power_on_hours%%[!0-9]*}
  echo "${leading:-0}"
}
# Classify a disk from `smartctl -i` output.
#   $1 - device path
# Prints "<type>|<transport>|<is_enterprise>" where type is one of
# NVMe, SAS, SSD, HDD or UNKNOWN and is_enterprise is "true"/"false".
#
# Keywords are matched as whole words (grep -w). Plain substring matching
# flagged almost everything as enterprise: "EP" hit "Sep" in the
# "Local Time is:" line, "DC" hit the "WDC" vendor prefix, and "SAS"/"EP"
# could hit arbitrary serial numbers.
get_disk_info() {
  local disk=$1
  local info
  info=$(smartctl -i "$disk" 2>/dev/null)
  local transport=""
  local disk_type="UNKNOWN"
  local is_enterprise=false

  if [[ "$disk" == /dev/nvme* ]] || grep -qi "NVMe" <<<"$info"; then
    disk_type="NVMe"
    transport="NVMe"
  elif grep -qiw "SAS" <<<"$info"; then
    disk_type="SAS"
    transport="SAS"
    is_enterprise=true
  elif grep -qi "Solid State Device" <<<"$info"; then
    disk_type="SSD"
    transport="SATA"
  elif grep -qi "Rotation Rate" <<<"$info"; then
    disk_type="HDD"
    transport="SATA"
  fi

  # Enterprise hints anywhere in the identify output.
  if grep -qiwE 'ENTERPRISE|EP|SAS|Xeon|Xeons|DualPort|PowerLoss|PLP' <<<"$info"; then
    is_enterprise=true
  fi

  # Enterprise hints in the model string. NVMe devices report
  # "Model Number:" instead of "Device Model:".
  local model
  model=$(grep -iE '^(Device Model|Model Number):' <<<"$info" \
    | head -n 1 | cut -d: -f2- | sed 's/^[ \t]*//')
  if grep -qiwE 'PRO|EP|DC|ENT|ENTERPRISE' <<<"$model"; then
    is_enterprise=true
  fi

  echo "$disk_type|$transport|$is_enterprise"
}
# Convert a raw write counter into terabytes written (decimal TB).
#   $1 - disk type (accepted for interface compatibility; the conversion
#        is the same for every type)
#   $2 - counter expressed in 32 MiB units (e.g. Host_Writes_32MiB)
#   $3 - counter expressed in 512-byte sectors; takes precedence over $2
# Prints the value truncated to two decimals ("0.50", "12.34"), or "0"
# when neither counter is a positive integer.
#
# Uses shell integer arithmetic only, so malformed input can no longer
# abort an arithmetic expansion and values below 1 TB keep their leading
# zero. The 32 MiB unit is converted with its real size (33554432 bytes)
# rather than 32 * 10^6.
calculate_tbw() {
  local disk_type=$1
  local raw_value=$2
  local sectors=$3
  local bytes=0

  if [[ "$sectors" =~ ^[0-9]+$ ]] && (( 10#$sectors > 0 )); then
    bytes=$(( 10#$sectors * 512 ))
  elif [[ "$raw_value" =~ ^[0-9]+$ ]] && (( 10#$raw_value > 0 )); then
    bytes=$(( 10#$raw_value * 33554432 ))
  fi

  if (( bytes <= 0 )); then
    echo "0"
    return
  fi

  # 10^12 bytes per TB; work in hundredths of a TB to get two decimals.
  local hundredths=$(( bytes / 10000000000 ))
  printf '%d.%02d\n' "$(( hundredths / 100 ))" "$(( hundredths % 100 ))"
}
# Look up the assumed endurance (TB written) for a drive.
#   $1 - capacity in GB
#   $2 - "true" selects the enterprise table, anything else the consumer one
#   $3 - disk type; "HDD" short-circuits to "N/A"
# Prints the table value for the tier chosen by get_capacity_tier.
get_estimated_endurance() {
  local capacity_gb=$1 is_enterprise=$2 disk_type=$3

  [[ "$disk_type" != "HDD" ]] || { echo "N/A"; return; }

  local bucket
  bucket=$(get_capacity_tier "$capacity_gb")
  case "$is_enterprise" in
    true) echo "${ENTERPRISE_TBW[$bucket]}" ;;
    *)    echo "${CONSUMER_TBW[$bucket]}" ;;
  esac
}
# Estimate remaining SSD life from TB written versus assumed endurance.
#   $1 - power-on hours (empty or 0 => treated as a new/unknown drive)
#   $2 - TB written so far; any characters other than digits and "." are
#        stripped before use
#   $3 - endurance in TB, or "N/A"
#   $4 - disk type, echoed back as the last field
# Prints "<remaining %>|<remaining TB>|<status>|<disk type>"; the first two
# fields carry colour codes from RED/YELLOW/GREEN/NC when wear is known.
estimate_ssd_lifespan() {
  local power_on_hours=$1 tbw_used=$2 estimated_endurance=$3 disk_type=$4

  if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then
    echo "Unknown||Unknown|New"
    return
  fi
  if [[ "$estimated_endurance" == "N/A" ]]; then
    echo "N/A|N/A|N/A|HDD"
    return
  fi

  # Keep only the numeric part of the written figure; default to 0.
  local written
  written=$(sed 's/[^0-9.]//g' <<<"$tbw_used")
  written=${written:-0}

  local tb_left
  tb_left=$(bc 2>/dev/null <<<"scale=2; $estimated_endurance - $written" || echo "$estimated_endurance")

  # Nothing written yet (or bc could not tell): report the full budget.
  if ! [[ $(bc 2>/dev/null <<<"$written > 0") -eq 1 ]]; then
    echo "Unknown|${estimated_endurance} TB|New|$disk_type"
    return
  fi

  local pct_used pct_left tint verdict
  pct_used=$(bc 2>/dev/null <<<"scale=1; $written * 100 / $estimated_endurance" || echo "0")
  pct_left=$(bc 2>/dev/null <<<"scale=1; 100 - $pct_used" || echo "100")

  if [[ $(bc 2>/dev/null <<<"$pct_used >= 80") -eq 1 ]]; then
    tint=$RED
    verdict="High wear"
  elif [[ $(bc 2>/dev/null <<<"$pct_used >= 50") -eq 1 ]]; then
    tint=$YELLOW
    verdict="Moderate wear"
  else
    tint=$GREEN
    verdict="Healthy"
  fi
  echo "${tint}${pct_left}%${NC}|${tint}${tb_left} TB${NC}|${verdict}|$disk_type"
}
# Heuristic remaining-life estimate for a rotating disk.
#   $1 - power-on hours (raw; reduced to a number by extract_numeric_hours)
#   $2 - reallocated sector count
#   $3 - pending sector count (any value > 0 is reported as CRITICAL)
#   $4 - start/stop count     (only scored when $6 is "HDD")
#   $5 - load cycle count     (only scored when $6 is "HDD")
#   $6 - disk type
# Prints a coloured one-line verdict, or "Unknown" when the hour count is
# empty or zero.
estimate_hdd_lifespan() {
  local power_on_hours=$1 reallocated_sectors=$2 pending_sectors=$3
  local start_stop_count=$4 load_cycle_count=$5 disk_type=$6

  local hours
  hours=$(extract_numeric_hours "$power_on_hours")
  if [[ -z "$hours" || "$hours" -eq 0 ]]; then
    echo "Unknown"
    return
  fi

  # Pending sectors override every other consideration.
  if [[ "$pending_sectors" -gt 0 ]]; then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
    return
  fi

  local risk=0

  # Reallocated sectors: 1 / 2 / 3 points above 0 / 10 / 100.
  if [[ "$reallocated_sectors" -gt 100 ]]; then
    (( risk += 3 ))
  elif [[ "$reallocated_sectors" -gt 10 ]]; then
    (( risk += 2 ))
  elif [[ "$reallocated_sectors" -gt 0 ]]; then
    (( risk += 1 ))
  fi

  # Age: 1 / 2 / 3 points above 15k / 30k / 50k hours.
  if [[ "$hours" -gt 50000 ]]; then
    (( risk += 3 ))
  elif [[ "$hours" -gt 30000 ]]; then
    (( risk += 2 ))
  elif [[ "$hours" -gt 15000 ]]; then
    (( risk += 1 ))
  fi

  # Mechanical counters are only scored for HDDs.
  if [[ "$disk_type" == "HDD" ]]; then
    if [[ "$start_stop_count" -gt 50000 ]]; then
      (( risk += 2 ))
    elif [[ "$start_stop_count" -gt 20000 ]]; then
      (( risk += 1 ))
    fi
    if [[ "$load_cycle_count" -gt 500000 ]]; then
      (( risk += 2 ))
    elif [[ "$load_cycle_count" -gt 200000 ]]; then
      (( risk += 1 ))
    fi
  fi

  local verdict
  if [[ $risk -ge 5 ]]; then
    verdict="${RED}< 6 months${NC} (Multiple risk factors)"
  elif [[ $risk -ge 3 ]]; then
    verdict="${YELLOW}6-18 months${NC} (Moderate wear)"
  elif [[ $risk -ge 1 ]]; then
    verdict="${YELLOW}1-3 years${NC} (Light wear)"
  else
    verdict="${GREEN}> 3 years${NC} (Healthy)"
  fi
  echo "$verdict"
}
# Report Linux software RAID (md) arrays listed in /proc/mdstat.
# For each array that exists as a block device, prints its name and, when
# mdadm is available and returns output, its RAID level, state and active
# device count. Takes no arguments; prints nothing when no arrays are found.
check_mdraid() {
  local md_devices=()
  # Loop variables are local so they do not clobber the caller's globals.
  local line md md_info

  if [[ -f /proc/mdstat ]]; then
    while IFS= read -r line; do
      # Array lines start with the device name, e.g. "md0 : active raid1 ...".
      if [[ $line =~ ^md[0-9]+ ]]; then
        md_devices+=("/dev/${line%% *}")
      fi
    done < /proc/mdstat
  fi

  for md in "${md_devices[@]}"; do
    [[ -b "$md" ]] || continue
    print_color "$MAGENTA" "Found software RAID: $md"
    command_exists mdadm || continue
    md_info=$(mdadm --detail "$md" 2>/dev/null)
    [[ -n "$md_info" ]] || continue
    echo "RAID Level: $(grep "Raid Level" <<<"$md_info" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "State: $(grep "State" <<<"$md_info" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "Devices: $(grep "Active Devices" <<<"$md_info" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo ""
  done
}
# Parse a capacity string into whole decimal gigabytes.
#   $1 - text such as "500,107,862,016 bytes", "[1.82 TB]", "[500.1 GB]",
#        "1.82TB" or "500.1GB"
# Prints the capacity in GB (truncated), or "0" when nothing usable is
# found. Patterns are tried in this order: bracketed TB, bracketed GB,
# byte count, bare TB, bare GB; the first match wins.
#
# Whitespace is matched with the POSIX class [[:space:]] because "\s" is a
# libc extension in bash's regex matching, and the arithmetic is done in
# the shell so the function has no dependency on bc.
extract_capacity_gb() {
  local capacity=$1
  local capacity_gb=0
  # "<kind>:<regex>" where kind is T (terabytes), G (gigabytes), B (bytes).
  local -a patterns=(
    'T:\[([0-9,.]+)[[:space:]]*[Tt][Bb]'
    'G:\[([0-9,.]+)[[:space:]]*[Gg][Bb]'
    'B:([0-9,]+)[[:space:]]+bytes'
    'T:([0-9,.]+)[[:space:]]*[Tt][Bb]'
    'G:([0-9,.]+)[[:space:]]*[Gg][Bb]'
  )
  local decimal_re='^([0-9]*)\.?([0-9]*)$'
  local spec kind="" value="" whole frac

  for spec in "${patterns[@]}"; do
    [[ $capacity =~ ${spec#*:} ]] || continue
    kind=${spec%%:*}
    value=${BASH_REMATCH[1]//,/}   # drop thousands separators
    break
  done

  case "$kind" in
    B)
      [[ "$value" =~ ^[0-9]+$ ]] && capacity_gb=$(( 10#$value / 1000000000 ))
      ;;
    T | G)
      if [[ "$value" =~ $decimal_re && -n "${BASH_REMATCH[1]}${BASH_REMATCH[2]}" ]]; then
        whole=${BASH_REMATCH[1]:-0}
        frac=${BASH_REMATCH[2]}
        if [[ "$kind" == "T" ]]; then
          # x1000 without floating point: the first three fractional
          # digits (zero-padded) are the GB remainder.
          frac="${frac}000"
          frac=${frac:0:3}
          capacity_gb=$(( 10#$whole * 1000 + 10#$frac ))
        else
          capacity_gb=$(( 10#$whole ))
        fi
      fi
      ;;
  esac

  if (( capacity_gb > 0 )); then
    echo "$capacity_gb"
  else
    echo "0"
  fi
}
# Fetch the capacity text of an NVMe device from `smartctl -i`.
#   $1 - device path
# Tries "Total NVM Capacity", then "Namespace 1 Size/Capacity", then
# "User Capacity" (with the trailing "[...]" part removed) and prints the
# first non-empty value; prints an empty line when none is present.
get_nvme_capacity() {
  local disk=$1
  local identify label found=""
  identify=$(smartctl -i "$disk" 2>/dev/null)

  for label in "Total NVM Capacity" "Namespace 1 Size/Capacity"; do
    found=$(grep -i "$label" <<<"$identify" | cut -d: -f2 | sed 's/^[ \t]*//')
    [[ -n "$found" ]] && break
  done

  if [[ -z "$found" ]]; then
    found=$(grep -i "User Capacity" <<<"$identify" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
  fi
  echo "$found"
}
# Format a capacity given in GB for display.
#   $1 - original capacity text (accepted but not used)
#   $2 - capacity in GB
# Prints "<n> GB" below 1000 GB, otherwise the value divided by 1000 with
# two decimals followed by " TB".
get_human_capacity() {
  local capacity=$1
  local capacity_gb=$2
  if ! [[ $capacity_gb -ge 1000 ]]; then
    echo "${capacity_gb} GB"
    return
  fi
  local in_tb
  in_tb=$(bc <<<"scale=2; $capacity_gb / 1000")
  echo "$in_tb TB"
}
# Print a health report for one disk.
#   $1 - device path (e.g. /dev/sda, /dev/nvme0n1)
# Calls get_disk_info, get_nvme_capacity, extract_capacity_gb,
# get_human_capacity, extract_numeric_hours, calculate_tbw,
# get_estimated_endurance, estimate_ssd_lifespan, estimate_hdd_lifespan
# and print_color.
#
# Fixes relative to the previous revision:
#  * smartctl's exit status is captured directly; `$?` after
#    `local var=$(...)` is always 0, so the old check could never fire.
#    Only bits 0-1 (bad command line / device open failed) abort the
#    report; the higher bits are documented for failing disks, which still
#    need to be shown.
#  * NVMe power-on hours have their thousands separators removed
#    ("12,000" used to evaluate to 0 and mark the drive as new).
#  * Host_Writes_32MiB is no longer passed off as a sector count.
#  * The unreachable second "Data Units Written" lookup is gone and the
#    unit conversion uses shell arithmetic.
#  * The enterprise flag is compared as a string, not executed.
#  * An undetermined capacity is shown as "Unknown" instead of "0 GB".
check_disk() {
  local disk=$1
  print_color "$CYAN" "Checking disk: $disk"
  echo "=================================================="

  # Check if disk exists and is accessible
  if [[ ! -b "$disk" ]]; then
    print_color "$RED" "Error: $disk is not a valid block device"
    echo ""
    return
  fi

  # Get disk information ("type|transport|is_enterprise")
  local disk_info disk_type transport is_enterprise
  disk_info=$(get_disk_info "$disk")
  IFS='|' read -r disk_type transport is_enterprise <<<"$disk_info"

  # Get basic disk information
  local info health attributes smart_rc
  info=$(smartctl -i "$disk" 2>/dev/null)
  smart_rc=$?
  health=$(smartctl -H "$disk" 2>/dev/null)
  attributes=$(smartctl -A "$disk" 2>/dev/null)
  if (( smart_rc & 3 )) || [[ -z "$info" ]]; then
    print_color "$YELLOW" "Cannot read disk information. It may be offline, unsupported, or need root access."
    echo ""
    return
  fi

  # Extract disk information
  local model serial capacity firmware
  model=$(grep "Device Model:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//')
  [[ -z "$model" ]] && model=$(grep "Model Number:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//')
  serial=$(grep "Serial Number:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//')
  capacity=$(grep "User Capacity:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
  firmware=$(grep "Firmware Version:" <<<"$info" | cut -d: -f2 | sed 's/^[ \t]*//')

  # For NVMe disks, try to get capacity from different fields
  if [[ "$disk_type" == "NVMe" ]]; then
    local nvme_capacity
    nvme_capacity=$(get_nvme_capacity "$disk")
    if [[ -n "$nvme_capacity" ]]; then
      capacity="$nvme_capacity"
    fi
  fi

  # Extract capacity in GB and human readable format
  local capacity_gb capacity_human
  capacity_gb=$(extract_capacity_gb "$capacity")
  capacity_human=$(get_human_capacity "$capacity" "$capacity_gb")

  # If capacity extraction failed, guess from the model name
  if [[ "$capacity_gb" -eq 0 ]]; then
    capacity_human="Unknown"
    if [[ "$disk_type" == "NVMe" && "$model" =~ 500[Gg] ]]; then
      capacity_gb=500
      capacity_human="500 GB"
    elif [[ "$model" =~ 960[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then
      capacity_gb=1000
      capacity_human="1 TB"
    elif [[ "$model" =~ 2[Tt] ]]; then
      capacity_gb=2000
      capacity_human="2 TB"
    elif [[ "$model" =~ 500[Gg] ]]; then
      capacity_gb=500
      capacity_human="500 GB"
    elif [[ "$model" =~ 250[Gg] ]]; then
      capacity_gb=250
      capacity_human="250 GB"
    fi
  fi

  local health_status
  health_status=$(grep "result:" <<<"$health" | cut -d: -f2 | sed 's/^[ \t]*//')
  [[ -z "$health_status" ]] && health_status=$(grep "SMART overall-health" <<<"$health" | cut -d: -f2 | sed 's/^[ \t]*//')

  # Extract SMART attributes (raw value is column 10 of the ATA table)
  local power_on_hours_raw power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib start_stop_count load_cycle_count
  power_on_hours_raw=$(awk '/Power_On_Hours/ {print $10; exit}' <<<"$attributes")
  power_on_hours=$(extract_numeric_hours "$power_on_hours_raw")
  reallocated_sectors=$(awk '/Reallocated_Sector_Ct/ {print $10; exit}' <<<"$attributes")
  pending_sectors=$(awk '/Current_Pending_Sector/ {print $10; exit}' <<<"$attributes")
  total_written=$(awk '/Total_LBAs_Written/ {print $10; exit}' <<<"$attributes")
  host_writes_32mib=$(awk '/Host_Writes_32MiB/ {print $10; exit}' <<<"$attributes")
  start_stop_count=$(awk '/Start_Stop_Count/ {print $10; exit}' <<<"$attributes")
  load_cycle_count=$(awk '/Load_Cycle_Count/ {print $10; exit}' <<<"$attributes")

  # For NVMe disks using smartctl extended attributes
  if [[ "$disk_type" == "NVMe" ]]; then
    local nvme_attributes data_units_written nvme_power_hours
    nvme_attributes=$(smartctl -x "$disk" 2>/dev/null)
    # Convert data units to sectors (1 data unit = 1000 sectors for NVMe)
    data_units_written=$(awk '/Data Units Written/ {print $4; exit}' <<<"$nvme_attributes" | tr -d ',')
    if [[ "$data_units_written" =~ ^[0-9]+$ ]]; then
      total_written=$(( 10#$data_units_written * 1000 ))
    fi
    # Get power on hours for NVMe
    nvme_power_hours=$(awk '/Power On Hours/ {print $4; exit}' <<<"$nvme_attributes" | tr -d ',')
    if [[ -n "$nvme_power_hours" ]]; then
      power_on_hours="$nvme_power_hours"
    fi
  fi

  local disk_class="Consumer"
  [[ "$is_enterprise" == "true" ]] && disk_class="Enterprise"

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Interface: $transport"
  echo "Class: $disk_class"
  echo "Capacity: $capacity_human"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"
  echo "Power On Hours: ${power_on_hours:-Unknown}"

  # Disk type specific analysis
  if [[ "$disk_type" == "HDD" ]]; then
    echo "Realloc Sectors: ${reallocated_sectors:-0}"
    echo "Pending Sectors: ${pending_sectors:-0}"
    [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}"
    [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}"
    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type")
    echo "Lifespan: $lifespan"
  elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "")
    fi
    local estimated_endurance
    estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type")
    echo "TBW Used: ${tbw_used} TB"
    echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)"
    local lifespan_info lifespan_percent tbw_remaining wear_status
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type")
    lifespan_percent=$(cut -d'|' -f1 <<<"$lifespan_info")
    tbw_remaining=$(cut -d'|' -f2 <<<"$lifespan_info")
    wear_status=$(cut -d'|' -f3 <<<"$lifespan_info")
    if [[ "$estimated_endurance" != "N/A" ]]; then
      echo "TBW Remaining: $tbw_remaining"
      echo "Lifespan: $lifespan_percent ($wear_status)"
    fi
    # Show mechanical attributes for SAS drives that might be SSDs
    if [[ "$disk_type" == "SAS" ]]; then
      echo "Realloc Sectors: ${reallocated_sectors:-0}"
      echo "Pending Sectors: ${pending_sectors:-0}"
    fi
  else
    print_color "$YELLOW" "Unknown disk type - limited information available"
  fi
  echo ""
}
# List whole-disk block devices found under /dev.
# Prints the device paths space-separated on one line (an empty line when
# none exist). Covers sdX/sdXX, NVMe namespaces, and vdX/xvdX.
detect_disks() {
  local disks=()
  local disk   # kept local so the caller's $disk is not overwritten
  # Anchored so partitions (nvme0n1p1) are excluded, while multi-digit
  # controller/namespace numbers (nvme10n1, nvme0n12) are accepted.
  local nvme_re='^/dev/nvme[0-9]+n[0-9]+$'

  # Check for SATA/SAS disks
  for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do
    [[ -b "$disk" ]] && disks+=("$disk")
  done
  # Check for NVMe disks (base devices only, no partitions)
  for disk in /dev/nvme*n*; do
    [[ "$disk" =~ $nvme_re && -b "$disk" ]] && disks+=("$disk")
  done
  # Check for other disk types
  for disk in /dev/vd[a-z] /dev/xvd[a-z]; do
    [[ -b "$disk" ]] && disks+=("$disk")
  done
  echo "${disks[@]}"
}
# Entry point: print the banner, verify dependencies, report software RAID,
# then check every requested (or auto-detected) disk and finish with a note
# about the endurance figures.
#   $@ - optional device paths; when none are given detect_disks is used
# Exits with status 1 when no usable disk remains.
main() {
  print_color "$BLUE" "Disk Health Check Script v$VERSION for Alma Linux 9"
  print_color "$BLUE" "===================================================="
  echo ""
  check_dependencies

  local disks=()
  local disk   # loop variable kept local so it does not leak to the caller

  # Check for soft-raid first
  check_mdraid

  # If specific disk provided, check only that disk
  if [[ $# -gt 0 ]]; then
    for disk in "$@"; do
      if [[ -b "$disk" ]]; then
        disks+=("$disk")
      else
        print_color "$RED" "Error: $disk is not a valid block device"
      fi
    done
  else
    # Auto-detect disks
    print_color "$CYAN" "Auto-detecting disks..."
    read -ra disks <<< "$(detect_disks)"
  fi

  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi
  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check if running as root, warn if not
  if [[ $EUID -ne 0 ]]; then
    print_color "$YELLOW" "Warning: Not running as root. Some disks may not be accessible."
    echo "For complete results, run as: sudo $0"
    echo ""
  fi

  # Check each disk
  for disk in "${disks[@]}"; do
    check_disk "$disk"
  done
  print_color "$BLUE" "Check completed!"
  echo ""
  print_color "$YELLOW" "Note: TBW Endurance values shown are minimum guaranteed numbers."
  print_color "$YELLOW" " Actual endurance for your specific drive model may be higher."
}
# Print the command-line help text to stdout. Reads the global SCRIPT_NAME.
usage() {
  local self=$SCRIPT_NAME
  printf '%s\n' \
    "Usage: $self [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks" \
    "" \
    "Examples:" \
    " $self # Check all auto-detected disks" \
    " sudo $self # Check all disks (as root)" \
    " $self /dev/sda # Check specific disk" \
    " $self /dev/nvme0n1 # Check specific NVMe disk" \
    " $self /dev/sda /dev/nvme0n1 # Check multiple disks" \
    "" \
    "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID"
}
# Command-line dispatch: help and version flags are handled here and exit
# immediately; anything else is forwarded unchanged to main.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi

530
harvester-v2.3.sh Executable file
View File

@@ -0,0 +1,530 @@
#!/bin/bash
# Disk Health Check Script for Harvester OS
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
# Supports consumer and enterprise disk classification
# Script identity, used by the usage and version output.
SCRIPT_NAME=$(basename "$0")
VERSION="2.3"
# Color codes
# tput fails and complains on stderr when TERM is unset or describes a
# terminal it cannot drive (cron, systemd units). term_code swallows that
# failure so each code simply becomes an empty string and output stays plain.
term_code() {
  tput "$@" 2>/dev/null || true
}
RED=$(term_code setaf 1)
GREEN=$(term_code setaf 2)
YELLOW=$(term_code setaf 3)
BLUE=$(term_code setaf 4)
CYAN=$(term_code setaf 6)
MAGENTA=$(term_code setaf 5)
NC=$(term_code sgr0)
# Function to print colored output
# Arguments: $1 - terminal colour sequence, $2 - text to print
# Globals:   NC (read) - reset sequence appended after the text
# Outputs:   the text on stdout, with backslash escapes interpreted (echo -e)
print_color() {
  local tint=$1 text=$2
  echo -e "${tint}${text}${NC}"
}
# Succeeds (status 0) when the command named by $1 can be resolved by the
# shell; prints nothing either way.
command_exists() {
  local tool=$1
  command -v "$tool" &>/dev/null
}
# Check if smartctl is installed; nothing below can work without it.
command_exists smartctl || {
  print_color "$RED" "Error: smartctl is not installed. Please install smartmontools package."
  exit 1
}
# TBW endurance standards (using lowest numbers)
# Keys are capacity tiers in GB, values are rated terabytes written.
# The enterprise table is three times the consumer figure for each tier.
declare -A CONSUMER_TBW ENTERPRISE_TBW
CONSUMER_TBW=([250]=150 [500]=300 [1000]=600 [2000]=1200 [4000]=2400)
ENTERPRISE_TBW=([250]=450 [500]=900 [1000]=1800 [2000]=3600 [4000]=7200)
# Function to get closest capacity tier
# Arguments: $1 - capacity in GB
# Outputs:   the smallest tier (250/500/1000/2000/4000) that is >= $1;
#            anything above 4000 is reported as 4000 (no scaling is applied).
get_capacity_tier() {
  local size_gb=$1
  local -a steps=(250 500 1000 2000 4000)
  local idx
  for ((idx = 0; idx < ${#steps[@]}; idx++)); do
    if [[ $size_gb -le ${steps[idx]} ]]; then
      echo "${steps[idx]}"
      return
    fi
  done
  echo "4000"
}
# Function to get disk type and interface
# Arguments: $1 - device path
#            $2 - optional smartctl device type (-d value); empty or the
#                 script's own placeholder "direct" means "no -d option"
# Outputs:   "TYPE|TRANSPORT|IS_ENTERPRISE" on stdout, where TYPE is NVMe,
#            SAS, SSD, HDD or UNKNOWN and IS_ENTERPRISE is true or false
get_disk_info() {
  local disk=$1
  local controller=$2
  local -a smart_cmd=(smartctl)
  # "direct" marks a plain disk in this script; smartctl rejects it as a
  # device type, so it must never reach the -d option.
  if [[ -n "$controller" && "$controller" != "direct" ]]; then
    smart_cmd+=(-d "$controller")
  fi
  local info
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  local transport=""
  local disk_type="UNKNOWN"
  local is_enterprise=false

  if [[ "$disk" == /dev/nvme* ]] || grep -qi "NVMe" <<<"$info"; then
    # Check if it's NVMe
    disk_type="NVMe"
    transport="NVMe"
  elif grep -qi "SAS" <<<"$info"; then
    # Check for SAS
    disk_type="SAS"
    transport="SAS"
    is_enterprise=true
  elif grep -qi "Solid State Device" <<<"$info"; then
    # Check for SATA SSD
    disk_type="SSD"
    transport="SATA"
  elif grep -qi "Rotation Rate" <<<"$info"; then
    # Check for SATA HDD
    disk_type="HDD"
    transport="SATA"
  fi

  # Enterprise detection looks only at the identity fields and requires the
  # keyword to stand alone as a word. A substring search over the whole
  # report matches "EP" inside a "Sep" timestamp and "DC" inside every
  # "WDC ..." model string, flagging consumer drives as enterprise.
  local identity
  identity=$(sed -nE 's/^(Vendor|Product|Model Family|Device Model|Model Number|Transport protocol):[[:space:]]*//p' <<<"$info")
  local keywords='ENTERPRISE|EP|SAS|Xeons?|DualPort|PowerLoss|PLP|PRO|DC|ENT'
  if grep -Eiq "(^|[^[:alnum:]_])(${keywords})(\$|[^[:alnum:]_])" <<<"$identity"; then
    is_enterprise=true
  fi

  echo "$disk_type|$transport|$is_enterprise"
}
# Function to calculate TBW for SSD/NVMe
# Arguments: $1 - disk type (accepted for interface compatibility; the
#                 conversion is the same for every type)
#            $2 - raw write counter in 32 MiB units (e.g. Host_Writes_32MiB)
#            $3 - number of 512-byte sectors written; wins over $2 when usable
# Outputs:   terabytes written (decimal TB) with two decimals, or "0" when
#            neither counter is a positive integer
calculate_tbw() {
  local disk_type=$1
  local raw_value=$2
  local sectors=$3
  local bytes=0
  # Counters are validated as plain digits first: anything else (a stray
  # "[632", two values on separate lines) would abort the arithmetic.
  if [[ "$sectors" =~ ^[0-9]+$ && "$sectors" != "0" ]]; then
    bytes=$((10#$sectors * 512))
  elif [[ "$raw_value" =~ ^[0-9]+$ && "$raw_value" != "0" ]]; then
    # 32 MiB = 33554432 bytes (not 32,000,000).
    bytes=$((10#$raw_value * 33554432))
  fi
  if ((bytes <= 0)); then
    echo "0"
    return
  fi
  # Integer maths in hundredths of a TB keeps the function independent of bc.
  local hundredths=$((bytes / 10000000000))
  printf '%d.%02d\n' "$((hundredths / 100))" "$((hundredths % 100))"
}
# Function to get estimated endurance
# Arguments: $1 - capacity in GB, $2 - "true" for enterprise class,
#            $3 - disk type
# Globals:   ENTERPRISE_TBW, CONSUMER_TBW (read)
# Outputs:   rated TBW for the capacity tier, or "N/A" for HDDs
get_estimated_endurance() {
  local size_gb=$1 enterprise_flag=$2 kind=$3
  # HDDs don't have TBW
  [[ "$kind" != "HDD" ]] || {
    echo "N/A"
    return
  }
  local bucket
  bucket=$(get_capacity_tier "$size_gb")
  case "$enterprise_flag" in
    true) echo "${ENTERPRISE_TBW[$bucket]}" ;;
    *) echo "${CONSUMER_TBW[$bucket]}" ;;
  esac
}
# Function to estimate SSD lifespan with TBW remaining
# Arguments: $1 - power-on hours, $2 - TB written so far,
#            $3 - rated endurance in TB (or "N/A"), $4 - disk type
# Globals:   RED, YELLOW, GREEN, NC (read)
# Outputs:   "REMAINING%|TB_REMAINING|WEAR_STATUS|TYPE" on stdout; the first
#            two fields carry colour codes when a wear figure is available
estimate_ssd_lifespan() {
  local hours=$1 written_tb=$2 rated_tb=$3 kind=$4
  if [[ -z "$hours" || "$hours" -eq 0 ]]; then
    echo "Unknown||Unknown|New"
    return
  fi
  if [[ "$rated_tb" == "N/A" ]]; then
    echo "N/A|N/A|N/A|HDD"
    return
  fi

  local left_tb has_writes
  left_tb=$(bc -l 2>/dev/null <<<"scale=2; $rated_tb - $written_tb" || echo "0")
  has_writes=$(bc -l 2>/dev/null <<<"$written_tb > 0")
  if [[ $has_writes -eq 1 ]]; then
    local used_pct left_pct shade verdict
    used_pct=$(bc -l 2>/dev/null <<<"scale=1; $written_tb * 100 / $rated_tb" || echo "0")
    left_pct=$(bc -l 2>/dev/null <<<"scale=1; 100 - $used_pct" || echo "100")
    # Thresholds: 80% used and above is high wear, 50% and above moderate.
    if [[ $(bc -l <<<"$used_pct >= 80") -eq 1 ]]; then
      shade=$RED
      verdict="High wear"
    elif [[ $(bc -l <<<"$used_pct >= 50") -eq 1 ]]; then
      shade=$YELLOW
      verdict="Moderate wear"
    else
      shade=$GREEN
      verdict="Healthy"
    fi
    echo "${shade}${left_pct}%${NC}|${shade}${left_tb} TB${NC}|${verdict}|$kind"
  else
    echo "Unknown|${rated_tb} TB|New|$kind"
  fi
}
# Function to estimate HDD lifespan
# Arguments: $1 - power-on hours, $2 - reallocated sectors,
#            $3 - pending sectors, $4 - start/stop count,
#            $5 - load cycle count, $6 - disk type
# Globals:   RED, YELLOW, GREEN, NC (read)
# Outputs:   one coloured verdict line; "Unknown" when hours are missing
estimate_hdd_lifespan() {
  local hours=$1 realloc=$2 pending=$3 spinups=$4 load_cycles=$5 kind=$6
  if [[ -z "$hours" ]]; then
    echo "Unknown"
    return
  fi

  # Critical issues: any pending sector ends the assessment immediately.
  if [[ "$pending" -gt 0 ]]; then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending)"
    return
  fi

  local score=0
  if [[ "$realloc" -gt 100 ]]; then
    score=3
  elif [[ "$realloc" -gt 10 ]]; then
    score=2
  elif [[ "$realloc" -gt 0 ]]; then
    score=1
  fi

  # Usage-based assessment
  if [[ "$hours" -gt 50000 ]]; then
    score=$((score + 3))
  elif [[ "$hours" -gt 30000 ]]; then
    score=$((score + 2))
  elif [[ "$hours" -gt 15000 ]]; then
    score=$((score + 1))
  fi

  # Mechanical wear counts only for spinning disks.
  if [[ "$kind" == "HDD" ]]; then
    if [[ "$spinups" -gt 50000 ]]; then
      score=$((score + 2))
    elif [[ "$spinups" -gt 20000 ]]; then
      score=$((score + 1))
    fi
    if [[ "$load_cycles" -gt 500000 ]]; then
      score=$((score + 2))
    elif [[ "$load_cycles" -gt 200000 ]]; then
      score=$((score + 1))
    fi
  fi

  local verdict
  if [[ $score -ge 5 ]]; then
    verdict="${RED}< 6 months${NC} (Multiple risk factors)"
  elif [[ $score -ge 3 ]]; then
    verdict="${YELLOW}6-18 months${NC} (Moderate wear)"
  elif [[ $score -ge 1 ]]; then
    verdict="${YELLOW}1-3 years${NC} (Light wear)"
  else
    verdict="${GREEN}> 3 years${NC} (Healthy)"
  fi
  echo "$verdict"
}
# Function to check soft-raid (MDRAID)
# Lists every mdN array named in /proc/mdstat that exists as a block device
# and, when "mdadm --detail" yields output, prints its level, state and
# active device count.
# Globals: MAGENTA (read)
check_mdraid() {
  local -a arrays=()
  local row dev detail
  if [[ -f /proc/mdstat ]]; then
    while IFS= read -r row; do
      # Array lines start with the device name, e.g. "md0 : active raid1 ..."
      [[ $row =~ ^md[0-9]+ ]] && arrays+=("/dev/${row%% *}")
    done < /proc/mdstat
  fi

  for dev in "${arrays[@]}"; do
    [[ -b "$dev" ]] || continue
    print_color $MAGENTA "Found software RAID: $dev"
    detail=$(mdadm --detail "$dev" 2>/dev/null)
    [[ -n "$detail" ]] || continue
    echo "RAID Level: $(grep "Raid Level" <<<"$detail" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "State: $(grep "State" <<<"$detail" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "Devices: $(grep "Active Devices" <<<"$detail" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo ""
  done
}
# Value of the first "Label: value" line of a smartctl report.
# $1 - report text, $2 - label without the colon. Prints the text after the
# label's colon with leading blanks removed; prints nothing when absent.
_smart_field() {
  awk -v label="$2" '
    index($0, label ":") == 1 {
      value = substr($0, length(label) + 2)
      sub(/^[ \t]+/, "", value)
      print value
      exit
    }' <<<"$1"
}

# Raw value (10th column) of the first attribute in a "smartctl -A" table
# ($1) whose name starts with $2.
_smart_attr() {
  awk -v name="$2" 'index($2, name) == 1 { print $10; exit }' <<<"$1"
}

# Digits of an NVMe health counter such as "Data Units Written: 1,234 [..]".
# $1 - report text, $2 - label. Thousands separators and the bracketed
# human-readable suffix are dropped.
_nvme_counter() {
  awk -F: -v label="$2" '
    index($0, label ":") == 1 {
      value = $2
      sub(/\[.*/, "", value)
      gsub(/[^0-9]/, "", value)
      print value
      exit
    }' <<<"$1"
}

# Function to check a single disk
# Prints a report for one device: identity, SMART health and either an HDD
# lifespan estimate or write-endurance figures for SSD/NVMe/SAS devices.
# Arguments: $1 - device path
#            $2 - optional smartctl device type; empty or "direct" means none
# Globals:   CYAN, YELLOW (read)
# Outputs:   the report on stdout
check_disk() {
  local disk=$1
  local controller=$2
  local -a smart_cmd=(smartctl)
  # "direct" is this script's marker for a plain disk, not a smartctl type.
  if [[ -n "$controller" && "$controller" != "direct" ]]; then
    smart_cmd+=(-d "$controller")
  fi
  print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
  echo "=================================================="

  # Get disk information
  local disk_info disk_type transport is_enterprise
  disk_info=$(get_disk_info "$disk" "$controller")
  IFS='|' read -r disk_type transport is_enterprise <<<"$disk_info"

  # Get basic disk information
  local info health attributes
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  health=$("${smart_cmd[@]}" -H "$disk" 2>/dev/null)
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  # Readability is judged by output only: smartctl's exit status is a bit
  # mask that is also non-zero for readable but unhealthy disks.
  if [[ -z "$info" ]]; then
    print_color "$YELLOW" "Cannot read disk information. It may be offline, unsupported, or need controller specification."
    echo ""
    return
  fi

  # Extract disk information
  local model serial firmware capacity_raw capacity
  model=$(_smart_field "$info" "Device Model")
  [[ -z "$model" ]] && model=$(_smart_field "$info" "Model Number")
  serial=$(_smart_field "$info" "Serial Number")
  firmware=$(_smart_field "$info" "Firmware Version")
  capacity_raw=$(_smart_field "$info" "User Capacity")
  # NVMe reports carry the size under different labels.
  [[ -z "$capacity_raw" ]] && capacity_raw=$(_smart_field "$info" "Total NVM Capacity")
  [[ -z "$capacity_raw" ]] && capacity_raw=$(_smart_field "$info" "Namespace 1 Size/Capacity")
  capacity=${capacity_raw%%\[*}

  # Extract capacity in GB from the leading byte count ("500,107,862,016 ...")
  local capacity_gb=0
  if [[ "$capacity_raw" =~ ^([0-9][0-9,]*) ]]; then
    local size_bytes=${BASH_REMATCH[1]//,/}
    capacity_gb=$((10#$size_bytes / 1000000000))
  fi

  local health_status
  health_status=$(sed -n 's/.*result:[[:space:]]*//p' <<<"$health" | head -n 1)
  [[ -z "$health_status" ]] && health_status=$(_smart_field "$health" "SMART Health Status")

  # Extract SMART attributes
  local power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib start_stop_count load_cycle_count
  power_on_hours=$(_smart_attr "$attributes" "Power_On_Hours")
  reallocated_sectors=$(_smart_attr "$attributes" "Reallocated_Sector_Ct")
  pending_sectors=$(_smart_attr "$attributes" "Current_Pending_Sector")
  # Sector counts and 32 MiB counts use different units, so they are kept
  # in separate variables and never mixed.
  total_written=$(_smart_attr "$attributes" "Total_LBAs_Written")
  host_writes_32mib=$(_smart_attr "$attributes" "Host_Writes_32MiB")
  start_stop_count=$(_smart_attr "$attributes" "Start_Stop_Count")
  load_cycle_count=$(_smart_attr "$attributes" "Load_Cycle_Count")

  # For NVMe disks using smartctl
  if [[ "$disk_type" == "NVMe" ]]; then
    local nvme_attributes data_units
    nvme_attributes=$("${smart_cmd[@]}" -x "$disk" 2>/dev/null)
    power_on_hours=$(_nvme_counter "$nvme_attributes" "Power On Hours")
    data_units=$(_nvme_counter "$nvme_attributes" "Data Units Written")
    total_written=""
    # One NVMe data unit is 1000 blocks of 512 bytes.
    [[ -n "$data_units" ]] && total_written=$((10#$data_units * 1000))
    [[ -z "$total_written" ]] && host_writes_32mib=$(_smart_attr "$nvme_attributes" "Host_Writes_32MiB")
  fi

  local class="Consumer"
  [[ "$is_enterprise" == "true" ]] && class="Enterprise"

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Interface: $transport"
  echo "Class: $class"
  echo "Capacity: ${capacity:-Unknown}"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"
  echo "Power On Hours: ${power_on_hours:-Unknown}"

  # Disk type specific analysis
  if [[ "$disk_type" == "HDD" ]]; then
    echo "Realloc Sectors: ${reallocated_sectors:-0}"
    echo "Pending Sectors: ${pending_sectors:-0}"
    [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}"
    [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}"
    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type")
    echo "Lifespan: $lifespan"
  elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "")
    fi
    local estimated_endurance
    estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type")
    echo "TBW Used: ${tbw_used} TB"
    echo "TBW Endurance: ${estimated_endurance} TB"
    local lifespan_info lifespan_percent tbw_remaining wear_status
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type")
    lifespan_percent=$(cut -d'|' -f1 <<<"$lifespan_info")
    tbw_remaining=$(cut -d'|' -f2 <<<"$lifespan_info")
    wear_status=$(cut -d'|' -f3 <<<"$lifespan_info")
    if [[ "$estimated_endurance" != "N/A" ]]; then
      echo "TBW Remaining: $tbw_remaining"
      echo "Lifespan: $lifespan_percent ($wear_status)"
    fi
    # Show mechanical attributes for SAS drives that might be SSDs
    if [[ "$disk_type" == "SAS" ]]; then
      echo "Realloc Sectors: ${reallocated_sectors:-0}"
      echo "Pending Sectors: ${pending_sectors:-0}"
    fi
  else
    print_color "$YELLOW" "Unknown disk type - limited information available"
  fi
  echo ""
}
# Function to detect RAID controllers and disks
# Outputs: one space-separated line of "DEVICE:CONTROLLER" entries. Plain
#          disks come first with the marker "direct", followed by every
#          controller/slot combination smartctl answers for, followed by
#          whatever storcli lists as JBOD or unconfigured.
detect_raid_disks() {
  local -a raid_types=("megaraid" "cciss" "areca" "3ware" "hpt" "aacraid" "auto")
  local -a found=()
  local dev raid_type slot probe

  # Check for direct disks first (SATA/SAS/NVMe)
  for dev in /dev/sd[a-z] /dev/sd[a-z][a-z] /dev/nvme[0-9]n[0-9]; do
    [[ -b "$dev" ]] && found+=("$dev:direct")
  done

  # Check for RAID controllers: slots 0-31 of each type, keeping only the
  # first device node that responds for a given slot.
  for raid_type in "${raid_types[@]}"; do
    for ((slot = 0; slot <= 31; slot++)); do
      for probe in "/dev/sda" "/dev/sg$slot" "/dev/sr$slot"; do
        smartctl -d "$raid_type,$slot" -i "$probe" >/dev/null 2>&1 || continue
        found+=("$probe:$raid_type,$slot")
        break
      done
    done
  done

  # Check for JBOD/passthrough disks on MegaRAID
  if command_exists storcli; then
    local jbod_ids
    jbod_ids=$(storcli /c0/eALL/sALL show all 2>/dev/null | grep -i "jbod\|unconfigured" | awk '{print $2}')
    for dev in $jbod_ids; do
      found+=("$dev:megaraid,$dev")
    done
  fi

  echo "${found[@]}"
}
# Main function
# Reports software RAID arrays, then checks either the disks named on the
# command line or every disk found by auto-detection.
# Arguments: optional list of block device paths
# Globals:   VERSION and the colour variables (read)
# Exits with status 1 when no disk can be checked.
main() {
  print_color "$BLUE" "Disk Health Check Script v$VERSION for Harvester OS"
  print_color "$BLUE" "===================================================="
  echo ""
  local -a disks=()
  local disk disk_info controller

  # Check for soft-raid first
  check_mdraid

  # If specific disk provided, check only that disk
  if [[ $# -gt 0 ]]; then
    for disk in "$@"; do
      if [[ -b "$disk" ]]; then
        disks+=("$disk:direct")
      else
        print_color "$RED" "Error: $disk is not a valid block device"
      fi
    done
  else
    # Auto-detect disks
    print_color "$CYAN" "Auto-detecting disks..."
    read -ra disks <<< "$(detect_raid_disks)"
  fi

  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$YELLOW" "No disks found via auto-detection"
    print_color "$CYAN" "Trying direct disk access..."
    # Try direct access to common disks
    for disk in /dev/sda /dev/sdb /dev/sdc /dev/nvme0n1; do
      if [[ -b "$disk" ]]; then
        disks+=("$disk:direct")
      fi
    done
  fi

  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi
  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check each disk
  for disk_info in "${disks[@]}"; do
    IFS=':' read -r disk controller <<< "$disk_info"
    # "direct" only marks a plain disk in the list; it is not a smartctl
    # device type, so check_disk gets an empty controller for it.
    [[ "$controller" == "direct" ]] && controller=""
    check_disk "$disk" "$controller"
  done
  print_color "$BLUE" "Check completed!"
}
# Usage information
# Prints the command-line help text to stdout.
# Globals: SCRIPT_NAME (read)
usage() {
  local me=$SCRIPT_NAME
  printf '%s\n' \
    "Usage: $me [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks and RAID arrays" \
    "" \
    "Examples:" \
    " $me # Check all auto-detected disks" \
    " $me /dev/sda # Check specific disk" \
    " $me /dev/nvme0n1 # Check specific NVMe disk" \
    " $me /dev/sda /dev/nvme0n1 # Check multiple disks" \
    "" \
    "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID"
}
# Parse command line arguments
# Only the first argument is inspected for a help/version flag; anything
# else (including no arguments) is handed to main unchanged.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi

679
harvester-v2.5.sh Executable file
View File

@@ -0,0 +1,679 @@
#!/bin/bash
# Disk Health Check Script for Harvester OS
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
# Supports consumer and enterprise disk classification
# Script identity, used by the usage and version output.
SCRIPT_NAME=$(basename "$0")
VERSION="2.5"
# Color codes
# tput fails and complains on stderr when TERM is unset or describes a
# terminal it cannot drive (cron, systemd units). term_code swallows that
# failure so each code simply becomes an empty string and output stays plain.
term_code() {
  tput "$@" 2>/dev/null || true
}
RED=$(term_code setaf 1)
GREEN=$(term_code setaf 2)
YELLOW=$(term_code setaf 3)
BLUE=$(term_code setaf 4)
CYAN=$(term_code setaf 6)
MAGENTA=$(term_code setaf 5)
NC=$(term_code sgr0)
# Function to print colored output
# Arguments: $1 - terminal colour sequence, $2 - text to print
# Globals:   NC (read) - reset sequence appended after the text
# Outputs:   the text on stdout, with backslash escapes interpreted (echo -e)
print_color() {
  local tint=$1 text=$2
  echo -e "${tint}${text}${NC}"
}
# Succeeds (status 0) when the command named by $1 can be resolved by the
# shell; prints nothing either way.
command_exists() {
  local tool=$1
  command -v "$tool" &>/dev/null
}
# Check if smartctl is installed; nothing below can work without it.
command_exists smartctl || {
  print_color "$RED" "Error: smartctl is not installed. Please install smartmontools package."
  exit 1
}
# TBW endurance standards (using lowest numbers)
# Keys are capacity tiers in GB, values are rated terabytes written.
# The enterprise table is three times the consumer figure for each tier.
declare -A CONSUMER_TBW ENTERPRISE_TBW
CONSUMER_TBW=([250]=150 [500]=300 [1000]=600 [2000]=1200 [4000]=2400 [8000]=4800)
ENTERPRISE_TBW=([250]=450 [500]=900 [1000]=1800 [2000]=3600 [4000]=7200 [8000]=14400)
# Function to get closest capacity tier
# Arguments: $1 - capacity in GB
# Outputs:   the smallest tier (250/500/1000/2000/4000/8000) that is >= $1;
#            anything above 8000 is reported as 8000 (no scaling is applied).
get_capacity_tier() {
  local size_gb=$1
  local -a steps=(250 500 1000 2000 4000 8000)
  local idx
  for ((idx = 0; idx < ${#steps[@]}; idx++)); do
    if [[ $size_gb -le ${steps[idx]} ]]; then
      echo "${steps[idx]}"
      return
    fi
  done
  echo "8000"
}
# Function to convert bytes to human readable
# Arguments: $1 - size in bytes
# Outputs:   the size in binary units (1 MB = 1048576 bytes) with two
#            decimals and a TB/GB/MB suffix, or "<n> bytes" below 1 MB.
#            The scaled figure comes from bc and falls back to "0" if bc fails.
bytes_to_human() {
  local count=$1
  local divisor unit
  if [[ $count -ge 1099511627776 ]]; then
    divisor=1099511627776
    unit="TB"
  elif [[ $count -ge 1073741824 ]]; then
    divisor=1073741824
    unit="GB"
  elif [[ $count -ge 1048576 ]]; then
    divisor=1048576
    unit="MB"
  else
    echo "$count bytes"
    return
  fi
  local scaled
  scaled=$(bc -l 2>/dev/null <<<"scale=2; $count / $divisor" || echo "0")
  echo "$scaled $unit"
}
# Function to extract numeric hours from power_on_hours field
# Arguments: $1 - raw hours value, e.g. "12345", "12345h+30m+12.1s" or
#                 "1,234" (thousands separators)
# Outputs:   the leading whole number of hours, or "0" when there is none
extract_numeric_hours() {
  local power_on_hours=$1
  # Thousands separators are dropped first; otherwise "1,234" would be cut
  # at the comma and reported as 1 hour.
  local compact=${power_on_hours//,/}
  if [[ "$compact" =~ ^[0-9]+ ]]; then
    echo "${BASH_REMATCH[0]}"
  else
    echo "0"
  fi
}
# Function to get disk type and interface
# Arguments: $1 - device path
#            $2 - optional smartctl device type (-d value); empty or the
#                 script's own placeholder "direct" means "no -d option"
# Outputs:   "TYPE|TRANSPORT|IS_ENTERPRISE" on stdout, where TYPE is NVMe,
#            SAS, SSD, HDD or UNKNOWN and IS_ENTERPRISE is true or false
get_disk_info() {
  local disk=$1
  local controller=$2
  local -a smart_cmd=(smartctl)
  # "direct" marks a plain disk in this script; smartctl rejects it as a
  # device type, so it must never reach the -d option.
  if [[ -n "$controller" && "$controller" != "direct" ]]; then
    smart_cmd+=(-d "$controller")
  fi
  local info
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  local transport=""
  local disk_type="UNKNOWN"
  local is_enterprise=false

  if [[ "$disk" == /dev/nvme* ]] || grep -qi "NVMe" <<<"$info"; then
    # Check if it's NVMe
    disk_type="NVMe"
    transport="NVMe"
  elif grep -qi "SAS" <<<"$info"; then
    # Check for SAS
    disk_type="SAS"
    transport="SAS"
    is_enterprise=true
  elif grep -qi "Solid State Device" <<<"$info"; then
    # Check for SATA SSD
    disk_type="SSD"
    transport="SATA"
  elif grep -qi "Rotation Rate" <<<"$info"; then
    # Check for SATA HDD
    disk_type="HDD"
    transport="SATA"
  fi

  # Enterprise detection looks only at the identity fields and requires the
  # keyword to stand alone as a word. A substring search over the whole
  # report matches "EP" inside a "Sep" timestamp and "DC" inside every
  # "WDC ..." model string, flagging consumer drives as enterprise.
  local identity
  identity=$(sed -nE 's/^(Vendor|Product|Model Family|Device Model|Model Number|Transport protocol):[[:space:]]*//p' <<<"$info")
  local keywords='ENTERPRISE|EP|SAS|Xeons?|DualPort|PowerLoss|PLP|PRO|DC|ENT'
  if grep -Eiq "(^|[^[:alnum:]_])(${keywords})(\$|[^[:alnum:]_])" <<<"$identity"; then
    is_enterprise=true
  fi

  echo "$disk_type|$transport|$is_enterprise"
}
# Function to calculate TBW for SSD/NVMe
# Arguments: $1 - disk type (accepted for interface compatibility; the
#                 conversion is the same for every type)
#            $2 - raw write counter in 32 MiB units (e.g. Host_Writes_32MiB)
#            $3 - number of 512-byte sectors written; wins over $2 when usable
# Outputs:   terabytes written (decimal TB) with two decimals, or "0" when
#            neither counter is a positive integer
calculate_tbw() {
  local disk_type=$1
  local raw_value=$2
  local sectors=$3
  local bytes=0
  # Counters are validated as plain digits first: anything else (a stray
  # "[632", two values on separate lines) would abort the arithmetic.
  if [[ "$sectors" =~ ^[0-9]+$ && "$sectors" != "0" ]]; then
    bytes=$((10#$sectors * 512))
  elif [[ "$raw_value" =~ ^[0-9]+$ && "$raw_value" != "0" ]]; then
    # 32 MiB = 33554432 bytes (not 32,000,000).
    bytes=$((10#$raw_value * 33554432))
  fi
  if ((bytes <= 0)); then
    echo "0"
    return
  fi
  # Integer maths in hundredths of a TB keeps the function independent of bc.
  local hundredths=$((bytes / 10000000000))
  printf '%d.%02d\n' "$((hundredths / 100))" "$((hundredths % 100))"
}
# Function to get estimated endurance
# Arguments: $1 - capacity in GB, $2 - "true" for enterprise class,
#            $3 - disk type
# Globals:   ENTERPRISE_TBW, CONSUMER_TBW (read)
# Outputs:   rated TBW for the capacity tier, or "N/A" for HDDs
get_estimated_endurance() {
  local size_gb=$1 enterprise_flag=$2 kind=$3
  # HDDs don't have TBW
  [[ "$kind" != "HDD" ]] || {
    echo "N/A"
    return
  }
  local bucket
  bucket=$(get_capacity_tier "$size_gb")
  case "$enterprise_flag" in
    true) echo "${ENTERPRISE_TBW[$bucket]}" ;;
    *) echo "${CONSUMER_TBW[$bucket]}" ;;
  esac
}
# Function to estimate SSD lifespan with TBW remaining
# Arguments: $1 - power-on hours, $2 - TB written so far,
#            $3 - rated endurance in TB (or "N/A"), $4 - disk type
# Globals:   RED, YELLOW, GREEN, NC (read)
# Outputs:   "REMAINING%|TB_REMAINING|WEAR_STATUS|TYPE" on stdout; the first
#            two fields carry colour codes when a wear figure is available
estimate_ssd_lifespan() {
  local hours=$1 written_tb=$2 rated_tb=$3 kind=$4
  if [[ -z "$hours" || "$hours" -eq 0 ]]; then
    echo "Unknown||Unknown|New"
    return
  fi
  if [[ "$rated_tb" == "N/A" ]]; then
    echo "N/A|N/A|N/A|HDD"
    return
  fi

  # Keep only digits and dots of the written figure; an empty result is 0.
  local written_clean
  written_clean=$(sed 's/[^0-9.]//g' <<<"$written_tb")
  [[ -n "$written_clean" ]] || written_clean=0

  local left_tb has_writes
  left_tb=$(bc -l 2>/dev/null <<<"scale=2; $rated_tb - $written_clean" || echo "$rated_tb")
  has_writes=$(bc -l 2>/dev/null <<<"$written_clean > 0")
  if [[ $has_writes -eq 1 ]]; then
    local used_pct left_pct shade verdict
    used_pct=$(bc -l 2>/dev/null <<<"scale=1; $written_clean * 100 / $rated_tb" || echo "0")
    left_pct=$(bc -l 2>/dev/null <<<"scale=1; 100 - $used_pct" || echo "100")
    # Thresholds: 80% used and above is high wear, 50% and above moderate.
    if [[ $(bc -l <<<"$used_pct >= 80") -eq 1 ]]; then
      shade=$RED
      verdict="High wear"
    elif [[ $(bc -l <<<"$used_pct >= 50") -eq 1 ]]; then
      shade=$YELLOW
      verdict="Moderate wear"
    else
      shade=$GREEN
      verdict="Healthy"
    fi
    echo "${shade}${left_pct}%${NC}|${shade}${left_tb} TB${NC}|${verdict}|$kind"
  else
    echo "Unknown|${rated_tb} TB|New|$kind"
  fi
}
# Function to estimate HDD lifespan
# Arguments: $1 - power-on hours (raw field, reduced via extract_numeric_hours),
#            $2 - reallocated sectors, $3 - pending sectors,
#            $4 - start/stop count, $5 - load cycle count, $6 - disk type
# Globals:   RED, YELLOW, GREEN, NC (read)
# Outputs:   one coloured verdict line; "Unknown" when hours are missing or 0
estimate_hdd_lifespan() {
  local raw_hours=$1 realloc=$2 pending=$3 spinups=$4 load_cycles=$5 kind=$6
  # Extract numeric hours only
  local hours
  hours=$(extract_numeric_hours "$raw_hours")
  if [[ -z "$hours" || "$hours" -eq 0 ]]; then
    echo "Unknown"
    return
  fi

  # Critical issues: any pending sector ends the assessment immediately.
  if [[ "$pending" -gt 0 ]]; then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending)"
    return
  fi

  local score=0
  if [[ "$realloc" -gt 100 ]]; then
    score=3
  elif [[ "$realloc" -gt 10 ]]; then
    score=2
  elif [[ "$realloc" -gt 0 ]]; then
    score=1
  fi

  # Usage-based assessment
  if [[ "$hours" -gt 50000 ]]; then
    score=$((score + 3))
  elif [[ "$hours" -gt 30000 ]]; then
    score=$((score + 2))
  elif [[ "$hours" -gt 15000 ]]; then
    score=$((score + 1))
  fi

  # Mechanical wear counts only for spinning disks.
  if [[ "$kind" == "HDD" ]]; then
    if [[ "$spinups" -gt 50000 ]]; then
      score=$((score + 2))
    elif [[ "$spinups" -gt 20000 ]]; then
      score=$((score + 1))
    fi
    if [[ "$load_cycles" -gt 500000 ]]; then
      score=$((score + 2))
    elif [[ "$load_cycles" -gt 200000 ]]; then
      score=$((score + 1))
    fi
  fi

  local verdict
  if [[ $score -ge 5 ]]; then
    verdict="${RED}< 6 months${NC} (Multiple risk factors)"
  elif [[ $score -ge 3 ]]; then
    verdict="${YELLOW}6-18 months${NC} (Moderate wear)"
  elif [[ $score -ge 1 ]]; then
    verdict="${YELLOW}1-3 years${NC} (Light wear)"
  else
    verdict="${GREEN}> 3 years${NC} (Healthy)"
  fi
  echo "$verdict"
}
# Function to check soft-raid (MDRAID)
# Lists every mdN array named in /proc/mdstat that exists as a block device
# and, when mdadm is installed and "mdadm --detail" yields output, prints
# its level, state and active device count.
# Globals: MAGENTA (read)
check_mdraid() {
  local -a arrays=()
  local row dev detail
  if [[ -f /proc/mdstat ]]; then
    while IFS= read -r row; do
      # Array lines start with the device name, e.g. "md0 : active raid1 ..."
      [[ $row =~ ^md[0-9]+ ]] && arrays+=("/dev/${row%% *}")
    done < /proc/mdstat
  fi

  for dev in "${arrays[@]}"; do
    [[ -b "$dev" ]] || continue
    print_color $MAGENTA "Found software RAID: $dev"
    command_exists mdadm || continue
    detail=$(mdadm --detail "$dev" 2>/dev/null)
    [[ -n "$detail" ]] || continue
    echo "RAID Level: $(grep "Raid Level" <<<"$detail" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "State: $(grep "State" <<<"$detail" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "Devices: $(grep "Active Devices" <<<"$detail" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo ""
  done
}
# Function to extract capacity in GB from various formats
# Arguments: $1 - capacity text as printed by smartctl, e.g.
#                 "500,107,862,016 bytes [500 GB]", "500,107,862,016 [500 GB]",
#                 "[1.82 TB]", "1.82TB" or "500.1GB"
# Outputs:   whole decimal gigabytes (1 TB = 1000 GB), or "0" when nothing
#            usable is found
extract_capacity_gb() {
  local capacity=$1
  local capacity_gb=0
  # The patterns live in variables on purpose: written inline in [[ =~ ]],
  # bash turns "\s" into a literal "s", so "N bytes" and "[500 GB]" would
  # never match.
  local number='([0-9][0-9,]*(\.[0-9]+)?)'
  local re_bracket="\\[${number}[[:space:]]*([TtGg])[Bb]"
  local re_bytes='([0-9][0-9,]*)[[:space:]]+bytes'
  local re_bare="${number}[[:space:]]*([TtGg])[Bb]"
  local size="" unit=""

  if [[ $capacity =~ $re_bracket ]]; then
    # Pattern: [1.82 TB] / [500.1 GB]
    size=${BASH_REMATCH[1]}
    unit=${BASH_REMATCH[3]}
  elif [[ $capacity =~ $re_bytes ]]; then
    # Pattern: 500,107,862,016 bytes
    local bytes=${BASH_REMATCH[1]//,/}
    capacity_gb=$((10#$bytes / 1000000000))
  elif [[ $capacity =~ $re_bare ]]; then
    # Pattern: 1.82TB / 500.1GB
    size=${BASH_REMATCH[1]}
    unit=${BASH_REMATCH[3]}
  fi

  if [[ -n "$size" ]]; then
    size=${size//,/}
    local whole=${size%%.*}
    local frac=""
    [[ "$size" == *.* ]] && frac=${size#*.}
    if [[ "$unit" == [Tt] ]]; then
      # TB to GB without bc: integer part times 1000 plus the first three
      # decimals.
      frac="${frac}000"
      capacity_gb=$((10#${whole:-0} * 1000 + 10#${frac:0:3}))
    else
      capacity_gb=$((10#${whole:-0}))
    fi
  fi

  # Ensure we have a valid number
  if [[ ! "$capacity_gb" =~ ^[0-9]+$ || "$capacity_gb" -le 0 ]]; then
    echo "0"
  else
    echo "$capacity_gb"
  fi
}
# Function to get NVMe capacity using smartctl
# Arguments: $1 - device path
#            $2 - optional smartctl device type; empty or "direct" means none
# Outputs:   the value of the first capacity field found in "smartctl -i":
#            "Total NVM Capacity", then "Namespace 1 Size/Capacity", then
#            "User Capacity" (cut before its bracket); empty when none exists
get_nvme_capacity() {
  local disk=$1
  local controller=$2
  local -a smart_cmd=(smartctl)
  # "direct" is this script's marker for a plain disk, not a smartctl type.
  if [[ -n "$controller" && "$controller" != "direct" ]]; then
    smart_cmd+=(-d "$controller")
  fi
  local nvme_info
  nvme_info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  local capacity=""
  # Try to get capacity from different fields
  capacity=$(grep -i "Total NVM Capacity" <<<"$nvme_info" | cut -d: -f2 | sed 's/^[ \t]*//')
  if [[ -z "$capacity" ]]; then
    capacity=$(grep -i "Namespace 1 Size/Capacity" <<<"$nvme_info" | cut -d: -f2 | sed 's/^[ \t]*//')
  fi
  if [[ -z "$capacity" ]]; then
    capacity=$(grep -i "User Capacity" <<<"$nvme_info" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
  fi
  echo "$capacity"
}
# Function to get human readable capacity
# Arguments: $1 - original capacity text (not used by the conversion)
#            $2 - capacity in GB
# Outputs:   "<n> GB" below 1000 GB, otherwise the value in TB with two
#            decimals as computed by bc
get_human_capacity() {
  local capacity=$1
  local size_gb=$2
  if ! [[ $size_gb -ge 1000 ]]; then
    echo "${size_gb} GB"
    return
  fi
  local size_tb
  size_tb=$(bc -l <<<"scale=2; $size_gb / 1000")
  echo "$size_tb TB"
}
# Value of the first "Label: value" line of a smartctl report.
# $1 - report text, $2 - label without the colon. Prints the text after the
# label's colon with leading blanks removed; prints nothing when absent.
_smart_field() {
  awk -v label="$2" '
    index($0, label ":") == 1 {
      value = substr($0, length(label) + 2)
      sub(/^[ \t]+/, "", value)
      print value
      exit
    }' <<<"$1"
}

# Raw value (10th column) of the first attribute in a "smartctl -A" table
# ($1) whose name starts with $2.
_smart_attr() {
  awk -v name="$2" 'index($2, name) == 1 { print $10; exit }' <<<"$1"
}

# Digits of an NVMe health counter such as "Data Units Written: 1,234 [..]".
# $1 - report text, $2 - label. Thousands separators and the bracketed
# human-readable suffix are dropped.
_nvme_counter() {
  awk -F: -v label="$2" '
    index($0, label ":") == 1 {
      value = $2
      sub(/\[.*/, "", value)
      gsub(/[^0-9]/, "", value)
      print value
      exit
    }' <<<"$1"
}

# Function to check a single disk
# Prints a report for one device: identity, SMART health and either an HDD
# lifespan estimate or write-endurance figures for SSD/NVMe/SAS devices.
# Arguments: $1 - device path
#            $2 - optional smartctl device type; empty or "direct" means none
# Globals:   CYAN, YELLOW (read)
# Outputs:   the report on stdout
check_disk() {
  local disk=$1
  local controller=$2
  local -a smart_cmd=(smartctl)
  # "direct" is this script's marker for a plain disk, not a smartctl type.
  if [[ -n "$controller" && "$controller" != "direct" ]]; then
    smart_cmd+=(-d "$controller")
  fi
  print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
  echo "=================================================="

  # Get disk information
  local disk_info disk_type transport is_enterprise
  disk_info=$(get_disk_info "$disk" "$controller")
  IFS='|' read -r disk_type transport is_enterprise <<<"$disk_info"

  # Get basic disk information
  local info health attributes
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  health=$("${smart_cmd[@]}" -H "$disk" 2>/dev/null)
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  # Readability is judged by output only: smartctl's exit status is a bit
  # mask that is also non-zero for readable but unhealthy disks.
  if [[ -z "$info" ]]; then
    print_color "$YELLOW" "Cannot read disk information. It may be offline, unsupported, or need controller specification."
    echo ""
    return
  fi

  # Extract disk information
  local model serial firmware capacity
  model=$(_smart_field "$info" "Device Model")
  [[ -z "$model" ]] && model=$(_smart_field "$info" "Model Number")
  serial=$(_smart_field "$info" "Serial Number")
  firmware=$(_smart_field "$info" "Firmware Version")
  capacity=$(_smart_field "$info" "User Capacity")
  capacity=${capacity%%\[*}

  # For NVMe disks, try to get capacity from different fields
  if [[ "$disk_type" == "NVMe" ]]; then
    local nvme_capacity
    nvme_capacity=$(get_nvme_capacity "$disk" "$controller")
    if [[ -n "$nvme_capacity" ]]; then
      capacity="$nvme_capacity"
    fi
  fi

  # Extract capacity in GB and human readable format
  local capacity_gb capacity_human
  capacity_gb=$(extract_capacity_gb "$capacity")
  capacity_human=$(get_human_capacity "$capacity" "$capacity_gb")

  # If that failed, use the leading byte count of the field directly; ten
  # or more digits means at least 1 GB, which rules out stray small numbers.
  if [[ "$capacity_gb" -eq 0 && "$capacity" =~ ^([0-9][0-9,]*) ]]; then
    local size_bytes=${BASH_REMATCH[1]//,/}
    if ((${#size_bytes} >= 10)); then
      capacity_gb=$((10#$size_bytes / 1000000000))
      capacity_human=$(get_human_capacity "$capacity" "$capacity_gb")
    fi
  fi

  # Last resort: guess the size from the model name.
  if [[ "$capacity_gb" -eq 0 ]]; then
    if [[ "$disk_type" == "NVMe" && "$model" =~ 500[Gg] ]]; then
      capacity_gb=500
      capacity_human="500 GB"
    elif [[ "$model" =~ 960[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then
      capacity_gb=1000
      capacity_human="1 TB"
    elif [[ "$model" =~ 2[Tt] ]]; then
      capacity_gb=2000
      capacity_human="2 TB"
    elif [[ "$model" =~ 500[Gg] ]]; then
      capacity_gb=500
      capacity_human="500 GB"
    elif [[ "$model" =~ 250[Gg] ]]; then
      capacity_gb=250
      capacity_human="250 GB"
    fi
  fi

  local health_status
  health_status=$(sed -n 's/.*result:[[:space:]]*//p' <<<"$health" | head -n 1)
  [[ -z "$health_status" ]] && health_status=$(_smart_field "$health" "SMART Health Status")

  # Extract SMART attributes
  local power_on_hours_raw power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib start_stop_count load_cycle_count
  power_on_hours_raw=$(_smart_attr "$attributes" "Power_On_Hours")
  power_on_hours=$(extract_numeric_hours "$power_on_hours_raw")
  reallocated_sectors=$(_smart_attr "$attributes" "Reallocated_Sector_Ct")
  pending_sectors=$(_smart_attr "$attributes" "Current_Pending_Sector")
  # Sector counts and 32 MiB counts use different units, so they are kept
  # in separate variables and never mixed.
  total_written=$(_smart_attr "$attributes" "Total_LBAs_Written")
  host_writes_32mib=$(_smart_attr "$attributes" "Host_Writes_32MiB")
  start_stop_count=$(_smart_attr "$attributes" "Start_Stop_Count")
  load_cycle_count=$(_smart_attr "$attributes" "Load_Cycle_Count")

  # For NVMe disks using smartctl extended attributes
  if [[ "$disk_type" == "NVMe" ]]; then
    local nvme_attributes data_units_written nvme_power_hours
    nvme_attributes=$("${smart_cmd[@]}" -x "$disk" 2>/dev/null)
    data_units_written=$(_nvme_counter "$nvme_attributes" "Data Units Written")
    if [[ -n "$data_units_written" ]]; then
      # Convert data units to sectors (1 data unit = 1000 sectors for NVMe)
      total_written=$((10#$data_units_written * 1000))
    fi
    # Get power on hours for NVMe (thousands separators removed)
    nvme_power_hours=$(_nvme_counter "$nvme_attributes" "Power On Hours")
    if [[ -n "$nvme_power_hours" ]]; then
      power_on_hours="$nvme_power_hours"
    fi
  fi

  local class="Consumer"
  [[ "$is_enterprise" == "true" ]] && class="Enterprise"

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Interface: $transport"
  echo "Class: $class"
  echo "Capacity: $capacity_human"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"
  echo "Power On Hours: ${power_on_hours:-Unknown}"

  # Disk type specific analysis
  if [[ "$disk_type" == "HDD" ]]; then
    echo "Realloc Sectors: ${reallocated_sectors:-0}"
    echo "Pending Sectors: ${pending_sectors:-0}"
    [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}"
    [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}"
    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type")
    echo "Lifespan: $lifespan"
  elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "")
    fi
    local estimated_endurance
    estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type")
    echo "TBW Used: ${tbw_used} TB"
    echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)"
    local lifespan_info lifespan_percent tbw_remaining wear_status
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type")
    lifespan_percent=$(cut -d'|' -f1 <<<"$lifespan_info")
    tbw_remaining=$(cut -d'|' -f2 <<<"$lifespan_info")
    wear_status=$(cut -d'|' -f3 <<<"$lifespan_info")
    if [[ "$estimated_endurance" != "N/A" ]]; then
      echo "TBW Remaining: $tbw_remaining"
      echo "Lifespan: $lifespan_percent ($wear_status)"
    fi
    # Show mechanical attributes for SAS drives that might be SSDs
    if [[ "$disk_type" == "SAS" ]]; then
      echo "Realloc Sectors: ${reallocated_sectors:-0}"
      echo "Pending Sectors: ${pending_sectors:-0}"
    fi
  else
    print_color "$YELLOW" "Unknown disk type - limited information available"
  fi
  echo ""
}
# Function to detect RAID controllers and disks
# Outputs: one space-separated line of "DEVICE:CONTROLLER" entries. Plain
#          disks come first with the marker "direct", followed by every
#          controller/slot combination smartctl answers for, followed by
#          whatever storcli lists as JBOD or unconfigured.
detect_raid_disks() {
  local -a raid_types=("megaraid" "cciss" "areca" "3ware" "hpt" "aacraid" "auto")
  local -a found=()
  local dev raid_type slot probe

  # Check for direct disks first (SATA/SAS/NVMe)
  for dev in /dev/sd[a-z] /dev/sd[a-z][a-z] /dev/nvme[0-9]n[0-9]; do
    [[ -b "$dev" ]] && found+=("$dev:direct")
  done

  # Check for RAID controllers: slots 0-31 of each type, keeping only the
  # first device node that responds for a given slot.
  for raid_type in "${raid_types[@]}"; do
    for ((slot = 0; slot <= 31; slot++)); do
      for probe in "/dev/sda" "/dev/sg$slot" "/dev/sr$slot"; do
        smartctl -d "$raid_type,$slot" -i "$probe" >/dev/null 2>&1 || continue
        found+=("$probe:$raid_type,$slot")
        break
      done
    done
  done

  # Check for JBOD/passthrough disks on MegaRAID
  if command_exists storcli; then
    local jbod_ids
    jbod_ids=$(storcli /c0/eALL/sALL show all 2>/dev/null | grep -i "jbod\|unconfigured" | awk '{print $2}')
    for dev in $jbod_ids; do
      found+=("$dev:megaraid,$dev")
    done
  fi

  echo "${found[@]}"
}
# Main function
# Reports software RAID arrays, then checks either the disks named on the
# command line or every disk found by auto-detection.
# Arguments: optional list of block device paths
# Globals:   VERSION and the colour variables (read)
# Exits with status 1 when no disk can be checked.
main() {
  print_color "$BLUE" "Disk Health Check Script v$VERSION for Harvester OS"
  print_color "$BLUE" "===================================================="
  echo ""
  local -a disks=()
  local disk disk_info controller

  # Check for soft-raid first
  check_mdraid

  # If specific disk provided, check only that disk
  if [[ $# -gt 0 ]]; then
    for disk in "$@"; do
      if [[ -b "$disk" ]]; then
        disks+=("$disk:direct")
      else
        print_color "$RED" "Error: $disk is not a valid block device"
      fi
    done
  else
    # Auto-detect disks
    print_color "$CYAN" "Auto-detecting disks..."
    read -ra disks <<< "$(detect_raid_disks)"
  fi

  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$YELLOW" "No disks found via auto-detection"
    print_color "$CYAN" "Trying direct disk access..."
    # Try direct access to common disks
    for disk in /dev/sda /dev/sdb /dev/sdc /dev/nvme0n1; do
      if [[ -b "$disk" ]]; then
        disks+=("$disk:direct")
      fi
    done
  fi

  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi
  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check each disk
  for disk_info in "${disks[@]}"; do
    IFS=':' read -r disk controller <<< "$disk_info"
    # "direct" only marks a plain disk in the list; it is not a smartctl
    # device type, so check_disk gets an empty controller for it.
    [[ "$controller" == "direct" ]] && controller=""
    check_disk "$disk" "$controller"
  done
  print_color "$BLUE" "Check completed!"
  echo ""
  print_color "$YELLOW" "Note: TBW Endurance values shown are minimum guaranteed numbers."
  print_color "$YELLOW" " Actual endurance for your specific drive model may be higher."
}
# Usage information
# Prints the command-line help text to stdout.
# Globals: SCRIPT_NAME (read)
usage() {
  local me=$SCRIPT_NAME
  printf '%s\n' \
    "Usage: $me [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks and RAID arrays" \
    "" \
    "Examples:" \
    " $me # Check all auto-detected disks" \
    " $me /dev/sda # Check specific disk" \
    " $me /dev/nvme0n1 # Check specific NVMe disk" \
    " $me /dev/sda /dev/nvme0n1 # Check multiple disks" \
    "" \
    "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID"
}
# Parse command line arguments
# Only the first argument is inspected for a help/version flag; anything
# else (including no arguments) is handed to main unchanged.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi

615
old/ubuntu-v2.5.sh Executable file
View File

@@ -0,0 +1,615 @@
#!/bin/bash
# Disk Health Check Script for Ubuntu 24.04
# Enhanced with SAS/PERC H730P controller support
# Checks SSD TBW/lifespan and HDD health status
# Script identity, reported by usage() and --version.
SCRIPT_NAME=$(basename "$0")
VERSION="2.5"
# Color codes used by print_color and the lifespan estimators.
# tput's stderr is discarded on purpose: when $TERM is unset or unknown
# (cron, systemd units, CI) tput fails, and without the redirect it would
# print one error per color. On failure the variable is simply left empty,
# so output is uncolored.
RED=$(tput setaf 1 2>/dev/null)
GREEN=$(tput setaf 2 2>/dev/null)
YELLOW=$(tput setaf 3 2>/dev/null)
BLUE=$(tput setaf 4 2>/dev/null)
CYAN=$(tput setaf 6 2>/dev/null)
NC=$(tput sgr0 2>/dev/null)
# Print a message wrapped in a color escape and the global reset sequence NC.
# Arguments: $1 - color escape (e.g. $RED), $2 - message text
# Backslash escapes in the arguments are interpreted (echo -e).
print_color() {
  echo -e "${1}${2}${NC}"
}
# Succeed (status 0) when `command -v` can resolve $1; prints nothing.
command_exists() {
  command -v "$1" &> /dev/null
}
# Verify that the external tools this script calls are installed.
# When any is missing, prints the package names plus an apt install hint
# and exits the script with status 1; otherwise returns silently.
check_dependencies() {
  local -a missing=()
  local spec
  # Each spec is "<command to look for>:<package that provides it>".
  for spec in "smartctl:smartmontools" "bc:bc"; do
    if ! command_exists "${spec%%:*}"; then
      missing+=("${spec#*:}")
    fi
  done
  if (( ${#missing[@]} > 0 )); then
    print_color $RED "Error: Missing required packages: ${missing[*]}"
    echo "Install with: sudo apt update && sudo apt install ${missing[*]}"
    exit 1
  fi
}
# Probe how much SMART data smartctl can read from a disk.
# Arguments:
#   $1 - device path
#   $2 - optional smartctl device type, passed via -d (e.g. "megaraid,0")
# Outputs exactly one word on stdout:
#   no_access      smartctl -i exited non-zero
#   not_available  no "SMART support is: ... Available" line in the -i output
#   disabled       support reported, but no "Enabled" on those lines
#   no_attributes  -A output has no ATTRIBUTE_NAME table header
#   full_access    all of the above checks passed
test_smart_access() {
  local disk=$1
  local controller=$2
  # Build the command as an array so the controller string reaches -d as a
  # single argument, with no word splitting or globbing.
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  # A single -i call serves both as the access test and as the data source.
  local smart_info
  if ! smart_info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null); then
    echo "no_access"
    return
  fi

  # Only the "SMART support is:" lines are inspected. The match is
  # case-sensitive, so "Unavailable" does not count as "Available".
  # NOTE(review): device classes whose -i / -A output uses a different
  # layout (possibly NVMe and SCSI/SAS) would be reported as not_available
  # or no_attributes here - confirm against real hardware.
  local support
  support=$(grep "SMART support is:" <<< "$smart_info")
  if [[ "$support" != *Available* ]]; then
    echo "not_available"
    return
  fi
  if [[ "$support" != *Enabled* ]]; then
    echo "disabled"
    return
  fi

  # Test attribute reading.
  local attributes
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  if [[ "$attributes" != *ATTRIBUTE_NAME* ]]; then
    echo "no_attributes"
    return
  fi
  echo "full_access"
}
# Print the value after the first ':' of the first line of $1 that matches
# the case-insensitive grep pattern $2, with leading blanks removed.
_info_field() {
  grep -i "$2" <<< "$1" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1
}

# Print the 10th whitespace-separated column (the raw value in smartctl's
# attribute table) of the first line of $1 matching grep pattern $2.
_attr_raw() {
  grep -i "$2" <<< "$1" | awk '{print $10}' | head -1
}

# Collect identity, health and selected SMART attributes for one disk.
# Arguments:
#   $1 - device path
#   $2 - optional smartctl device type, passed via -d
# Outputs one '|'-separated record on stdout:
#   model|serial|capacity|firmware|health_status|disk_type|power_on_hours|
#   reallocated_sectors|pending_sectors|total_written|host_writes_32mib|
#   media_wearout
# Fields that cannot be found are left empty; disk_type is SSD, HDD or
# UNKNOWN.
get_disk_info() {
  local disk=$1
  local controller=$2
  # Array form keeps the controller string as one argument to -d.
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  local info attributes health
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  health=$("${smart_cmd[@]}" -H "$disk" 2>/dev/null)

  # Identity fields, with alternative labels as fallbacks.
  local model vendor serial capacity firmware health_status
  model=$(_info_field "$info" "Device Model:\|Product:")
  vendor=$(_info_field "$info" "Vendor:")
  [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model"
  serial=$(_info_field "$info" "Serial Number:\|Serial number:")
  # Everything from the first '[' on is dropped, so for a line such as
  # "... 480,103,981,056 bytes [480 GB]" only the byte count is kept.
  capacity=$(_info_field "$info" "User Capacity:\|Total NVM Capacity:" | cut -d'[' -f1)
  firmware=$(_info_field "$info" "Firmware Version:\|Firmware revision:\|Revision:")
  health_status=$(_info_field "$health" "result:\|SMART overall-health")
  if [[ -z "$health_status" ]]; then
    # Fallback: take the text between square brackets on the health line.
    health_status=$(grep -i "SMART overall-health" <<< "$health" | awk -F'[' '{print $2}' | cut -d']' -f1)
  fi

  # Classify the medium. "Solid State Device" must be tested before
  # "Rotation Rate" because it is matched anywhere in the output and wins
  # whenever both strings are present.
  local disk_type="UNKNOWN"
  if grep -qi "Solid State Device" <<< "$info"; then
    disk_type="SSD"
  elif grep -qi "Rotation Rate" <<< "$info"; then
    disk_type="HDD"
  elif grep -qi "SCSI\|SAS" <<< "$info"; then
    # No explicit medium line: guess from a typical spindle speed appearing
    # anywhere in the output.
    if grep -qi "15000\|10000\|7200" <<< "$info"; then
      disk_type="HDD"
    else
      disk_type="SSD"
    fi
  fi

  # Raw attribute values.
  local power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib media_wearout
  power_on_hours=$(_attr_raw "$attributes" "Power_On_Hours")
  reallocated_sectors=$(_attr_raw "$attributes" "Reallocated_Sector_Ct")
  pending_sectors=$(_attr_raw "$attributes" "Current_Pending_Sector")
  # First matching write counter wins; its unit depends on which attribute
  # name matched and is not recorded in the output.
  total_written=$(_attr_raw "$attributes" "Total_LBAs_Written\|Lifetime_Writes_GiB\|Host_Writes_32MiB\|Flash_Writes_GiB")
  host_writes_32mib=$(_attr_raw "$attributes" "Host_Writes_32MiB")
  media_wearout=$(_attr_raw "$attributes" "Media_Wearout_Indicator\|Wear_Leveling_Count\|SSD_Life_Left")

  echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout"
}
# Convert a raw SMART write counter into decimal terabytes written.
# Arguments:
#   $1 - raw counter: GiB for Kingston models, otherwise 32 MiB units
#   $2 - number of 512-byte sectors (LBAs) written
#   $3 - disk model string
# Precedence: Kingston model with non-zero $1, then non-zero $2, then
# non-zero $1.
# Outputs the value truncated to two decimals (e.g. "1.07"), or "0" when no
# usable counter was given.
#
# Counters are converted with their exact binary size (1 GiB = 2^30 bytes,
# 32 MiB = 2^25 bytes); treating them as decimal GB/MB under-reports writes
# by about 5-7%. Integer shell arithmetic is used so the result does not
# depend on bc, and values that are not plain non-negative integers count as
# 0 instead of raising an arithmetic error.
calculate_tbw() {
  local raw_value=$1
  local sectors=$2
  local disk_model=$3
  local bytes=0

  [[ "$raw_value" =~ ^[0-9]+$ ]] || raw_value=0
  [[ "$sectors" =~ ^[0-9]+$ ]] || sectors=0
  # Force base 10 so values with leading zeros are not parsed as octal.
  raw_value=$(( 10#$raw_value ))
  sectors=$(( 10#$sectors ))

  if (( raw_value > 0 )) && grep -qi "KINGSTON" <<< "$disk_model"; then
    bytes=$(( raw_value * 1073741824 )) # GiB
  elif (( sectors > 0 )); then
    bytes=$(( sectors * 512 ))
  elif (( raw_value > 0 )); then
    bytes=$(( raw_value * 33554432 )) # 32 MiB units
  fi

  if (( bytes == 0 )); then
    echo "0"
    return
  fi
  # Whole terabytes, then hundredths of a terabyte (1 TB = 10^12 bytes).
  printf '%d.%02d\n' "$(( bytes / 1000000000000 ))" "$(( bytes % 1000000000000 / 10000000000 ))"
}
# Estimate the rated write endurance (in TB) of an SSD.
# Arguments:
#   $1 - disk model string, matched case-insensitively
#   $2 - capacity in GB
# Outputs the endurance figure for the first model family that matches, in
# this order: Kingston SA400, SAS/Seagate SSDs, a list of vendors this
# script treats as enterprise-class, and finally a generic consumer ladder.
estimate_ssd_endurance() {
  local disk_model=$1
  local capacity_gb=$2
  # Each rung is "<minimum capacity in GB>:<endurance in TB>", largest
  # first; the "any" rung is the fallback for everything smaller.
  local -a ladder
  if grep -qi "KINGSTON.*SA400" <<< "$disk_model"; then
    ladder=(960:300 480:150 any:80)
  elif grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD" <<< "$disk_model"; then
    ladder=(1000:10000 600:6000 400:4000 any:2000)
  elif grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL" <<< "$disk_model"; then
    ladder=(1000:1200 480:600 240:300 any:150)
  else
    ladder=(1000:600 480:300 240:150 120:80 any:40)
  fi

  local rung floor
  for rung in "${ladder[@]}"; do
    floor=${rung%%:*}
    if [[ "$floor" == "any" || $capacity_gb -ge $floor ]]; then
      echo "${rung#*:}"
      return
    fi
  done
}
# Estimate remaining SSD life.
# Arguments:
#   $1 - power-on hours      $2 - TB written so far    $3 - disk model
#   $4 - capacity in GB      $5 - raw wear indicator value (may be empty)
# Outputs one '|'-separated record: percent|tbw_remaining|status|source
#   source is media_wearout, tbw or estimated.
# When power-on hours are empty or 0 the literal record
# "Unknown||Unknown||Unknown" is printed instead.
# Colors come from the globals RED, YELLOW, GREEN and NC.
estimate_ssd_lifespan() {
  local power_on_hours=$1
  local tbw_used=$2
  local disk_model=$3
  local capacity_gb=$4
  local media_wearout=$5

  if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then
    echo "Unknown||Unknown||Unknown"
    return
  fi

  local estimated_endurance tbw_remaining
  estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")
  tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0")

  local tint label

  # A wear indicator, when present, takes precedence over the TBW estimate.
  # It is displayed as-is as the remaining percentage, with the same
  # thresholds for every model.
  if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then
    if [[ $media_wearout -le 10 ]]; then
      tint=$RED label="Critical wear"
    elif [[ $media_wearout -le 30 ]]; then
      tint=$YELLOW label="High wear"
    elif [[ $media_wearout -le 70 ]]; then
      tint=$YELLOW label="Moderate wear"
    else
      tint=$GREEN label="Healthy"
    fi
    echo "${tint}${media_wearout}%${NC}|${tint}${tbw_remaining} TB${NC}|${tint}${label}${NC}|media_wearout"
    return
  fi

  # No write counter either: only the estimated endurance can be reported.
  if ! [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
    echo "Unknown|${estimated_endurance} TB|New|estimated"
    return
  fi

  # Otherwise derive wear from TB written versus estimated endurance.
  local lifespan_used lifespan_remaining
  lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0")
  lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100")
  if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then
    tint=$RED label="High wear"
  elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then
    tint=$YELLOW label="Moderate wear"
  else
    tint=$GREEN label="Healthy"
  fi
  echo "${tint}${lifespan_remaining}%${NC}|${tint}${tbw_remaining} TB${NC}|${tint}${label}${NC}|tbw"
}
# Give a rough remaining-life estimate for a hard disk.
# Arguments:
#   $1 - power-on hours (empty -> "Unknown")
#   $2 - reallocated sector count (empty counts as 0)
#   $3 - pending sector count (empty counts as 0)
# Outputs one colored line; the first matching rule wins, in this order:
# pending sectors, reallocated sectors, power-on hours.
estimate_hdd_lifespan() {
  local power_on_hours=$1
  local reallocated_sectors=${2:-0}
  local pending_sectors=${3:-0}

  if [[ -z "$power_on_hours" ]]; then
    echo "Unknown"
    return
  fi

  if [[ "$pending_sectors" -gt 0 ]]; then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
    return
  fi
  if [[ "$reallocated_sectors" -gt 100 ]]; then
    echo "${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)"
    return
  fi
  if [[ "$reallocated_sectors" -gt 10 ]]; then
    echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)"
    return
  fi
  if [[ "$power_on_hours" -gt 40000 ]]; then
    echo "${YELLOW}1-2 years${NC} (High usage: $power_on_hours hours)"
    return
  fi
  if [[ "$power_on_hours" -gt 25000 ]]; then
    echo "${GREEN}2-3 years${NC} (Moderate usage: $power_on_hours hours)"
    return
  fi
  echo "${GREEN}> 3 years${NC} (Low usage: $power_on_hours hours)"
}
# Print a health report for one disk.
# Arguments:
#   $1 - device path
#   $2 - optional smartctl device type (e.g. "megaraid,0"); empty = direct
# Flow: probe the SMART access level (returning early with an explanation
# when data cannot be read), fetch the '|'-separated record from
# get_disk_info, print the common fields, then an SSD- or HDD-specific
# section.
check_disk() {
  local disk=$1
  local controller=$2
  print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})"
  echo "=================================================="

  # Test SMART access level.
  local access_level
  access_level=$(test_smart_access "$disk" "$controller")
  case $access_level in
    "no_access")
      print_color $RED "ERROR: Cannot access disk through controller"
      echo "Possible reasons:"
      echo " - Controller doesn't support SMART passthrough"
      echo " - Disk is part of a hardware RAID array"
      echo " - Insufficient permissions (try running as root)"
      echo " - Controller busy or offline"
      echo ""
      return
      ;;
    "not_available")
      print_color $YELLOW "SMART not available on this disk"
      echo "This disk does not support SMART monitoring"
      echo ""
      return
      ;;
    "disabled")
      print_color $YELLOW "SMART is disabled on this disk"
      echo "SMART is available but currently disabled"
      echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk"
      echo ""
      return
      ;;
    "no_attributes")
      print_color $YELLOW "WARNING: Cannot read SMART attributes"
      echo "This is common with hardware RAID controllers like PERC H730P"
      echo "Try checking through the RAID management interface"
      echo ""
      return
      ;;
    "limited_attributes")
      print_color $YELLOW "NOTE: Limited SMART data available"
      echo "Controller is filtering some SMART attributes"
      ;;
  esac

  # Get disk information. The read targets are declared local so they do
  # not leak into, or pick up stale values from, the global scope.
  local disk_info
  disk_info=$(get_disk_info "$disk" "$controller")
  local model serial capacity firmware health_status disk_type
  local power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib media_wearout
  IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info"

  # Display basic information.
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Capacity: ${capacity:-Unknown}"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"
  # Only show power on hours if available.
  if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
    echo "Power On Hours: $power_on_hours"
  else
    echo "Power On Hours: Unknown"
  fi

  # Disk type specific analysis.
  if [[ "$disk_type" == "SSD" ]]; then
    local tbw_used=0
    # Prefer Host_Writes_32MiB when present: its unit is unambiguous,
    # whereas total_written is the first of several differently-scaled
    # counters and may even be this same attribute.
    # NOTE(review): a total_written taken from Lifetime_Writes_GiB or
    # Flash_Writes_GiB is still passed below as a sector count - confirm
    # which attributes the target drives report.
    if [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model")
    elif [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "" "$total_written" "$model")
    fi
    if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
      echo "TBW Used: ${tbw_used} TB"
    fi

    # Capacity in GB for the endurance lookup. A "GB"/"TB" suffix is used
    # when present; otherwise the string is taken as a byte count (digits
    # with separators, e.g. "480,103,981,056 bytes") and divided by 10^9.
    local capacity_gb=0
    if grep -qi "GB" <<< "$capacity"; then
      capacity_gb=$(grep -o '[0-9.]*' <<< "$capacity" | head -1 | cut -d. -f1)
    elif grep -qi "TB" <<< "$capacity"; then
      capacity_gb=$(grep -o '[0-9.]*' <<< "$capacity" | head -1 | awk '{printf "%d\n", $1 * 1000}')
    else
      local capacity_bytes=${capacity//[!0-9]/}
      if [[ -n "$capacity_bytes" ]]; then
        capacity_gb=$(( 10#$capacity_bytes / 1000000000 ))
      fi
    fi

    local lifespan_info lifespan_percent tbw_remaining wear_status wear_source
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout")
    lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1)
    tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2)
    wear_status=$(echo "$lifespan_info" | cut -d'|' -f3)
    wear_source=$(echo "$lifespan_info" | cut -d'|' -f4)
    if [[ "$wear_source" != "media_wearout" && $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
      echo "TBW Remaining: $tbw_remaining"
    fi
    echo "Lifespan: $lifespan_percent ($wear_status)"
    # Show wear source if available.
    if [[ "$wear_source" == "media_wearout" ]]; then
      echo "Wear Source: Media Wearout Indicator"
    elif [[ "$wear_source" == "tbw" ]]; then
      echo "Wear Source: TBW Calculation"
    elif [[ "$wear_source" == "estimated" ]]; then
      echo "Wear Source: Estimated Endurance"
    fi
  elif [[ "$disk_type" == "HDD" ]]; then
    if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then
      echo "Realloc Sectors: $reallocated_sectors"
    fi
    if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then
      echo "Pending Sectors: $pending_sectors"
    fi
    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
    echo "Lifespan: $lifespan"
  else
    print_color $YELLOW "Limited information available for this disk type"
    echo "This is normal for hardware RAID configurations like PERC H730P"
    echo "For detailed SAS drive information, use controller management tools"
  fi
  echo ""
}
# List whole-disk block devices (no partitions), space-separated on one
# line of stdout.
# Covered name patterns:
#   /dev/sdX and /dev/sdXX   SATA/SAS/USB disks (also beyond the 26th)
#   /dev/nvme<N>n<M>         NVMe namespaces, any number of digits
#   /dev/vdX, /dev/xvdX      virtio and Xen disks
# SCSI generic nodes (/dev/sg*) are not scanned: they are character
# devices, so a block-device test can never accept them.
detect_disks() {
  local -a disks=()
  local disk
  # A glob that matches nothing stays literal and is rejected by -b.
  for disk in /dev/sd[a-z] /dev/sd[a-z][a-z] /dev/nvme*n* /dev/vd[a-z] /dev/xvd[a-z]; do
    [[ -b "$disk" ]] || continue
    # The broad NVMe glob also matches partitions (nvme0n1p1); keep only
    # bare namespace names.
    if [[ "$disk" == /dev/nvme* && ! "$disk" =~ ^/dev/nvme[0-9]+n[0-9]+$ ]]; then
      continue
    fi
    disks+=("$disk")
  done
  echo "${disks[@]}"
}
# Probe for disks reachable through a hardware RAID controller.
# For every controller type and slot number 0-31, smartctl -d "<type>,<n>"
# is tried against a fixed list of base devices; the first base device that
# answers for a given type/slot is recorded.
# Outputs the hits as space-separated "<device>:<type>,<slot>" entries on
# one line of stdout (an empty line when nothing answers).
detect_raid_controllers() {
  local -a present=() found=()
  local dev ctrl slot

  # Only base devices that exist as block devices are ever probed.
  for dev in /dev/sda /dev/sdb /dev/sdc /dev/sg0 /dev/sg1; do
    [[ -b "$dev" ]] && present+=("$dev")
  done

  for ctrl in megaraid cciss areca 3ware hpt; do
    for (( slot = 0; slot <= 31; slot++ )); do
      for dev in "${present[@]}"; do
        if smartctl -d "$ctrl,$slot" -i "$dev" &>/dev/null; then
          found+=("$dev:$ctrl,$slot")
          break # one base device per type/slot is enough
        fi
      done
    done
  done
  echo "${found[@]}"
}
# Main routine: print the banner, verify dependencies, build the list of
# disks (from the arguments, or by auto-detection when none are given),
# report on each one and finish with usage tips.
# Arguments: optional device paths. Block devices and character devices
#   are both accepted, so SCSI generic nodes such as /dev/sg0 - which the
#   usage text advertises - can be checked directly.
# Exits with status 1 when no usable disk is found.
main() {
  print_color $BLUE "Ubuntu 24.04 Disk Health Check Script v$VERSION"
  print_color $BLUE "Enhanced with PERC H730P and SAS Support"
  print_color $BLUE "============================================"
  echo ""
  check_dependencies

  local -a disks=()
  local disk disk_info controller
  if [[ $# -gt 0 ]]; then
    # Specific disks were named: check only those.
    for disk in "$@"; do
      if [[ -b "$disk" || -c "$disk" ]]; then
        disks+=("$disk")
      else
        print_color $RED "Error: $disk is not a valid block or character device"
      fi
    done
  else
    # Auto-detect. Progress messages are printed here, outside the command
    # substitutions, so they cannot end up in the device lists.
    print_color $CYAN "Auto-detecting disks (excluding partitions)..."
    local -a direct_disks=()
    read -ra direct_disks <<< "$(detect_disks)"
    print_color $CYAN "Scanning for RAID controllers..."
    local -a raid_disks=()
    read -ra raid_disks <<< "$(detect_raid_controllers)"
    # Combine both lists.
    disks=("${direct_disks[@]}" "${raid_disks[@]}")
  fi

  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color $RED "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi
  print_color $GREEN "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check if running as root, warn if not.
  if [[ $EUID -ne 0 ]]; then
    print_color $YELLOW "Warning: Not running as root."
    print_color $YELLOW "Some disks/controllers may show limited information."
    echo "For complete results, run as: sudo $0"
    echo ""
  fi

  # Check each disk. Entries of the form "<device>:<controller>" come from
  # RAID detection and carry the smartctl device type after the colon.
  for disk_info in "${disks[@]}"; do
    if [[ "$disk_info" == *":"* ]]; then
      IFS=':' read -r disk controller <<< "$disk_info"
      check_disk "$disk" "$controller"
    else
      check_disk "$disk_info"
    fi
  done

  print_color $BLUE "Check completed!"
  echo ""
  print_color $CYAN "Note: For PERC H730P controllers with SAS drives:"
  print_color $CYAN " - Install 'storcli' for detailed controller information"
  print_color $CYAN " - Use 'smartctl -d sat /dev/sgX' to try direct access"
  print_color $CYAN " - Hardware RAID controllers often limit SMART data access"
  echo ""
  print_color $CYAN "Ubuntu-specific tips:"
  print_color $CYAN " - Use 'lsblk' to see all available block devices"
  print_color $CYAN " - Use 'lshw -class disk' for detailed disk information"
}
# Print the command-line help text (synopsis and examples) to stdout.
# Reads the global SCRIPT_NAME.
usage() {
  printf '%s\n' \
    "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks" \
    "" \
    "Examples:" \
    " $SCRIPT_NAME # Check all auto-detected disks" \
    " sudo $SCRIPT_NAME # Check all disks (as root)" \
    " $SCRIPT_NAME /dev/sda # Check specific disk" \
    " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" \
    " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" \
    " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks"
}
# Entry point. Only the first argument is inspected: -h/--help prints the
# usage text, -v/--version prints the version; anything else (including no
# arguments at all) is handed to main unchanged.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi

531
ubuntu-v2.3.sh Executable file
View File

@@ -0,0 +1,531 @@
#!/bin/bash
# Disk Health Check Script for Ubuntu
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
# Supports consumer and enterprise disk classification
# Script identity, reported by usage() and --version.
SCRIPT_NAME=$(basename "$0")
VERSION="2.3"
# Color codes used by print_color and the lifespan estimators.
# tput's stderr is discarded on purpose: when $TERM is unset or unknown
# (cron, systemd units, CI) tput fails, and without the redirect it would
# print one error per color. On failure the variable is simply left empty,
# so output is uncolored.
RED=$(tput setaf 1 2>/dev/null)
GREEN=$(tput setaf 2 2>/dev/null)
YELLOW=$(tput setaf 3 2>/dev/null)
BLUE=$(tput setaf 4 2>/dev/null)
CYAN=$(tput setaf 6 2>/dev/null)
MAGENTA=$(tput setaf 5 2>/dev/null)
NC=$(tput sgr0 2>/dev/null)
# Print a message wrapped in a color escape and the global reset sequence NC.
# Arguments: $1 - color escape (e.g. $RED), $2 - message text
# Backslash escapes in the arguments are interpreted (echo -e).
print_color() {
  echo -e "${1}${2}${NC}"
}
# Succeed (status 0) when `command -v` can resolve $1; prints nothing.
command_exists() {
  command -v "$1" &> /dev/null
}
# Verify that the external tools this script calls are installed.
# When any is missing, prints the package names plus an apt install hint
# and exits the script with status 1; otherwise returns silently.
check_dependencies() {
  local -a missing=()
  local spec
  # Each spec is "<command to look for>:<package that provides it>".
  for spec in "smartctl:smartmontools" "bc:bc"; do
    if ! command_exists "${spec%%:*}"; then
      missing+=("${spec#*:}")
    fi
  done
  if (( ${#missing[@]} > 0 )); then
    print_color $RED "Error: Missing required packages: ${missing[*]}"
    echo "Install with: sudo apt update && sudo apt install ${missing[*]}"
    exit 1
  fi
}
# Assumed minimum write endurance in TB, keyed by capacity tier in GB.
# get_capacity_tier maps a real capacity onto one of these keys; the
# enterprise figures are three times the consumer ones.
declare -A CONSUMER_TBW=([250]=150 [500]=300 [1000]=600 [2000]=1200 [4000]=2400)
declare -A ENTERPRISE_TBW=([250]=450 [500]=900 [1000]=1800 [2000]=3600 [4000]=7200)
# Map a capacity in GB onto the smallest endurance-table tier that can hold
# it. Arguments: $1 - capacity in GB.
# Outputs the tier key (250, 500, 1000, 2000 or 4000). Capacities above
# 4000 GB get the 4000 tier as-is; no scaling is applied.
get_capacity_tier() {
  local capacity_gb=$1
  local -a ceilings=(250 500 1000 2000 4000)
  local idx
  for (( idx = 0; idx < ${#ceilings[@]}; idx++ )); do
    if [[ $capacity_gb -le ${ceilings[idx]} ]]; then
      echo "${ceilings[idx]}"
      return
    fi
  done
  echo "4000"
}
# Classify a disk from its `smartctl -i` output.
# Arguments: $1 - device path
# Outputs "disk_type|transport|is_enterprise" on stdout, where
#   disk_type     is NVMe, SAS, SSD, HDD or UNKNOWN
#   transport     is NVMe, SAS, SATA or empty
#   is_enterprise is true or false
#
# The SAS test and both enterprise tests use whole-word matching (grep -w).
# With plain substring matching, "EP" matched the "Sep" in smartctl's
# "Local Time is:" line and "DC" matched every "WDC ..." model, so ordinary
# consumer disks were classed as enterprise.
get_disk_info() {
  local disk=$1
  local info
  info=$(smartctl -i "$disk" 2>/dev/null)
  local transport=""
  local disk_type="UNKNOWN"
  local is_enterprise=false

  if [[ "$disk" == /dev/nvme* ]] || grep -qi "NVMe" <<< "$info"; then
    disk_type="NVMe"
    transport="NVMe"
  elif grep -qiw "SAS" <<< "$info"; then
    disk_type="SAS"
    transport="SAS"
    is_enterprise=true
  elif grep -qi "Solid State Device" <<< "$info"; then
    # Must precede the "Rotation Rate" test: this string is matched
    # anywhere in the output and wins when both are present.
    disk_type="SSD"
    transport="SATA"
  elif grep -qi "Rotation Rate" <<< "$info"; then
    disk_type="HDD"
    transport="SATA"
  fi

  # Enterprise keywords anywhere in the identity output.
  if grep -Eqiw 'ENTERPRISE|EP|SAS|Xeon|Xeons|DualPort|PowerLoss|PLP' <<< "$info"; then
    is_enterprise=true
  fi

  # Enterprise markers in the model name ("Model Number:" is the label
  # looked up as the fallback for "Device Model:" elsewhere in this script).
  local model
  model=$(grep -i "Device Model:\|Model Number:" <<< "$info" | cut -d: -f2 | sed 's/^[ \t]*//')
  if grep -Eqiw 'PRO|EP|DC|ENT|ENTERPRISE' <<< "$model"; then
    is_enterprise=true
  fi

  echo "$disk_type|$transport|$is_enterprise"
}
# Convert a raw write counter into decimal terabytes written.
# Arguments:
#   $1 - disk type (accepted for interface compatibility; the conversion is
#        the same for every type)
#   $2 - raw counter in 32 MiB units
#   $3 - number of 512-byte sectors (LBAs) written; wins over $2 when
#        non-zero
# Outputs the value truncated to two decimals (e.g. "1.02"), or "0" when no
# usable counter was given.
#
# The 32 MiB counter is converted with its exact size (2^25 bytes);
# treating it as 32 decimal MB under-reports writes by about 5%. Integer
# shell arithmetic is used so the result does not depend on bc, and values
# that are not plain non-negative integers count as 0 instead of raising an
# arithmetic error.
calculate_tbw() {
  local disk_type=$1
  local raw_value=$2
  local sectors=$3
  local bytes=0

  [[ "$raw_value" =~ ^[0-9]+$ ]] || raw_value=0
  [[ "$sectors" =~ ^[0-9]+$ ]] || sectors=0
  # Force base 10 so values with leading zeros are not parsed as octal.
  raw_value=$(( 10#$raw_value ))
  sectors=$(( 10#$sectors ))

  if (( sectors > 0 )); then
    bytes=$(( sectors * 512 ))
  elif (( raw_value > 0 )); then
    bytes=$(( raw_value * 33554432 )) # 32 MiB units
  fi

  if (( bytes == 0 )); then
    echo "0"
    return
  fi
  # Whole terabytes, then hundredths of a terabyte (1 TB = 10^12 bytes).
  printf '%d.%02d\n' "$(( bytes / 1000000000000 ))" "$(( bytes % 1000000000000 / 10000000000 ))"
}
# Look up the assumed write endurance for a disk.
# Arguments:
#   $1 - capacity in GB
#   $2 - "true" for enterprise class, anything else for consumer
#   $3 - disk type
# Outputs the endurance in TB from ENTERPRISE_TBW / CONSUMER_TBW for the
# capacity tier, or "N/A" for HDDs.
get_estimated_endurance() {
  local capacity_gb=$1
  local is_enterprise=$2
  local disk_type=$3

  # HDDs don't have a TBW rating.
  if [[ "$disk_type" == "HDD" ]]; then
    echo "N/A"
    return
  fi

  local tier
  tier=$(get_capacity_tier "$capacity_gb")
  case "$is_enterprise" in
    true) echo "${ENTERPRISE_TBW[$tier]}" ;;
    *) echo "${CONSUMER_TBW[$tier]}" ;;
  esac
}
# Estimate remaining SSD life from TB written versus rated endurance.
# Arguments:
#   $1 - power-on hours      $2 - TB written so far
#   $3 - estimated endurance in TB, or "N/A"
#   $4 - disk type (echoed back as the last field)
# Outputs one '|'-separated record: percent|tbw_remaining|status|disk_type
# Special records: "Unknown||Unknown|New" when power-on hours are empty or
# 0, and "N/A|N/A|N/A|HDD" when the endurance is "N/A".
# Colors come from the globals RED, YELLOW, GREEN and NC.
estimate_ssd_lifespan() {
  local power_on_hours=$1
  local tbw_used=$2
  local estimated_endurance=$3
  local disk_type=$4

  if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then
    echo "Unknown||Unknown|New"
    return
  fi
  if [[ "$estimated_endurance" == "N/A" ]]; then
    echo "N/A|N/A|N/A|HDD"
    return
  fi

  local tbw_remaining
  tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0")

  # Without a positive write counter only the rated endurance is known.
  if ! [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
    echo "Unknown|${estimated_endurance} TB|New|$disk_type"
    return
  fi

  local lifespan_used lifespan_remaining tint label
  lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0")
  lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100")
  if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then
    tint=$RED label="High wear"
  elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then
    tint=$YELLOW label="Moderate wear"
  else
    tint=$GREEN label="Healthy"
  fi
  echo "${tint}${lifespan_remaining}%${NC}|${tint}${tbw_remaining} TB${NC}|${label}|$disk_type"
}
# Give a rough remaining-life estimate for a hard disk using a severity
# score.
# Arguments:
#   $1 - power-on hours (empty -> "Unknown")
#   $2 - reallocated sectors   $3 - pending sectors
#   $4 - start/stop count      $5 - load cycle count
#   $6 - disk type; the two mechanical counters only count for "HDD"
# Any pending sector is reported as CRITICAL straight away. Otherwise
# points are added per risk factor and the total selects the verdict:
#   >= 5 "< 6 months", >= 3 "6-18 months", >= 1 "1-3 years", else healthy.
estimate_hdd_lifespan() {
  local power_on_hours=$1
  local reallocated_sectors=$2
  local pending_sectors=$3
  local start_stop_count=$4
  local load_cycle_count=$5
  local disk_type=$6

  if [[ -z "$power_on_hours" ]]; then
    echo "Unknown"
    return
  fi

  # Critical issue: pending sectors short-circuit the scoring.
  if [[ "$pending_sectors" -gt 0 ]]; then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
    return
  fi

  local severity=0

  # Reallocated sectors: 1-3 points.
  if [[ "$reallocated_sectors" -gt 100 ]]; then
    (( severity += 3 ))
  elif [[ "$reallocated_sectors" -gt 10 ]]; then
    (( severity += 2 ))
  elif [[ "$reallocated_sectors" -gt 0 ]]; then
    (( severity += 1 ))
  fi

  # Usage-based assessment: 1-3 points.
  if [[ "$power_on_hours" -gt 50000 ]]; then
    (( severity += 3 ))
  elif [[ "$power_on_hours" -gt 30000 ]]; then
    (( severity += 2 ))
  elif [[ "$power_on_hours" -gt 15000 ]]; then
    (( severity += 1 ))
  fi

  # Mechanical wear (for HDDs): up to 2 points per counter.
  if [[ "$disk_type" == "HDD" ]]; then
    if [[ "$start_stop_count" -gt 50000 ]]; then
      (( severity += 2 ))
    elif [[ "$start_stop_count" -gt 20000 ]]; then
      (( severity += 1 ))
    fi
    if [[ "$load_cycle_count" -gt 500000 ]]; then
      (( severity += 2 ))
    elif [[ "$load_cycle_count" -gt 200000 ]]; then
      (( severity += 1 ))
    fi
  fi

  if [[ $severity -ge 5 ]]; then
    echo "${RED}< 6 months${NC} (Multiple risk factors)"
  elif [[ $severity -ge 3 ]]; then
    echo "${YELLOW}6-18 months${NC} (Moderate wear)"
  elif [[ $severity -ge 1 ]]; then
    echo "${YELLOW}1-3 years${NC} (Light wear)"
  else
    echo "${GREEN}> 3 years${NC} (Healthy)"
  fi
}
# Report Linux software RAID (md) arrays.
# Reads /proc/mdstat, and for every line starting with "md<digits>" whose
# /dev node is a block device prints a "Found software RAID" line. When
# mdadm is installed and `mdadm --detail` returns output, the array's RAID
# level, state and active device count are printed as well.
check_mdraid() {
  [[ -f /proc/mdstat ]] || return 0

  local -a md_devices=()
  local line
  while IFS= read -r line; do
    # The array name is the first space-delimited word of the line.
    if [[ $line =~ ^md[0-9]+ ]]; then
      md_devices+=("/dev/${line%% *}")
    fi
  done < /proc/mdstat

  local md md_info
  for md in "${md_devices[@]}"; do
    [[ -b "$md" ]] || continue
    print_color $MAGENTA "Found software RAID: $md"
    command_exists mdadm || continue
    md_info=$(mdadm --detail "$md" 2>/dev/null)
    [[ -n "$md_info" ]] || continue
    echo "RAID Level: $(echo "$md_info" | grep "Raid Level" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "State: $(echo "$md_info" | grep "State" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "Devices: $(echo "$md_info" | grep "Active Devices" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo ""
  done
}
# Print the text after the first ':' of the first line of $1 that contains
# the literal label $2, with leading blanks removed.
_smart_info_field() {
  grep -F -- "$2" <<< "$1" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//'
}

# Print the 10th whitespace-separated column (the raw value in smartctl's
# attribute table) of the first line of $1 that contains $2.
_smart_attr_raw() {
  grep -F -- "$2" <<< "$1" | awk '{print $10}' | head -1
}

# Print a health report for one disk.
# Arguments: $1 - block device path
# Flow: classify the disk (get_disk_info), read identity, health and
# attributes from smartctl, print the common fields, then an HDD section or
# an SSD/NVMe/SAS endurance section.
check_disk() {
  local disk=$1
  print_color $CYAN "Checking disk: $disk"
  echo "=================================================="

  # Check if disk exists and is accessible.
  if [[ ! -b "$disk" ]]; then
    print_color $RED "Error: $disk is not a valid block device"
    echo ""
    return
  fi

  # Get disk classification.
  local disk_info disk_type transport is_enterprise
  disk_info=$(get_disk_info "$disk")
  IFS='|' read -r disk_type transport is_enterprise <<< "$disk_info"

  # Get basic disk information. Declaration and assignment are separate so
  # that $? really is smartctl's status and not that of `local`.
  local info info_rc health attributes
  info=$(smartctl -i "$disk" 2>/dev/null)
  info_rc=$?
  health=$(smartctl -H "$disk" 2>/dev/null)
  attributes=$(smartctl -A "$disk" 2>/dev/null)
  # smartctl's exit status is a bit mask: bit 0 = command line did not
  # parse, bit 1 = device open failed. Higher bits describe the disk's
  # health and must not be treated as "cannot read".
  if (( info_rc & 3 )) || [[ -z "$info" ]]; then
    print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access."
    echo ""
    return
  fi

  # Extract disk information, with NVMe label fallbacks.
  local model serial capacity firmware
  model=$(_smart_info_field "$info" "Device Model:")
  [[ -z "$model" ]] && model=$(_smart_info_field "$info" "Model Number:")
  serial=$(_smart_info_field "$info" "Serial Number:")
  capacity=$(_smart_info_field "$info" "User Capacity:")
  [[ -z "$capacity" ]] && capacity=$(_smart_info_field "$info" "Total NVM Capacity:")
  [[ -z "$capacity" ]] && capacity=$(_smart_info_field "$info" "Namespace 1 Size/Capacity:")
  capacity=${capacity%%\[*} # drop the bracketed human-readable size
  firmware=$(_smart_info_field "$info" "Firmware Version:")

  # Capacity in GB: what remains is a byte count with thousands separators
  # (e.g. "1,000,204,886,016 bytes"). Use all of its digits - taking only
  # the first digit group would turn 1 TB into "1".
  local capacity_gb=0
  local capacity_digits=${capacity//[!0-9]/}
  if [[ -n "$capacity_digits" ]]; then
    capacity_gb=$(( 10#$capacity_digits / 1000000000 ))
  fi

  local health_status
  health_status=$(_smart_info_field "$health" "result:")
  [[ -z "$health_status" ]] && health_status=$(_smart_info_field "$health" "SMART overall-health")

  # Extract SMART attributes. The two write counters are kept apart because
  # they use different units (512-byte sectors vs. 32 MiB blocks).
  local power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib start_stop_count load_cycle_count
  power_on_hours=$(_smart_attr_raw "$attributes" "Power_On_Hours")
  reallocated_sectors=$(_smart_attr_raw "$attributes" "Reallocated_Sector_Ct")
  pending_sectors=$(_smart_attr_raw "$attributes" "Current_Pending_Sector")
  total_written=$(_smart_attr_raw "$attributes" "Total_LBAs_Written")
  host_writes_32mib=$(_smart_attr_raw "$attributes" "Host_Writes_32MiB")
  start_stop_count=$(_smart_attr_raw "$attributes" "Start_Stop_Count")
  load_cycle_count=$(_smart_attr_raw "$attributes" "Load_Cycle_Count")

  # NVMe disks report through the health log instead of an attribute table.
  if [[ "$disk_type" == "NVMe" ]]; then
    local nvme_attributes data_units
    nvme_attributes=$(smartctl -x "$disk" 2>/dev/null)
    # "Power On Hours: 1,234" - the value is the 4th word and may contain
    # thousands separators.
    power_on_hours=$(grep "Power On Hours" <<< "$nvme_attributes" | head -1 | awk '{print $4}' | tr -d ',')
    # "Data Units Written: 1,234,567 [632 GB]" - the count is the 4th word.
    # One data unit is 1000 blocks of 512 bytes, so multiplying by 1000
    # yields the 512-byte sector count that calculate_tbw expects.
    data_units=$(grep "Data Units Written" <<< "$nvme_attributes" | head -1 | awk '{print $4}' | tr -d ',')
    total_written=""
    if [[ "$data_units" =~ ^[0-9]+$ ]]; then
      total_written=$(( 10#$data_units * 1000 ))
    elif [[ -z "$host_writes_32mib" ]]; then
      host_writes_32mib=$(_smart_attr_raw "$nvme_attributes" "Host_Writes_32MiB")
    fi
  fi
  # Keep only the leading integer of values such as "1234h+56m+07.890s" so
  # the numeric comparisons further down do not fail.
  power_on_hours=${power_on_hours%%[!0-9]*}

  local disk_class="Consumer"
  [[ "$is_enterprise" == "true" ]] && disk_class="Enterprise"

  # Display basic information.
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Interface: $transport"
  echo "Class: $disk_class"
  echo "Capacity: ${capacity:-Unknown}"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"
  echo "Power On Hours: ${power_on_hours:-Unknown}"

  # Disk type specific analysis.
  if [[ "$disk_type" == "HDD" ]]; then
    echo "Realloc Sectors: ${reallocated_sectors:-0}"
    echo "Pending Sectors: ${pending_sectors:-0}"
    [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}"
    [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}"
    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type")
    echo "Lifespan: $lifespan"
  elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "")
    fi
    local estimated_endurance
    estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type")
    echo "TBW Used: ${tbw_used} TB"
    echo "TBW Endurance: ${estimated_endurance} TB"
    local lifespan_info lifespan_percent tbw_remaining wear_status
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type")
    lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1)
    tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2)
    wear_status=$(echo "$lifespan_info" | cut -d'|' -f3)
    if [[ "$estimated_endurance" != "N/A" ]]; then
      echo "TBW Remaining: $tbw_remaining"
      echo "Lifespan: $lifespan_percent ($wear_status)"
    fi
    # SAS drives may be spinning disks, so show the sector counters too.
    if [[ "$disk_type" == "SAS" ]]; then
      echo "Realloc Sectors: ${reallocated_sectors:-0}"
      echo "Pending Sectors: ${pending_sectors:-0}"
    fi
  else
    print_color $YELLOW "Unknown disk type - limited information available"
  fi
  echo ""
}
# List whole-disk block devices (no partitions), space-separated on one
# line of stdout.
# Covered name patterns:
#   /dev/sdX and /dev/sdXX   SATA/SAS/USB disks
#   /dev/nvme<N>n<M>         NVMe namespaces, any number of digits (the
#                            single-digit glob missed e.g. nvme10n1)
#   /dev/vdX, /dev/xvdX      virtio and Xen disks
detect_disks() {
  local -a disks=()
  local disk
  # A glob that matches nothing stays literal and is rejected by -b.
  for disk in /dev/sd[a-z] /dev/sd[a-z][a-z] /dev/nvme*n* /dev/vd[a-z] /dev/xvd[a-z]; do
    [[ -b "$disk" ]] || continue
    # The broad NVMe glob also matches partitions (nvme0n1p1); keep only
    # bare namespace names.
    if [[ "$disk" == /dev/nvme* && ! "$disk" =~ ^/dev/nvme[0-9]+n[0-9]+$ ]]; then
      continue
    fi
    disks+=("$disk")
  done
  echo "${disks[@]}"
}
# Main routine: print the banner, verify dependencies, report software
# RAID arrays, build the disk list (from the arguments, or by
# auto-detection when none are given) and report on each disk.
# Arguments: optional block device paths.
# Exits with status 1 when no usable disk is found.
main() {
  print_color $BLUE "Disk Health Check Script v$VERSION for Ubuntu"
  print_color $BLUE "=============================================="
  echo ""
  check_dependencies

  local -a disks=()
  local candidate

  # Check for soft-raid first.
  check_mdraid

  if (( $# == 0 )); then
    # Auto-detect disks.
    print_color $CYAN "Auto-detecting disks..."
    read -ra disks <<< "$(detect_disks)"
  else
    # Specific disks were named: keep the valid block devices only.
    for candidate in "$@"; do
      if [[ -b "$candidate" ]]; then
        disks+=("$candidate")
      else
        print_color $RED "Error: $candidate is not a valid block device"
      fi
    done
  fi

  if (( ${#disks[@]} == 0 )); then
    print_color $RED "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi
  print_color $GREEN "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check if running as root, warn if not.
  if [[ $EUID -ne 0 ]]; then
    print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible."
    echo "For complete results, run as: sudo $0"
    echo ""
  fi

  # Check each disk.
  for candidate in "${disks[@]}"; do
    check_disk "$candidate"
  done
  print_color $BLUE "Check completed!"
}
# Print the command-line help text (synopsis, examples, supported hardware)
# to stdout. Reads the global SCRIPT_NAME.
usage() {
  printf '%s\n' \
    "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks" \
    "" \
    "Examples:" \
    " $SCRIPT_NAME # Check all auto-detected disks" \
    " sudo $SCRIPT_NAME # Check all disks (as root)" \
    " $SCRIPT_NAME /dev/sda # Check specific disk" \
    " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk" \
    " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks" \
    "" \
    "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID"
}
# Entry point. Only the first argument is inspected: -h/--help prints the
# usage text, -v/--version prints the version; anything else (including no
# arguments at all) is handed to main unchanged.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi

638
ubuntu-v2.4.sh Executable file
View File

@@ -0,0 +1,638 @@
#!/bin/bash
# Disk Health Check Script for Ubuntu
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
# Supports consumer and enterprise disk classification
# Script identity, shown by usage() and by the --version output.
SCRIPT_NAME=$(basename "$0")
VERSION="2.4"
# Color codes
# tput fails and complains on stderr when TERM is unset or unknown (cron,
# some non-interactive runs). Silence that and fall back to empty strings so
# the report is simply printed without colors.
RED=$(tput setaf 1 2>/dev/null) || RED=""
GREEN=$(tput setaf 2 2>/dev/null) || GREEN=""
YELLOW=$(tput setaf 3 2>/dev/null) || YELLOW=""
BLUE=$(tput setaf 4 2>/dev/null) || BLUE=""
CYAN=$(tput setaf 6 2>/dev/null) || CYAN=""
MAGENTA=$(tput setaf 5 2>/dev/null) || MAGENTA=""
NC=$(tput sgr0 2>/dev/null) || NC=""
# Function to print colored output
# Arguments: $1 - color escape sequence (may be empty)
#            $2 - text to print; backslash escapes are interpreted (echo -e)
# Outputs:   the text wrapped in $1 and the NC reset sequence, plus a newline
print_color() {
    local tint=$1 text=$2
    local painted="${tint}${text}${NC}"
    echo -e "$painted"
}
# Check if command exists
# Arguments: $1 - command name
# Returns:   the status of `command -v` (0 when the name resolves)
command_exists() {
    local wanted=$1
    command -v "$wanted" &>/dev/null
}
# Check dependencies
# Verifies that smartctl and bc are available. When one is missing, prints the
# packages to install and terminates the script with status 1.
check_dependencies() {
    local missing=()
    local spec

    # Each entry is "<command>:<package that provides it>".
    for spec in "smartctl:smartmontools" "bc:bc"; do
        command_exists "${spec%%:*}" || missing+=("${spec#*:}")
    done

    if (( ${#missing[@]} == 0 )); then
        return
    fi

    print_color "$RED" "Error: Missing required packages: ${missing[*]}"
    echo "Install with: sudo apt update && sudo apt install ${missing[*]}"
    exit 1
}
# TBW endurance standards (using lowest numbers)
# Keys are the capacity tiers returned by get_capacity_tier (GB); values are
# the endurance figure reported as "TB" in the disk report. Every enterprise
# entry is three times the consumer entry of the same tier.
declare -A CONSUMER_TBW ENTERPRISE_TBW

CONSUMER_TBW[250]=150
CONSUMER_TBW[500]=300
CONSUMER_TBW[1000]=600
CONSUMER_TBW[2000]=1200
CONSUMER_TBW[4000]=2400
CONSUMER_TBW[8000]=4800

ENTERPRISE_TBW[250]=450
ENTERPRISE_TBW[500]=900
ENTERPRISE_TBW[1000]=1800
ENTERPRISE_TBW[2000]=3600
ENTERPRISE_TBW[4000]=7200
ENTERPRISE_TBW[8000]=14400
# Function to get closest capacity tier
# Maps a capacity in GB to the smallest tier (250, 500, 1000, 2000, 4000,
# 8000) that is greater than or equal to it.
# Arguments: $1 - capacity in GB; a fractional part is dropped and an empty or
#                 non-numeric value is treated as 0
# Outputs:   the tier on stdout. Capacities above 8000 GB are capped at the
#            8000 tier; no proportional scaling is applied.
get_capacity_tier() {
    local capacity_gb=${1%%.*}
    local tier

    # Shell integer arithmetic cannot compare "500.1" or "abc".
    [[ "$capacity_gb" =~ ^[0-9]+$ ]] || capacity_gb=0

    for tier in 250 500 1000 2000 4000 8000; do
        # 10# keeps zero-padded input from being read as octal.
        if (( 10#$capacity_gb <= tier )); then
            echo "$tier"
            return
        fi
    done

    echo "8000"
}
# Function to convert bytes to human readable
# Arguments: $1 - size in bytes
# Outputs:   the size with two decimals and a TB/GB/MB suffix, or
#            "<n> bytes" below 1048576.
# Note: the divisors are powers of 1024 even though the suffixes read
# TB/GB/MB.
bytes_to_human() {
    local bytes=$1
    local entry divisor

    # Largest unit first; each entry is "<suffix>:<divisor>".
    for entry in "TB:1099511627776" "GB:1073741824" "MB:1048576"; do
        divisor=${entry#*:}
        if [[ $bytes -ge $divisor ]]; then
            echo "$(echo "scale=2; $bytes / $divisor" | bc) ${entry%%:*}"
            return
        fi
    done

    echo "$bytes bytes"
}
# Function to extract numeric hours from power_on_hours field
# Arguments: $1 - raw value, e.g. "12345" or "12345h+30m+10.5s"
# Outputs:   the leading run of digits, or "0" when the value does not start
#            with a digit.
extract_numeric_hours() {
    local power_on_hours=$1
    local leading_digits

    # Cut everything from the first non-digit character onwards.
    leading_digits=$(sed 's/[^0-9].*$//' <<< "$power_on_hours")

    case "$leading_digits" in
        '' | *[!0-9]*)
            echo "0"
            ;;
        *)
            echo "$leading_digits"
            ;;
    esac
}
# Function to get disk type and interface
# Classifies a disk from the output of `smartctl -i`.
# Arguments: $1 - block device path
# Outputs:   "<disk_type>|<transport>|<is_enterprise>" on stdout, where
#            disk_type is NVMe, SAS, SSD, HDD or UNKNOWN, transport is NVMe,
#            SAS, SATA or empty, and is_enterprise is true or false.
get_disk_info() {
    local disk=$1
    local info
    info=$(smartctl -i "$disk" 2>/dev/null)
    local transport=""
    local disk_type="UNKNOWN"
    local is_enterprise=false

    # The first matching rule wins: NVMe, then SAS, then SATA SSD, then SATA HDD.
    if [[ "$disk" == /dev/nvme* ]] || grep -qi "NVMe" <<< "$info"; then
        disk_type="NVMe"
        transport="NVMe"
    elif grep -qiw "SAS" <<< "$info"; then
        disk_type="SAS"
        transport="SAS"
        is_enterprise=true
    elif grep -qi "Solid State Device" <<< "$info"; then
        disk_type="SSD"
        transport="SATA"
    elif grep -qi "Rotation Rate" <<< "$info"; then
        disk_type="HDD"
        transport="SATA"
    fi

    # Enterprise keywords must match as whole words (-w). A plain substring
    # search for "EP" also hits "Sep" in smartctl's "Local Time is:" line,
    # which would flag every disk as enterprise for a whole month.
    if grep -qiwE "ENTERPRISE|EP|SAS|Xeon|Xeons|DualPort|PowerLoss|PLP" <<< "$info"; then
        is_enterprise=true
    fi

    # Model name: ATA devices report "Device Model:", NVMe devices report
    # "Model Number:".
    local model
    model=$(grep -i "Device Model:" <<< "$info" | cut -d: -f2 | sed 's/^[ \t]*//')
    if [[ -z "$model" ]]; then
        model=$(grep -i "Model Number:" <<< "$info" | cut -d: -f2 | sed 's/^[ \t]*//')
    fi

    # Whole-word match again, so that "DC" no longer matches "WDC ..." models.
    if grep -qiwE "PRO|EP|DC|ENT|ENTERPRISE" <<< "$model"; then
        is_enterprise=true
    fi

    echo "$disk_type|$transport|$is_enterprise"
}
# Function to calculate TBW for SSD/NVMe
# Converts a write counter into terabytes written (decimal TB, two decimals,
# truncated).
# Arguments: $1 - disk type (accepted for interface compatibility; the
#                 conversion is the same for every type)
#            $2 - raw counter in units of 32 MiB (Host_Writes_32MiB style)
#            $3 - sectors written, assumed to be 512-byte sectors
# Outputs:   "<int>.<2 decimals>" on stdout, or "0" when neither counter holds
#            a positive integer. $3 takes precedence over $2.
calculate_tbw() {
    local disk_type=$1
    local raw_value=$2
    local sectors=$3
    local hundredths=0

    # Anything that is not a plain non-negative integer counts as "no data".
    [[ "$sectors" =~ ^[0-9]+$ ]] || sectors=0
    [[ "$raw_value" =~ ^[0-9]+$ ]] || raw_value=0

    # Integer math in hundredths of a TB (1 TB = 10^12 bytes, so one hundredth
    # is 10^10 bytes). 10# keeps zero-padded counters from being read as octal.
    if (( 10#$sectors > 0 )); then
        hundredths=$(( 10#$sectors * 512 / 10000000000 ))
    elif (( 10#$raw_value > 0 )); then
        # 32 MiB = 33554432 bytes.
        hundredths=$(( 10#$raw_value * 33554432 / 10000000000 ))
    else
        echo "0"
        return
    fi

    # Always print a leading zero ("0.51", not ".51").
    printf '%d.%02d\n' "$(( hundredths / 100 ))" "$(( hundredths % 100 ))"
}
# Function to get estimated endurance
# Arguments: $1 - capacity in GB
#            $2 - "true" for enterprise drives; any other value selects the
#                 consumer table
#            $3 - disk type
# Outputs:   "N/A" for HDDs, otherwise the table entry for the capacity tier
#            returned by get_capacity_tier.
get_estimated_endurance() {
    local capacity_gb=$1 is_enterprise=$2 disk_type=$3

    # HDDs don't have TBW
    if [[ "$disk_type" == "HDD" ]]; then
        echo "N/A"
        return
    fi

    local tier
    tier=$(get_capacity_tier "$capacity_gb")

    case "$is_enterprise" in
        true) echo "${ENTERPRISE_TBW[$tier]}" ;;
        *) echo "${CONSUMER_TBW[$tier]}" ;;
    esac
}
# Function to estimate SSD lifespan with TBW remaining
# Arguments: $1 - power-on hours (thousands separators are tolerated)
#            $2 - TB written so far
#            $3 - estimated endurance in TB, or "N/A"
#            $4 - disk type, echoed back as the fourth field
# Outputs:   one record "<remaining %>|<remaining TB>|<wear status>|<type>".
#            The first two fields are wrapped in RED/YELLOW/GREEN and NC when
#            a wear level could be computed. Remaining values are clamped at 0
#            once the written amount exceeds the endurance figure.
estimate_ssd_lifespan() {
    local power_on_hours=${1//,/}
    local tbw_used=$2
    local estimated_endurance=$3
    local disk_type=$4

    # Keep only the leading digits so the numeric test below cannot fail.
    power_on_hours=${power_on_hours%%[^0-9]*}
    if [[ -z "$power_on_hours" ]] || (( 10#$power_on_hours == 0 )); then
        echo "Unknown|Unknown|New|$disk_type"
        return
    fi

    if [[ "$estimated_endurance" == "N/A" ]]; then
        echo "N/A|N/A|N/A|HDD"
        return
    fi

    # A missing, malformed or zero endurance would make the division fail.
    local re_number='^([0-9]+\.?[0-9]*|\.[0-9]+)$'
    local re_zero='^[0.]*$'
    if [[ ! "$estimated_endurance" =~ $re_number || "$estimated_endurance" =~ $re_zero ]]; then
        echo "Unknown|Unknown|Unknown|$disk_type"
        return
    fi

    # Handle the case where tbw_used might have formatting issues
    local clean_tbw_used=${tbw_used//[^0-9.]/}
    [[ "$clean_tbw_used" =~ $re_number ]] || clean_tbw_used=0

    if [[ "$(bc <<< "$clean_tbw_used > 0" 2>/dev/null)" != "1" ]]; then
        echo "Unknown|${estimated_endurance} TB|New|$disk_type"
        return
    fi

    local tbw_remaining lifespan_used lifespan_remaining
    tbw_remaining=$(bc <<< "scale=2; $estimated_endurance - $clean_tbw_used" 2>/dev/null)
    lifespan_used=$(bc <<< "scale=1; $clean_tbw_used * 100 / $estimated_endurance" 2>/dev/null)
    [[ -n "$tbw_remaining" ]] || tbw_remaining=$estimated_endurance
    [[ -n "$lifespan_used" ]] || lifespan_used=0
    lifespan_remaining=$(bc <<< "scale=1; 100 - $lifespan_used" 2>/dev/null)
    [[ -n "$lifespan_remaining" ]] || lifespan_remaining=100

    # More written than the endurance figure: report zero, not a negative value.
    [[ "$tbw_remaining" == -* ]] && tbw_remaining=0
    [[ "$lifespan_remaining" == -* ]] && lifespan_remaining=0
    # bc omits the leading zero for values below one (".5").
    [[ "$tbw_remaining" == .* ]] && tbw_remaining="0$tbw_remaining"
    [[ "$lifespan_remaining" == .* ]] && lifespan_remaining="0$lifespan_remaining"

    local color wear
    if [[ "$(bc <<< "$lifespan_used >= 80" 2>/dev/null)" == "1" ]]; then
        color=$RED
        wear="High wear"
    elif [[ "$(bc <<< "$lifespan_used >= 50" 2>/dev/null)" == "1" ]]; then
        color=$YELLOW
        wear="Moderate wear"
    else
        color=$GREEN
        wear="Healthy"
    fi

    echo "${color}${lifespan_remaining}%${NC}|${color}${tbw_remaining} TB${NC}|${wear}|$disk_type"
}
# Function to estimate HDD lifespan
# Scores a drive from its SMART counters and prints a rough remaining-life
# bucket.
# Arguments: $1 - power-on hours (raw; parsed with extract_numeric_hours)
#            $2 - reallocated sector count
#            $3 - current pending sector count
#            $4 - start/stop count
#            $5 - load cycle count
#            $6 - disk type; the mechanical counters are only scored for "HDD"
# Outputs:   one line on stdout: a CRITICAL notice, "Unknown", or a colored
#            lifespan bucket with a short reason.
estimate_hdd_lifespan() {
    local power_on_hours=$1
    local reallocated_sectors=$2
    local pending_sectors=$3
    local start_stop_count=$4
    local load_cycle_count=$5
    local disk_type=$6

    # Normalise the counters: empty or non-numeric values become 0, and
    # zero-padded values are read as decimal rather than octal.
    local counter
    for counter in reallocated_sectors pending_sectors start_stop_count load_cycle_count; do
        if [[ "${!counter}" =~ ^[0-9]+$ ]]; then
            printf -v "$counter" '%d' "$(( 10#${!counter} ))"
        else
            printf -v "$counter" '%d' 0
        fi
    done

    # Pending sectors are critical on their own, so report them even when the
    # power-on hours cannot be read.
    if (( pending_sectors > 0 )); then
        echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
        return
    fi

    # Extract numeric hours only
    local numeric_hours
    numeric_hours=$(extract_numeric_hours "$power_on_hours")
    if [[ ! "$numeric_hours" =~ ^[0-9]+$ ]] || (( 10#$numeric_hours == 0 )); then
        echo "Unknown"
        return
    fi
    numeric_hours=$(( 10#$numeric_hours ))

    local severity=0

    # Reallocated sectors
    if (( reallocated_sectors > 100 )); then
        severity=$(( severity + 3 ))
    elif (( reallocated_sectors > 10 )); then
        severity=$(( severity + 2 ))
    elif (( reallocated_sectors > 0 )); then
        severity=$(( severity + 1 ))
    fi

    # Usage-based assessment
    if (( numeric_hours > 50000 )); then
        severity=$(( severity + 3 ))
    elif (( numeric_hours > 30000 )); then
        severity=$(( severity + 2 ))
    elif (( numeric_hours > 15000 )); then
        severity=$(( severity + 1 ))
    fi

    # Mechanical wear (for HDDs)
    if [[ "$disk_type" == "HDD" ]]; then
        if (( start_stop_count > 50000 )); then
            severity=$(( severity + 2 ))
        elif (( start_stop_count > 20000 )); then
            severity=$(( severity + 1 ))
        fi
        if (( load_cycle_count > 500000 )); then
            severity=$(( severity + 2 ))
        elif (( load_cycle_count > 200000 )); then
            severity=$(( severity + 1 ))
        fi
    fi

    if (( severity >= 5 )); then
        echo "${RED}< 6 months${NC} (Multiple risk factors)"
    elif (( severity >= 3 )); then
        echo "${YELLOW}6-18 months${NC} (Moderate wear)"
    elif (( severity >= 1 )); then
        echo "${YELLOW}1-3 years${NC} (Light wear)"
    else
        echo "${GREEN}> 3 years${NC} (Healthy)"
    fi
}
# Function to check soft-raid (MDRAID)
# Lists the md arrays named in /proc/mdstat and, when mdadm is installed,
# prints the RAID level, state and active device count of each one.
check_mdraid() {
    local md_devices=()
    local entry array_dev md_info

    # Array lines in /proc/mdstat start with the array name (mdN).
    if [[ -f /proc/mdstat ]]; then
        while IFS= read -r entry; do
            if [[ $entry =~ ^md[0-9]+ ]]; then
                md_devices+=("/dev/${entry%% *}")
            fi
        done < /proc/mdstat
    fi

    for array_dev in "${md_devices[@]}"; do
        [[ -b "$array_dev" ]] || continue
        print_color "$MAGENTA" "Found software RAID: $array_dev"

        # Details need mdadm and a non-empty --detail report.
        command_exists mdadm || continue
        md_info=$(mdadm --detail "$array_dev" 2>/dev/null)
        [[ -n "$md_info" ]] || continue

        echo "RAID Level: $(grep "Raid Level" <<< "$md_info" | cut -d: -f2 | sed 's/^[ \t]*//')"
        echo "State: $(grep "State" <<< "$md_info" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')"
        echo "Devices: $(grep "Active Devices" <<< "$md_info" | cut -d: -f2 | sed 's/^[ \t]*//')"
        echo ""
    done
}
# Function to extract capacity in GB from various formats
# Arguments: $1 - capacity text as printed by smartctl, e.g.
#                 "500,107,862,016 bytes [500 GB]", "500,107,862,016 bytes",
#                 "[1.00 TB]" or "480GB"
# Outputs:   whole decimal gigabytes on stdout (fraction truncated), or "0"
#            when no size can be recognised.
extract_capacity_gb() {
    local capacity=$1
    local size="" unit=""

    # The patterns live in variables and use [[:space:]]: an inline "\s" in a
    # [[ =~ ]] pattern is not a portable whitespace class in Bash.
    local re_bracket='\[([0-9,.]+)[[:space:]]*([TtGg])[Bb]'
    local re_bytes='([0-9,]+)[[:space:]]+bytes'
    local re_bare='([0-9,.]+)[[:space:]]*([TtGg])[Bb]'

    if [[ $capacity =~ $re_bracket ]]; then
        # Pattern: [500.1 GB] or [1.0 TB]
        size=${BASH_REMATCH[1]}
        unit=${BASH_REMATCH[2]}
    elif [[ $capacity =~ $re_bytes ]]; then
        # Pattern: 500,107,862,016 bytes
        size=${BASH_REMATCH[1]}
        unit="bytes"
    elif [[ $capacity =~ $re_bare ]]; then
        # Pattern: 500.1GB or 1.0TB
        size=${BASH_REMATCH[1]}
        unit=${BASH_REMATCH[2]}
    fi

    # Drop thousands separators, then split into whole and fractional digits.
    size=${size//,/}
    local whole=${size%%.*}
    local frac=""
    if [[ "$size" == *.* ]]; then
        frac=${size#*.}
    fi

    # Ensure we have a valid number
    if [[ -z "$whole$frac" || ! "$whole" =~ ^[0-9]*$ || ! "$frac" =~ ^[0-9]*$ ]]; then
        echo "0"
        return
    fi

    # 10# keeps zero-padded digits from being read as octal.
    whole=$(( 10#${whole:-0} ))
    local capacity_gb=0
    case "$unit" in
        bytes)
            capacity_gb=$(( whole / 1000000000 ))
            ;;
        [Tt])
            # 1 TB = 1000 GB; keep three fractional digits (1.92 TB -> 1920).
            frac="${frac}000"
            capacity_gb=$(( whole * 1000 + 10#${frac:0:3} ))
            ;;
        [Gg])
            capacity_gb=$whole
            ;;
    esac

    echo "$capacity_gb"
}
# Function to get NVMe capacity using smartctl
# Arguments: $1 - NVMe device path
# Outputs:   the text after the colon of the first field found in
#            `smartctl -i` output, tried in this order: "Total NVM Capacity",
#            "Namespace 1 Size/Capacity", "User Capacity" (the last one cut
#            before any "["). Empty when none of them is present.
get_nvme_capacity() {
    local disk=$1
    local nvme_info capacity="" field

    nvme_info=$(smartctl -i "$disk" 2>/dev/null)

    for field in "Total NVM Capacity" "Namespace 1 Size/Capacity"; do
        capacity=$(grep -i "$field" <<< "$nvme_info" | cut -d: -f2 | sed 's/^[ \t]*//')
        [[ -n "$capacity" ]] && break
    done

    if [[ -z "$capacity" ]]; then
        capacity=$(grep -i "User Capacity" <<< "$nvme_info" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
    fi

    echo "$capacity"
}
# Function to check a single disk
# Gathers identity, health and wear data for one block device with smartctl
# and prints a report on stdout.
# Arguments: $1 - block device path (e.g. /dev/sda, /dev/nvme0n1)
# Returns early, after printing a message, when the path is not a block
# device or when smartctl cannot open the device.
check_disk() {
    local disk=$1
    print_color "$CYAN" "Checking disk: $disk"
    echo "=================================================="

    # Check if disk exists and is accessible
    if [[ ! -b "$disk" ]]; then
        print_color "$RED" "Error: $disk is not a valid block device"
        echo ""
        return
    fi

    # Get disk information
    local disk_info disk_type transport is_enterprise
    disk_info=$(get_disk_info "$disk")
    IFS='|' read -r disk_type transport is_enterprise <<< "$disk_info"

    # Get basic disk information. The assignment is kept separate from
    # `local` so that $? really is smartctl's exit status.
    local info smart_rc health attributes
    info=$(smartctl -i "$disk" 2>/dev/null)
    smart_rc=$?

    # smartctl's exit status is a bit mask: bit 0 = command line not parsed,
    # bit 1 = device open failed. Higher bits report disk problems but still
    # come with usable output, so only the two low bits are treated as fatal.
    if (( smart_rc & 3 )) || [[ -z "$info" ]]; then
        print_color "$YELLOW" "Cannot read disk information. It may be offline, unsupported, or need root access."
        echo ""
        return
    fi

    health=$(smartctl -H "$disk" 2>/dev/null)
    attributes=$(smartctl -A "$disk" 2>/dev/null)

    # Extract disk information
    local model serial capacity firmware
    model=$(grep "Device Model:" <<< "$info" | cut -d: -f2 | sed 's/^[ \t]*//')
    if [[ -z "$model" ]]; then
        model=$(grep "Model Number:" <<< "$info" | cut -d: -f2 | sed 's/^[ \t]*//')
    fi
    serial=$(grep "Serial Number:" <<< "$info" | cut -d: -f2 | sed 's/^[ \t]*//')
    capacity=$(grep "User Capacity:" <<< "$info" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
    firmware=$(grep "Firmware Version:" <<< "$info" | cut -d: -f2 | sed 's/^[ \t]*//')

    # For NVMe disks, try to get capacity from different fields
    if [[ "$disk_type" == "NVMe" ]]; then
        local nvme_capacity
        nvme_capacity=$(get_nvme_capacity "$disk")
        if [[ -n "$nvme_capacity" ]]; then
            capacity="$nvme_capacity"
        fi
    fi

    # Extract capacity in GB and human readable format
    local capacity_gb capacity_human
    capacity_gb=$(extract_capacity_gb "$capacity")
    [[ "$capacity_gb" =~ ^[0-9]+$ ]] || capacity_gb=0
    capacity_human=$(bytes_to_human "$(( 10#$capacity_gb * 1000000000 ))")

    # Health verdict: ATA prints "... test result: PASSED"; SCSI/SAS devices
    # print "SMART Health Status: OK".
    local health_status
    health_status=$(grep "result:" <<< "$health" | cut -d: -f2 | sed 's/^[ \t]*//')
    if [[ -z "$health_status" ]]; then
        health_status=$(grep "SMART overall-health" <<< "$health" | cut -d: -f2 | sed 's/^[ \t]*//')
    fi
    if [[ -z "$health_status" ]]; then
        health_status=$(grep "SMART Health Status:" <<< "$health" | cut -d: -f2 | sed 's/^[ \t]*//')
    fi

    # Extract SMART attributes (raw value is column 10 of the attribute table)
    local power_on_hours_raw power_on_hours reallocated_sectors pending_sectors
    local total_written host_writes_32mib start_stop_count load_cycle_count
    power_on_hours_raw=$(grep "Power_On_Hours" <<< "$attributes" | awk '{print $10}' | head -1)
    power_on_hours=$(extract_numeric_hours "$power_on_hours_raw")
    reallocated_sectors=$(grep "Reallocated_Sector_Ct" <<< "$attributes" | awk '{print $10}' | head -1)
    pending_sectors=$(grep "Current_Pending_Sector" <<< "$attributes" | awk '{print $10}' | head -1)
    # Total_LBAs_Written counts sectors; Host_Writes_32MiB counts 32 MiB
    # units. They are kept apart so each reaches the matching conversion in
    # calculate_tbw.
    total_written=$(grep "Total_LBAs_Written" <<< "$attributes" | awk '{print $10}' | head -1)
    host_writes_32mib=$(grep "Host_Writes_32MiB" <<< "$attributes" | awk '{print $10}' | head -1)
    start_stop_count=$(grep "Start_Stop_Count" <<< "$attributes" | awk '{print $10}' | head -1)
    load_cycle_count=$(grep "Load_Cycle_Count" <<< "$attributes" | awk '{print $10}' | head -1)

    # For NVMe disks using smartctl extended attributes
    if [[ "$disk_type" == "NVMe" ]]; then
        local nvme_attributes data_units_written nvme_power_hours
        nvme_attributes=$(smartctl -x "$disk" 2>/dev/null)

        # "Data Units Written:  3,762,195 [1.92 TB]": the count is the 4th
        # whitespace-separated field; the 5th is the bracketed size.
        data_units_written=$(grep "Data Units Written" <<< "$nvme_attributes" | awk '{print $4}' | tr -d ',' | head -1)
        if [[ "$data_units_written" =~ ^[0-9]+$ ]]; then
            # Convert data units to sectors (1 data unit = 1000 sectors for NVMe)
            total_written=$(( 10#$data_units_written * 1000 ))
        fi

        # "Power On Hours:  1,234": strip the thousands separator so the value
        # can be used in numeric tests.
        nvme_power_hours=$(grep "Power On Hours" <<< "$nvme_attributes" | awk '{print $4}' | tr -d ',' | head -1)
        if [[ -n "$nvme_power_hours" ]]; then
            power_on_hours="$nvme_power_hours"
        fi
    fi

    local disk_class="Consumer"
    if [[ "$is_enterprise" == "true" ]]; then
        disk_class="Enterprise"
    fi

    # Display basic information
    echo "Model: ${model:-Unknown}"
    echo "Serial: ${serial:-Unknown}"
    echo "Type: $disk_type"
    echo "Interface: $transport"
    echo "Class: $disk_class"
    echo "Capacity: $capacity_human"
    echo "Firmware: ${firmware:-Unknown}"
    echo "Health: ${health_status:-Unknown}"
    echo "Power On Hours: ${power_on_hours:-Unknown}"

    # Disk type specific analysis
    if [[ "$disk_type" == "HDD" ]]; then
        echo "Realloc Sectors: ${reallocated_sectors:-0}"
        echo "Pending Sectors: ${pending_sectors:-0}"
        [[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}"
        [[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}"

        local lifespan
        lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type")
        echo "Lifespan: $lifespan"
    elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then
        local tbw_used=0
        if [[ -n "$total_written" && "$total_written" != "0" ]]; then
            tbw_used=$(calculate_tbw "$disk_type" "" "$total_written")
        elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
            tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "")
        fi

        local estimated_endurance
        estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type")
        echo "TBW Used: ${tbw_used} TB"
        echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)"

        # The fourth field of the record (disk type) is not needed here.
        local lifespan_info lifespan_percent tbw_remaining wear_status
        lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type")
        lifespan_percent=$(cut -d'|' -f1 <<< "$lifespan_info")
        tbw_remaining=$(cut -d'|' -f2 <<< "$lifespan_info")
        wear_status=$(cut -d'|' -f3 <<< "$lifespan_info")

        if [[ "$estimated_endurance" != "N/A" ]]; then
            echo "TBW Remaining: $tbw_remaining"
            echo "Lifespan: $lifespan_percent ($wear_status)"
        fi

        # Show mechanical attributes for SAS drives that might be SSDs
        if [[ "$disk_type" == "SAS" ]]; then
            echo "Realloc Sectors: ${reallocated_sectors:-0}"
            echo "Pending Sectors: ${pending_sectors:-0}"
        fi
    else
        print_color "$YELLOW" "Unknown disk type - limited information available"
    fi

    echo ""
}
# Function to detect all disks
# Outputs: a single space-separated line of whole-disk block devices, in the
#          order SATA/SAS (/dev/sdX, /dev/sdXY), NVMe namespaces
#          (/dev/nvme<N>n<M>), then /dev/vdX and /dev/xvdX.
detect_disks() {
    local disks=()
    local disk
    local re_nvme_namespace='^/dev/nvme[0-9]+n[0-9]+$'

    # Check for SATA/SAS disks
    for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do
        if [[ -b "$disk" ]]; then
            disks+=("$disk")
        fi
    done

    # Check for NVMe disks (base devices only, no partitions). The glob is
    # deliberately wide and the regex narrows it, so controller or namespace
    # numbers of 10 and above are found while nvme0n1p1-style partitions are
    # still skipped.
    for disk in /dev/nvme*n*; do
        if [[ -b "$disk" && "$disk" =~ $re_nvme_namespace ]]; then
            disks+=("$disk")
        fi
    done

    # Check for other disk types
    for disk in /dev/vd[a-z] /dev/xvd[a-z]; do
        if [[ -b "$disk" ]]; then
            disks+=("$disk")
        fi
    done

    echo "${disks[@]}"
}
# Main function
# Prints the banner, verifies dependencies, reports software RAID arrays,
# runs check_disk on every disk to inspect and ends with a note about the
# TBW endurance figures.
# Arguments: optional block-device paths; with none, detect_disks supplies them.
# Exits with status 1 when no disk is left to check.
main() {
    local disks=()
    local target

    print_color "$BLUE" "Disk Health Check Script v$VERSION for Ubuntu"
    print_color "$BLUE" "=============================================="
    echo ""
    check_dependencies

    # Software RAID arrays are reported before the individual disks.
    check_mdraid

    if (( $# == 0 )); then
        # No arguments: fall back to auto-detection.
        print_color "$CYAN" "Auto-detecting disks..."
        read -ra disks <<< "$(detect_disks)"
    else
        # Keep only the arguments that really are block devices.
        for target in "$@"; do
            if [[ ! -b "$target" ]]; then
                print_color "$RED" "Error: $target is not a valid block device"
                continue
            fi
            disks+=("$target")
        done
    fi

    if (( ${#disks[@]} == 0 )); then
        print_color "$RED" "No disks found or accessible"
        echo "Try running as root or specifying disk paths manually"
        exit 1
    fi

    print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
    echo ""

    # Warn (but continue) when not running as root.
    if (( EUID != 0 )); then
        print_color "$YELLOW" "Warning: Not running as root. Some disks may not be accessible."
        echo "For complete results, run as: sudo $0"
        echo ""
    fi

    for target in "${disks[@]}"; do
        check_disk "$target"
    done

    print_color "$BLUE" "Check completed!"
    echo ""
    print_color "$YELLOW" "Note: TBW Endurance values shown are minimum guaranteed numbers."
    print_color "$YELLOW" " Actual endurance for your specific drive model may be higher."
}
# Usage information
# Writes the help text to stdout; $SCRIPT_NAME is expanded inside the
# here-document.
usage() {
    cat <<EOF
Usage: $SCRIPT_NAME [DISK1 DISK2 ...]

If no disks specified, auto-detects all available disks

Examples:
 $SCRIPT_NAME # Check all auto-detected disks
 sudo $SCRIPT_NAME # Check all disks (as root)
 $SCRIPT_NAME /dev/sda # Check specific disk
 $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk
 $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks

Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID
EOF
}
# Parse command line arguments
# Only the first argument is inspected: -h/--help prints the usage text,
# -v/--version prints the version, anything else (including no argument at
# all) is handed to main together with the remaining arguments.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
    usage
    exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
    echo "$SCRIPT_NAME version $VERSION"
    exit 0
else
    main "$@"
fi

View File

@@ -1,8 +1,8 @@
#!/bin/bash
# Disk Health Check Script for Ubuntu 24.04
# Enhanced with SAS/PERC H730P controller support
# Checks SSD TBW/lifespan and HDD health status
# Disk Health Check Script for Ubuntu
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid
# Supports consumer and enterprise disk classification
SCRIPT_NAME=$(basename "$0")
VERSION="2.5"
@@ -13,6 +13,7 @@ GREEN=$(tput setaf 2)
YELLOW=$(tput setaf 3)
BLUE=$(tput setaf 4)
CYAN=$(tput setaf 6)
MAGENTA=$(tput setaf 5)
NC=$(tput sgr0)
# Function to print colored output
@@ -46,190 +47,153 @@ check_dependencies() {
fi
}
# Function to test SMART access and get available data - FIXED VERSION
test_smart_access() {
local disk=$1
local controller=$2
# TBW endurance standards (using lowest numbers)
declare -A CONSUMER_TBW=(
["250"]=150
["500"]=300
["1000"]=600
["2000"]=1200
["4000"]=2400
["8000"]=4800
)
local smart_cmd="smartctl"
[[ -n "$controller" ]] && smart_cmd+=" -d $controller"
declare -A ENTERPRISE_TBW=(
["250"]=450
["500"]=900
["1000"]=1800
["2000"]=3600
["4000"]=7200
["8000"]=14400
)
# Test basic SMART access
if ! $smart_cmd -i "$disk" &>/dev/null; then
echo "no_access"
# Function to get closest capacity tier
get_capacity_tier() {
local capacity_gb=$1
local tiers=("250" "500" "1000" "2000" "4000" "8000")
for tier in "${tiers[@]}"; do
if [[ $capacity_gb -le $tier ]]; then
echo $tier
return
fi
# Get SMART information
local smart_info=$($smart_cmd -i "$disk" 2>/dev/null)
# Check if SMART is available - FIXED PARSING
if ! echo "$smart_info" | grep -q "SMART support is:"; then
echo "not_available"
return
fi
# Extract SMART status - FIXED LOGIC
local smart_support_line=$(echo "$smart_info" | grep "SMART support is:")
local smart_available=$(echo "$smart_support_line" | grep -q "Available" && echo "Available" || echo "")
local smart_enabled=$(echo "$smart_support_line" | grep -q "Enabled" && echo "Enabled" || echo "")
if [[ -z "$smart_available" ]]; then
echo "not_available"
return
fi
if [[ -z "$smart_enabled" ]]; then
echo "disabled"
return
fi
# Test attribute reading
local attributes=$($smart_cmd -A "$disk" 2>/dev/null)
if [[ -z "$attributes" ]] || ! echo "$attributes" | grep -q "ATTRIBUTE_NAME"; then
echo "no_attributes"
return
fi
echo "full_access"
done
# For larger than 8TB, use proportional scaling from 4TB
echo "8000"
}
# Function to get disk information with enhanced SAS support
get_disk_info() {
local disk=$1
local controller=$2
local smart_cmd="smartctl"
[[ -n "$controller" ]] && smart_cmd+=" -d $controller"
local info=$($smart_cmd -i "$disk" 2>/dev/null)
local attributes=$($smart_cmd -A "$disk" 2>/dev/null)
local health=$($smart_cmd -H "$disk" 2>/dev/null)
# Extract information with multiple fallbacks for SAS drives
local model=$(echo "$info" | grep -i "Device Model:\|Product:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
local vendor=$(echo "$info" | grep -i "Vendor:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
[[ -n "$vendor" && -n "$model" ]] && model="$vendor $model"
local serial=$(echo "$info" | grep -i "Serial Number:\|Serial number:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
local capacity=$(echo "$info" | grep -i "User Capacity:\|Total NVM Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1 | head -1)
local firmware=$(echo "$info" | grep -i "Firmware Version:\|Firmware revision:\|Revision:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
local health_status=$(echo "$health" | grep -i "result:\|SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
[[ -z "$health_status" ]] && health_status=$(echo "$health" | grep -i "SMART overall-health" | awk -F'[' '{print $2}' | cut -d']' -f1)
# Get disk type with SAS support
local disk_type="UNKNOWN"
if echo "$info" | grep -qi "Solid State Device"; then
disk_type="SSD"
elif echo "$info" | grep -qi "Rotation Rate"; then
disk_type="HDD"
elif echo "$info" | grep -qi "SCSI\|SAS"; then
# SAS drives often don't specify, check rotation rate
if echo "$info" | grep -qi "15000\|10000\|7200"; then
disk_type="HDD"
# Function to convert bytes to human readable
bytes_to_human() {
local bytes=$1
if [[ $bytes -ge 1099511627776 ]]; then
echo "$(echo "scale=2; $bytes / 1099511627776" | bc 2>/dev/null || echo "0") TB"
elif [[ $bytes -ge 1073741824 ]]; then
echo "$(echo "scale=2; $bytes / 1073741824" | bc 2>/dev/null || echo "0") GB"
elif [[ $bytes -ge 1048576 ]]; then
echo "$(echo "scale=2; $bytes / 1048576" | bc 2>/dev/null || echo "0") MB"
else
disk_type="SSD"
echo "$bytes bytes"
fi
fi
# Extract SMART attributes with multiple field attempts
local power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours" | awk '{print $10}' | head -1)
local reallocated_sectors=$(echo "$attributes" | grep -i "Reallocated_Sector_Ct" | awk '{print $10}' | head -1)
local pending_sectors=$(echo "$attributes" | grep -i "Current_Pending_Sector" | awk '{print $10}' | head -1)
# For Kingston and other SSDs with different attribute names
local total_written=$(echo "$attributes" | grep -i "Total_LBAs_Written\|Lifetime_Writes_GiB\|Host_Writes_32MiB\|Flash_Writes_GiB" | awk '{print $10}' | head -1)
local host_writes_32mib=$(echo "$attributes" | grep -i "Host_Writes_32MiB" | awk '{print $10}' | head -1)
# For wear leveling indicators
local media_wearout=$(echo "$attributes" | grep -i "Media_Wearout_Indicator\|Wear_Leveling_Count\|SSD_Life_Left" | awk '{print $10}' | head -1)
echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout"
}
# Function to calculate TBW for SSD - ENHANCED FOR KINGSTON
calculate_tbw() {
local raw_value=$1
local sectors=$2
local disk_model=$3
# Kingston SSDs use Lifetime_Writes_GiB and Flash_Writes_GiB
if echo "$disk_model" | grep -qi "KINGSTON"; then
if [[ -n "$raw_value" && "$raw_value" != "0" ]]; then
# Convert from GiB to TB
local tbw=$(echo "scale=2; $raw_value / 1000" | bc 2>/dev/null || echo "0")
echo "$tbw"
return
fi
fi
if [[ -n "$sectors" && "$sectors" != "0" ]]; then
local bytes=$((sectors * 512))
local tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0")
echo "$tbw"
elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then
local tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0")
echo "$tbw"
# Function to extract numeric hours from power_on_hours field
extract_numeric_hours() {
local power_on_hours=$1
# Remove everything after non-numeric characters
local numeric_hours=$(echo "$power_on_hours" | sed 's/[^0-9].*$//')
if [[ -n "$numeric_hours" && "$numeric_hours" =~ ^[0-9]+$ ]]; then
echo "$numeric_hours"
else
echo "0"
fi
}
# Function to estimate SSD endurance based on model and capacity
estimate_ssd_endurance() {
local disk_model=$1
local capacity_gb=$2
# Function to get disk type and interface
get_disk_info() {
local disk=$1
# Kingston consumer SSDs
if echo "$disk_model" | grep -qi "KINGSTON.*SA400"; then
if [[ $capacity_gb -ge 960 ]]; then
echo "300" # 300TB for 960GB Kingston SA400
elif [[ $capacity_gb -ge 480 ]]; then
echo "150" # 150TB for 480GB Kingston
else
echo "80" # 80TB for smaller Kingston
local info=$(smartctl -i "$disk" 2>/dev/null)
local transport=""
local disk_type="UNKNOWN"
local is_enterprise=false
# Check if it's NVMe
if [[ "$disk" == /dev/nvme* ]] || echo "$info" | grep -qi "NVMe"; then
disk_type="NVMe"
transport="NVMe"
# Check for SAS
elif echo "$info" | grep -qi "SAS"; then
disk_type="SAS"
transport="SAS"
is_enterprise=true
# Check for SATA SSD
elif echo "$info" | grep -qi "Solid State Device"; then
disk_type="SSD"
transport="SATA"
# Check for SATA HDD
elif echo "$info" | grep -qi "Rotation Rate"; then
disk_type="HDD"
transport="SATA"
fi
# SAS SSDs typically have very high endurance
elif echo "$disk_model" | grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD"; then
# Enterprise SAS SSDs - very high endurance
if [[ $capacity_gb -ge 1000 ]]; then
echo "10000" # 10PB for 1TB+ enterprise SAS SSD
elif [[ $capacity_gb -ge 600 ]]; then
echo "6000" # 6PB for 600GB enterprise SAS SSD
elif [[ $capacity_gb -ge 400 ]]; then
echo "4000" # 4PB for 400GB enterprise SAS SSD
else
echo "2000" # 2PB for smaller enterprise SAS SSD
# Check for enterprise features
if echo "$info" | grep -qi "ENTERPRISE\|EP\|SAS\|Xeon\|Xeons\|DualPort\|PowerLoss\|PLP"; then
is_enterprise=true
fi
elif echo "$disk_model" | grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL"; then
# Enterprise SATA/NVMe SSDs
if [[ $capacity_gb -ge 1000 ]]; then
echo "1200" # 1.2PB for 1TB enterprise
elif [[ $capacity_gb -ge 480 ]]; then
echo "600" # 600TB for 480GB enterprise
elif [[ $capacity_gb -ge 240 ]]; then
echo "300" # 300TB for 240GB enterprise
else
echo "150" # 150TB for smaller enterprise
# Check device type by model name
local model=$(echo "$info" | grep -i "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//')
if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE\|MX500\|870QVO\|860QVO\|Crucial\|Samsung\|Intel"; then
if echo "$model" | grep -qi "PRO\|EP\|DC\|ENT\|ENTERPRISE"; then
is_enterprise=true
fi
else
# Consumer SSDs
if [[ $capacity_gb -ge 1000 ]]; then
echo "600" # 600TB for 1TB consumer
elif [[ $capacity_gb -ge 480 ]]; then
echo "300" # 300TB for 480GB consumer
elif [[ $capacity_gb -ge 240 ]]; then
echo "150" # 150TB for 240GB consumer
elif [[ $capacity_gb -ge 120 ]]; then
echo "80" # 80TB for 120GB consumer
else
echo "40" # 40TB for smaller drives
fi
echo "$disk_type|$transport|$is_enterprise"
}
# Function to calculate TBW for SSD/NVMe
calculate_tbw() {
local disk_type=$1
local raw_value=$2
local sectors=$3
local tbw=0
if [[ -n "$sectors" && "$sectors" != "0" ]]; then
# Calculate from sectors (most common for SATA SSDs)
local bytes=$((sectors * 512))
tbw=$(echo "scale=2; $bytes / 1000 / 1000 / 1000 / 1000" | bc 2>/dev/null || echo "0")
elif [[ -n "$raw_value" && "$raw_value" != "0" ]]; then
if [[ "$disk_type" == "NVMe" ]]; then
# NVMe: raw value is in 32MB units
tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0")
else
# SATA SSD: various manufacturers
tbw=$(echo "scale=2; $raw_value * 32 / 1000 / 1000" | bc 2>/dev/null || echo "0")
fi
fi
echo "$tbw"
}
# Function to get estimated endurance
get_estimated_endurance() {
local capacity_gb=$1
local is_enterprise=$2
local disk_type=$3
# HDDs don't have TBW
if [[ "$disk_type" == "HDD" ]]; then
echo "N/A"
return
fi
local capacity_tier=$(get_capacity_tier "$capacity_gb")
if [[ "$is_enterprise" == "true" ]]; then
echo "${ENTERPRISE_TBW[$capacity_tier]}"
else
echo "${CONSUMER_TBW[$capacity_tier]}"
fi
}
@@ -237,60 +201,40 @@ estimate_ssd_endurance() {
estimate_ssd_lifespan() {
local power_on_hours=$1
local tbw_used=$2
local disk_model=$3
local capacity_gb=$4
local media_wearout=$5
local estimated_endurance=$3
local disk_type=$4
if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then
echo "Unknown||Unknown||Unknown"
echo "Unknown||Unknown|New"
return
fi
local estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")
local tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0")
# If we have media wearout indicator, use it for more accurate estimation
if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then
# For Kingston, SSD_Life_Left is already a percentage
if echo "$disk_model" | grep -qi "KINGSTON"; then
if [[ $media_wearout -le 10 ]]; then
echo "${RED}${media_wearout}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout"
elif [[ $media_wearout -le 30 ]]; then
echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout"
elif [[ $media_wearout -le 70 ]]; then
echo "${YELLOW}${media_wearout}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout"
else
echo "${GREEN}${media_wearout}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout"
fi
else
# For other drives, media_wearout might be countdown from 100
local wear_percent=$media_wearout
if [[ $media_wearout -le 10 ]]; then
echo "${RED}${wear_percent}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout"
elif [[ $media_wearout -le 30 ]]; then
echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout"
elif [[ $media_wearout -le 70 ]]; then
echo "${YELLOW}${wear_percent}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout"
else
echo "${GREEN}${wear_percent}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout"
fi
fi
if [[ "$estimated_endurance" == "N/A" ]]; then
echo "N/A|N/A|N/A|HDD"
return
fi
if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
local lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0")
# Handle the case where tbw_used might have formatting issues
local clean_tbw_used=$(echo "$tbw_used" | sed 's/[^0-9.]//g')
if [[ -z "$clean_tbw_used" ]]; then
clean_tbw_used=0
fi
local tbw_remaining=$(echo "scale=2; $estimated_endurance - $clean_tbw_used" | bc 2>/dev/null || echo "$estimated_endurance")
if [[ $(echo "$clean_tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
local lifespan_used=$(echo "scale=1; $clean_tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0")
local lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100")
if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then
echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}High wear${NC}|tbw"
echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|High wear|$disk_type"
elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then
echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|tbw"
echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|Moderate wear|$disk_type"
else
echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|tbw"
echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|Healthy|$disk_type"
fi
else
echo "Unknown|${estimated_endurance} TB|New|estimated"
echo "Unknown|${estimated_endurance} TB|New|$disk_type"
fi
}
@@ -299,185 +243,344 @@ estimate_hdd_lifespan() {
local power_on_hours=$1
local reallocated_sectors=$2
local pending_sectors=$3
local start_stop_count=$4
local load_cycle_count=$5
local disk_type=$6
if [[ -z "$power_on_hours" ]]; then
# Extract numeric hours only
local numeric_hours=$(extract_numeric_hours "$power_on_hours")
if [[ -z "$numeric_hours" || "$numeric_hours" -eq 0 ]]; then
echo "Unknown"
return
fi
power_on_hours=${power_on_hours:-0}
reallocated_sectors=${reallocated_sectors:-0}
pending_sectors=${pending_sectors:-0}
local severity=0
# Critical issues
if [[ "$pending_sectors" -gt 0 ]]; then
echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
return
elif [[ "$reallocated_sectors" -gt 100 ]]; then
echo "${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)"
severity=$((severity + 3))
elif [[ "$reallocated_sectors" -gt 10 ]]; then
echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)"
elif [[ "$power_on_hours" -gt 40000 ]]; then
echo "${YELLOW}1-2 years${NC} (High usage: $power_on_hours hours)"
elif [[ "$power_on_hours" -gt 25000 ]]; then
echo "${GREEN}2-3 years${NC} (Moderate usage: $power_on_hours hours)"
severity=$((severity + 2))
elif [[ "$reallocated_sectors" -gt 0 ]]; then
severity=$((severity + 1))
fi
# Usage-based assessment
if [[ "$numeric_hours" -gt 50000 ]]; then
severity=$((severity + 3))
elif [[ "$numeric_hours" -gt 30000 ]]; then
severity=$((severity + 2))
elif [[ "$numeric_hours" -gt 15000 ]]; then
severity=$((severity + 1))
fi
# Mechanical wear (for HDDs)
if [[ "$disk_type" == "HDD" ]]; then
if [[ "$start_stop_count" -gt 50000 ]]; then
severity=$((severity + 2))
elif [[ "$start_stop_count" -gt 20000 ]]; then
severity=$((severity + 1))
fi
if [[ "$load_cycle_count" -gt 500000 ]]; then
severity=$((severity + 2))
elif [[ "$load_cycle_count" -gt 200000 ]]; then
severity=$((severity + 1))
fi
fi
if [[ $severity -ge 5 ]]; then
echo "${RED}< 6 months${NC} (Multiple risk factors)"
elif [[ $severity -ge 3 ]]; then
echo "${YELLOW}6-18 months${NC} (Moderate wear)"
elif [[ $severity -ge 1 ]]; then
echo "${YELLOW}1-3 years${NC} (Light wear)"
else
echo "${GREEN}> 3 years${NC} (Low usage: $power_on_hours hours)"
echo "${GREEN}> 3 years${NC} (Healthy)"
fi
}
# Function to check a single disk with enhanced error handling
# Function to check soft-raid (MDRAID)
#######################################
# Report the software RAID (md) arrays listed in /proc/mdstat.
# Globals:   MAGENTA (read)
# Arguments: none
# Outputs:   for every listed array that exists as a block device, a banner
#            line; when mdadm is installed and returns details, also the RAID
#            level, state and active-device count followed by a blank line.
#######################################
check_mdraid() {
  local -a arrays=()
  local entry array_dev details

  if [[ -f /proc/mdstat ]]; then
    while IFS= read -r entry; do
      # Array lines begin with the device name, e.g. "md0 : active raid1 ...";
      # everything up to the first space is that name.
      if [[ $entry =~ ^md[0-9]+ ]]; then
        arrays+=("/dev/${entry%% *}")
      fi
    done < /proc/mdstat
  fi

  for array_dev in "${arrays[@]}"; do
    [[ -b "$array_dev" ]] || continue
    print_color $MAGENTA "Found software RAID: $array_dev"

    # Details are optional: skip quietly when mdadm is missing or silent.
    command_exists mdadm || continue
    details=$(mdadm --detail "$array_dev" 2>/dev/null)
    [[ -n "$details" ]] || continue

    echo "RAID Level: $(grep "Raid Level" <<< "$details" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "State: $(grep "State" <<< "$details" | head -1 | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo "Devices: $(grep "Active Devices" <<< "$details" | cut -d: -f2 | sed 's/^[ \t]*//')"
    echo ""
  done
}
# Function to extract capacity in GB from various formats
#######################################
# Convert a capacity description into whole decimal gigabytes.
# Recognised forms, tried in this order (first match wins):
#   "[1.82 TB]"  "[500.1 GB]"  "500,107,862,016 bytes"  "1.82TB"  "500.1GB"
# Commas are treated as thousands separators and dropped; fractions are
# truncated, never rounded.
# Arguments: $1 - capacity text (e.g. a smartctl "User Capacity" value)
# Outputs:   the size in GB as a non-negative integer, or 0 when the text
#            cannot be interpreted
#######################################
extract_capacity_gb() {
  local capacity=$1
  local capacity_gb=0
  # Patterns live in strings and use [[:space:]]: written inline in [[ =~ ]],
  # a "\s" is not a whitespace class in bash (the backslash just quotes the
  # letter s), so "500 GB" or "... bytes" would never match.
  local -a patterns=(
    'tb:\[([0-9,.]+)[[:space:]]*[Tt][Bb]'
    'gb:\[([0-9,.]+)[[:space:]]*[Gg][Bb]'
    'bytes:([0-9,]+)[[:space:]]+bytes'
    'tb:([0-9,.]+)[[:space:]]*[Tt][Bb]'
    'gb:([0-9,.]+)[[:space:]]*[Gg][Bb]'
  )
  local re_decimal='^([0-9]*)(\.([0-9]*))?$'
  local entry unit regex value whole frac

  for entry in "${patterns[@]}"; do
    unit=${entry%%:*}
    regex=${entry#*:}
    [[ $capacity =~ $regex ]] || continue

    value=${BASH_REMATCH[1]//,/}
    if [[ "$unit" == "bytes" ]]; then
      # Length guard keeps the value inside bash's 64-bit integer range.
      if [[ -n "$value" ]] && (( ${#value} <= 18 )); then
        capacity_gb=$(( 10#$value / 1000000000 ))
      fi
    elif [[ $value =~ $re_decimal ]]; then
      whole=${BASH_REMATCH[1]}
      frac=${BASH_REMATCH[3]}
      if [[ -n "${whole}${frac}" ]] && (( ${#whole} <= 15 )); then
        if [[ "$unit" == "tb" ]]; then
          # TB -> GB: shift the decimal point three places by taking the
          # first three fractional digits (zero-padded) as the GB remainder.
          frac="${frac}000"
          capacity_gb=$(( 10#${whole:-0} * 1000 + 10#${frac:0:3} ))
        else
          capacity_gb=$(( 10#${whole:-0} ))
        fi
      fi
    fi
    # Only the first matching form is considered, even if it was unusable.
    break
  done

  echo "$capacity_gb"
}
# Function to get NVMe capacity using smartctl
#######################################
# Read an NVMe device's capacity text from `smartctl -i`.
# Fields are consulted in order until one yields a non-empty value:
# "Total NVM Capacity", "Namespace 1 Size/Capacity", then "User Capacity"
# (the latter with everything from the first "[" removed).
# Arguments: $1 - device path passed to smartctl
# Outputs:   the text after the field's first colon, leading blanks removed;
#            an empty line when no field is present
#######################################
get_nvme_capacity() {
  local disk=$1
  local identify label value=""

  identify=$(smartctl -i "$disk" 2>/dev/null)

  for label in "Total NVM Capacity" "Namespace 1 Size/Capacity"; do
    value=$(echo "$identify" | grep -i "$label" | cut -d: -f2 | sed 's/^[ \t]*//')
    if [[ -n "$value" ]]; then
      break
    fi
  done

  # Last resort: the generic field, minus its bracketed human-readable part.
  if [[ -z "$value" ]]; then
    value=$(echo "$identify" | grep -i "User Capacity" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
  fi

  echo "$value"
}
# Function to get human readable capacity
#######################################
# Format a capacity for display.
# Arguments:
#   $1 - raw capacity text as reported by the drive (used only as a fallback)
#   $2 - capacity in whole GB
# Outputs:
#   "<n> GB" below 1000 GB, "<n.nn> TB" (truncated to two decimals) from
#   1000 GB upwards. When $2 is not a positive integer the trimmed raw text
#   is printed instead, or "Unknown" if that is empty too, so an unparsed
#   size is never shown as "0 GB".
#######################################
get_human_capacity() {
  local capacity=$1
  local capacity_gb=$2

  if [[ ! "$capacity_gb" =~ ^[0-9]+$ ]] || (( 10#$capacity_gb == 0 )); then
    # Strip leading, then trailing, whitespace from the raw text.
    capacity="${capacity#"${capacity%%[![:space:]]*}"}"
    capacity="${capacity%"${capacity##*[![:space:]]}"}"
    echo "${capacity:-Unknown}"
    return
  fi

  # Force base 10 so a value with leading zeros is not read as octal.
  capacity_gb=$(( 10#$capacity_gb ))

  if (( capacity_gb >= 1000 )); then
    # Integer arithmetic: whole TB, then the first two decimals of the rest.
    printf '%d.%02d TB\n' "$(( capacity_gb / 1000 ))" "$(( capacity_gb % 1000 / 10 ))"
  else
    echo "${capacity_gb} GB"
  fi
}
# Function to check a single disk
# check_disk: print a SMART-based health report for a single disk.
# Arguments:
#   $1 - disk device path
#   $2 - controller string; forwarded to test_smart_access / get_disk_info
#        and shown in the first banner ("direct" when empty)
# Outputs: report lines on stdout. Returns early, after printing a message,
#          when the access probe fails, the path is not a block device, or
#          `smartctl -i` yields no text.
# NOTE(review): this body appears to interleave two revisions of the function
# (two "Checking disk" banners, two get_disk_info calls with different
# argument lists, a case/esac wrapped around an unterminated if, duplicated
# echo lines). It looks like a diff whose +/- markers were stripped — confirm
# against the real file before relying on statement order here.
check_disk() {
local disk=$1
local controller=$2
print_color $CYAN "Checking disk: $disk (Controller: ${controller:-direct})"
print_color $CYAN "Checking disk: $disk"
echo "=================================================="
# Test SMART access level
local access_level=$(test_smart_access "$disk" "$controller")
case $access_level in
"no_access")
print_color $RED "ERROR: Cannot access disk through controller"
echo "Possible reasons:"
echo " - Controller doesn't support SMART passthrough"
echo " - Disk is part of a hardware RAID array"
echo " - Insufficient permissions (try running as root)"
echo " - Controller busy or offline"
# Check if disk exists and is accessible
if [[ ! -b "$disk" ]]; then
print_color $RED "Error: $disk is not a valid block device"
echo ""
return
;;
"not_available")
print_color $YELLOW "SMART not available on this disk"
echo "This disk does not support SMART monitoring"
echo ""
return
;;
"disabled")
print_color $YELLOW "SMART is disabled on this disk"
echo "SMART is available but currently disabled"
echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk"
echo ""
return
;;
"no_attributes")
print_color $YELLOW "WARNING: Cannot read SMART attributes"
echo "This is common with hardware RAID controllers like PERC H730P"
echo "Try checking through the RAID management interface"
echo ""
return
;;
"limited_attributes")
print_color $YELLOW "NOTE: Limited SMART data available"
echo "Controller is filtering some SMART attributes"
;;
esac
fi
# Get disk information
local disk_info=$(get_disk_info "$disk" "$controller")
IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info"
local disk_info=$(get_disk_info "$disk")
# get_disk_info output is read here as "type|transport|is_enterprise".
local disk_type=$(echo "$disk_info" | cut -d'|' -f1)
local transport=$(echo "$disk_info" | cut -d'|' -f2)
local is_enterprise=$(echo "$disk_info" | cut -d'|' -f3)
# Get basic disk information
local info=$(smartctl -i "$disk" 2>/dev/null)
local health=$(smartctl -H "$disk" 2>/dev/null)
local attributes=$(smartctl -A "$disk" 2>/dev/null)
# Check if smartctl command succeeded
# NOTE(review): $? here is the status of the preceding `local` assignment, not
# of smartctl, so in practice only the -z "$info" test can detect a failure.
if [[ $? -ne 0 ]] || [[ -z "$info" ]]; then
print_color $YELLOW "Cannot read disk information. It may be offline, unsupported, or need root access."
echo ""
return
fi
# Extract disk information
local model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//')
[[ -z "$model" ]] && model=$(echo "$info" | grep "Model Number:" | cut -d: -f2 | sed 's/^[ \t]*//')
local serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//')
local capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
local firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//')
# For NVMe disks, try to get capacity from different fields
if [[ "$disk_type" == "NVMe" ]]; then
local nvme_capacity=$(get_nvme_capacity "$disk")
if [[ -n "$nvme_capacity" ]]; then
capacity="$nvme_capacity"
fi
fi
# Extract capacity in GB and human readable format
local capacity_gb=$(extract_capacity_gb "$capacity")
local capacity_human=$(get_human_capacity "$capacity" "$capacity_gb")
# If capacity extraction failed, try alternative method
if [[ "$capacity_gb" -eq 0 ]]; then
# Try to get capacity from model name or other methods
# First matching size token in the model string wins; 960G is mapped to 1 TB.
if [[ "$disk_type" == "NVMe" && "$model" =~ 500[Gg] ]]; then
capacity_gb=500
capacity_human="500 GB"
elif [[ "$model" =~ 960[Gg] ]] || [[ "$model" =~ 1[Tt] ]]; then
capacity_gb=1000
capacity_human="1 TB"
elif [[ "$model" =~ 2[Tt] ]]; then
capacity_gb=2000
capacity_human="2 TB"
elif [[ "$model" =~ 500[Gg] ]]; then
capacity_gb=500
capacity_human="500 GB"
elif [[ "$model" =~ 250[Gg] ]]; then
capacity_gb=250
capacity_human="250 GB"
fi
fi
# Health verdict: text after "result:" in `smartctl -H`, else after
# "SMART overall-health".
local health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//')
[[ -z "$health_status" ]] && health_status=$(echo "$health" | grep "SMART overall-health" | cut -d: -f2 | sed 's/^[ \t]*//')
# Extract SMART attributes
# Field 10 of the first matching `smartctl -A` row is taken as each value.
local power_on_hours_raw=$(echo "$attributes" | grep "Power_On_Hours" | awk '{print $10}' | head -1)
local power_on_hours=$(extract_numeric_hours "$power_on_hours_raw")
local reallocated_sectors=$(echo "$attributes" | grep "Reallocated_Sector_Ct" | awk '{print $10}' | head -1)
local pending_sectors=$(echo "$attributes" | grep "Current_Pending_Sector" | awk '{print $10}' | head -1)
local total_written=$(echo "$attributes" | grep -E "Total_LBAs_Written|Host_Writes_32MiB" | awk '{print $10}' | head -1)
local host_writes_32mib=$(echo "$attributes" | grep "Host_Writes_32MiB" | awk '{print $10}' | head -1)
local start_stop_count=$(echo "$attributes" | grep "Start_Stop_Count" | awk '{print $10}' | head -1)
local load_cycle_count=$(echo "$attributes" | grep "Load_Cycle_Count" | awk '{print $10}' | head -1)
# For NVMe disks using smartctl extended attributes
if [[ "$disk_type" == "NVMe" ]]; then
local nvme_attributes=$(smartctl -x "$disk" 2>/dev/null)
# Extract data units written for NVMe
local data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $4}' | tr -d ',')
if [[ -n "$data_units_written" ]]; then
# Convert data units to sectors (1 data unit = 1000 sectors for NVMe)
total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null)
else
# Try alternative field
data_units_written=$(echo "$nvme_attributes" | grep "Data Units Written" | awk '{print $5}' | tr -d ',')
if [[ -n "$data_units_written" ]]; then
total_written=$(echo "$data_units_written * 1000" | bc 2>/dev/null)
fi
fi
# Get power on hours for NVMe
local nvme_power_hours=$(echo "$nvme_attributes" | grep "Power On Hours" | awk '{print $4}')
if [[ -n "$nvme_power_hours" ]]; then
power_on_hours="$nvme_power_hours"
fi
fi
# Display basic information
echo "Model: ${model:-Unknown}"
echo "Serial: ${serial:-Unknown}"
echo "Type: $disk_type"
echo "Capacity: ${capacity:-Unknown}"
echo "Interface: $transport"
# $is_enterprise is executed as a command here, so it must hold "true" or "false".
echo "Class: $($is_enterprise && echo "Enterprise" || echo "Consumer")"
echo "Capacity: $capacity_human"
echo "Firmware: ${firmware:-Unknown}"
echo "Health: ${health_status:-Unknown}"
# Only show power on hours if available
if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
echo "Power On Hours: $power_on_hours"
else
echo "Power On Hours: Unknown"
fi
echo "Power On Hours: ${power_on_hours:-Unknown}"
# Disk type specific analysis
if [[ "$disk_type" == "SSD" ]]; then
if [[ "$disk_type" == "HDD" ]]; then
echo "Realloc Sectors: ${reallocated_sectors:-0}"
echo "Pending Sectors: ${pending_sectors:-0}"
[[ -n "$start_stop_count" ]] && echo "Start/Stop Count: ${start_stop_count:-0}"
[[ -n "$load_cycle_count" ]] && echo "Load Cycle Count: ${load_cycle_count:-0}"
local lifespan=$(estimate_hdd_lifespan "$power_on_hours_raw" "${reallocated_sectors:-0}" "${pending_sectors:-0}" "${start_stop_count:-0}" "${load_cycle_count:-0}" "$disk_type")
echo "Lifespan: $lifespan"
elif [[ "$disk_type" == "SSD" || "$disk_type" == "NVMe" || "$disk_type" == "SAS" ]]; then
# Written volume: prefer total_written, fall back to the 32MiB host-writes counter.
local tbw_used=0
if [[ -n "$total_written" && "$total_written" != "0" ]]; then
tbw_used=$(calculate_tbw "" "$total_written" "$model")
tbw_used=$(calculate_tbw "$disk_type" "" "$total_written")
elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model")
tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "")
fi
if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
local estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" "$disk_type")
echo "TBW Used: ${tbw_used} TB"
fi
echo "TBW Endurance: ${estimated_endurance} TB (Minimum guaranteed - actual may be higher)"
# Estimate capacity for endurance calculation
local capacity_gb=0
if echo "$capacity" | grep -qi "GB"; then
capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. -f1)
elif echo "$capacity" | grep -qi "TB"; then
capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. -f1)
fi
local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout")
local lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$estimated_endurance" "$disk_type")
# estimate_ssd_lifespan output is split on "|" into four fields.
local lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1)
local tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2)
local wear_status=$(echo "$lifespan_info" | cut -d'|' -f3)
local wear_source=$(echo "$lifespan_info" | cut -d'|' -f4)
if [[ "$wear_source" != "media_wearout" && $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
if [[ "$estimated_endurance" != "N/A" ]]; then
echo "TBW Remaining: $tbw_remaining"
fi
echo "Lifespan: $lifespan_percent ($wear_status)"
# Show wear source if available
if [[ "$wear_source" == "media_wearout" ]]; then
echo "Wear Source: Media Wearout Indicator"
elif [[ "$wear_source" == "tbw" ]]; then
echo "Wear Source: TBW Calculation"
elif [[ "$wear_source" == "estimated" ]]; then
echo "Wear Source: Estimated Endurance"
fi
elif [[ "$disk_type" == "HDD" ]]; then
if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then
echo "Realloc Sectors: $reallocated_sectors"
# Show mechanical attributes for SAS drives that might be SSDs
if [[ "$disk_type" == "SAS" ]]; then
echo "Realloc Sectors: ${reallocated_sectors:-0}"
echo "Pending Sectors: ${pending_sectors:-0}"
fi
if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then
echo "Pending Sectors: $pending_sectors"
fi
local lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
echo "Lifespan: $lifespan"
else
print_color $YELLOW "Limited information available for this disk type"
echo "This is normal for hardware RAID configurations like PERC H730P"
echo "For detailed SAS drive information, use controller management tools"
print_color $YELLOW "Unknown disk type - limited information available"
fi
echo ""
}
# Function to detect all disks with enhanced SAS support (no partitions) - FIXED
# Function to detect all disks
detect_disks() {
local disks=()
# Check for SATA/SAS disks - only main devices, no partitions
for disk in /dev/sd[a-z]; do
# Check for SATA/SAS disks
for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do
if [[ -b "$disk" ]]; then
disks+=("$disk")
fi
done
# Check for NVMe disks - only main devices, no partitions
# Check for NVMe disks (base devices only, no partitions)
for disk in /dev/nvme[0-9]n[0-9]; do
if [[ -b "$disk" ]]; then
disks+=("$disk")
fi
done
# Check for SAS disks via SCSI generic - only main devices
for disk in /dev/sg[0-9]; do
if [[ -b "$disk" ]]; then
disks+=("$disk")
fi
done
# Check for other disk types - only main devices
# Check for other disk types
for disk in /dev/vd[a-z] /dev/xvd[a-z]; do
if [[ -b "$disk" ]]; then
disks+=("$disk")
@@ -487,40 +590,19 @@ detect_disks() {
echo "${disks[@]}"
}
# Function to detect RAID controllers (Ubuntu specific) - FIXED
#######################################
# Probe for disks reachable through a hardware RAID controller.
# Every controller type is tried with slot numbers 0-31 against each
# candidate device node; for a given controller/slot the first node that
# answers `smartctl -i` is recorded and the remaining nodes are skipped.
# Arguments: [device ...] - candidate device nodes to probe. Optional;
#            defaults to /dev/sda /dev/sdb /dev/sdc /dev/sg0 /dev/sg1.
# Outputs:   one line of space-separated "device:controller,slot" entries
#            (an empty line when nothing responds)
#######################################
detect_raid_controllers() {
  local -a controllers=("megaraid" "cciss" "areca" "3ware" "hpt")
  local -a candidates=("$@")
  local -a present=()
  local -a raid_disks=()
  local controller slot device

  if (( ${#candidates[@]} == 0 )); then
    candidates=("/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1")
  fi

  # Filter once instead of re-testing inside the probe loops. SCSI generic
  # nodes (/dev/sg*) are character devices, so a block-only test would
  # silently drop them; accept both kinds.
  for device in "${candidates[@]}"; do
    if [[ -b "$device" || -c "$device" ]]; then
      present+=("$device")
    fi
  done

  for controller in "${controllers[@]}"; do
    for slot in {0..31}; do
      for device in "${present[@]}"; do
        if smartctl -d "$controller,$slot" -i "$device" &>/dev/null; then
          raid_disks+=("$device:$controller,$slot")
          # One node per controller/slot is enough; avoid duplicate entries.
          break
        fi
      done
    done
  done

  echo "${raid_disks[@]}"
}
# Main function - FIXED
# Main function
main() {
print_color $BLUE "Ubuntu 24.04 Disk Health Check Script v$VERSION"
print_color $BLUE "Enhanced with PERC H730P and SAS Support"
print_color $BLUE "============================================"
print_color $BLUE "Disk Health Check Script v$VERSION for Ubuntu"
print_color $BLUE "=============================================="
echo ""
check_dependencies
local disks=()
# Check for soft-raid first
check_mdraid
# If specific disk provided, check only that disk
if [[ $# -gt 0 ]]; then
for disk in "$@"; do
@@ -531,17 +613,9 @@ main() {
fi
done
else
# Auto-detect disks - FIXED: don't mix output with disk detection
print_color $CYAN "Auto-detecting disks (excluding partitions)..."
local direct_disks=()
read -ra direct_disks <<< "$(detect_disks)"
print_color $CYAN "Scanning for RAID controllers..."
local raid_disks=()
read -ra raid_disks <<< "$(detect_raid_controllers)"
# Combine both lists
disks=("${direct_disks[@]}" "${raid_disks[@]}")
# Auto-detect disks
print_color $CYAN "Auto-detecting disks..."
read -ra disks <<< "$(detect_disks)"
fi
if [[ ${#disks[@]} -eq 0 ]]; then
@@ -555,33 +629,20 @@ main() {
# Check if running as root, warn if not
if [[ $EUID -ne 0 ]]; then
print_color $YELLOW "Warning: Not running as root."
print_color $YELLOW "Some disks/controllers may show limited information."
print_color $YELLOW "Warning: Not running as root. Some disks may not be accessible."
echo "For complete results, run as: sudo $0"
echo ""
fi
# Check each disk
for disk_info in "${disks[@]}"; do
# Check if this is a RAID disk (has controller specified)
if [[ "$disk_info" == *":"* ]]; then
IFS=':' read -r disk controller <<< "$disk_info"
check_disk "$disk" "$controller"
else
check_disk "$disk_info"
fi
for disk in "${disks[@]}"; do
check_disk "$disk"
done
print_color $BLUE "Check completed!"
echo ""
print_color $CYAN "Note: For PERC H730P controllers with SAS drives:"
print_color $CYAN " - Install 'storcli' for detailed controller information"
print_color $CYAN " - Use 'smartctl -d sat /dev/sgX' to try direct access"
print_color $CYAN " - Hardware RAID controllers often limit SMART data access"
echo ""
print_color $CYAN "Ubuntu-specific tips:"
print_color $CYAN " - Use 'lsblk' to see all available block devices"
print_color $CYAN " - Use 'lshw -class disk' for detailed disk information"
print_color $YELLOW "Note: TBW Endurance values shown are minimum guaranteed numbers."
print_color $YELLOW " Actual endurance for your specific drive model may be higher."
}
# Usage information
@@ -594,9 +655,10 @@ usage() {
echo " $SCRIPT_NAME # Check all auto-detected disks"
echo " sudo $SCRIPT_NAME # Check all disks (as root)"
echo " $SCRIPT_NAME /dev/sda # Check specific disk"
echo " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk"
echo " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly"
echo " $SCRIPT_NAME /dev/nvme0n1 # Check specific NVMe disk"
echo " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks"
echo ""
echo "Supported: SATA HDD/SSD, SAS HDD/SSD, NVMe, Hardware RAID, Software RAID"
}
# Parse command line arguments