first commit

This commit is contained in:
2025-10-22 03:04:37 +08:00
parent 18e87d5477
commit 38c4e1e170
6 changed files with 3078 additions and 0 deletions

398
alma-v2.1.sh Executable file
View File

@@ -0,0 +1,398 @@
#!/bin/bash
# Disk Health Check Script for Alma Linux 9
# Checks SSD TBW/lifespan and HDD health status

# Name used in usage/version output; "--" keeps a leading '-' in $0 from
# being parsed as an option by basename.
SCRIPT_NAME=$(basename -- "$0")
VERSION="2.1"

# Color codes (terminal escape sequences looked up through terminfo).
# tput prints a diagnostic on stderr for every call when TERM is unset or
# unknown (cron jobs, systemd units, minimal containers). That noise is
# discarded; the variable then stays empty and the output is simply uncolored.
RED=$(tput setaf 1 2>/dev/null)
GREEN=$(tput setaf 2 2>/dev/null)
YELLOW=$(tput setaf 3 2>/dev/null)
BLUE=$(tput setaf 4 2>/dev/null)
CYAN=$(tput setaf 6 2>/dev/null)
NC=$(tput sgr0 2>/dev/null)
# Print a message wrapped in a color sequence and terminated by the reset
# sequence held in NC.
# Arguments: $1 - color escape sequence (for example $RED)
#            $2 - message text
# Outputs:   one line on stdout; backslash escapes in the text are expanded
print_color() {
  local tint=$1
  local text=$2
  printf '%b\n' "${tint}${text}${NC}"
}
# Tell whether a command name can be resolved by the shell.
# Arguments: $1 - command name to look up
# Returns:   the status of `command -v` (0 when found); prints nothing
command_exists() {
  local tool=$1
  command -v "$tool" &>/dev/null
}
# Verify that the external tools this script relies on are installed.
# Looks up smartctl and bc through command_exists. When either is missing it
# prints the list of packages to install and terminates the script with
# status 1; otherwise it returns 0 without printing anything.
check_dependencies() {
  local -a absent=()
  command_exists smartctl || absent+=("smartmontools")
  command_exists bc || absent+=("bc")

  (( ${#absent[@]} == 0 )) && return 0

  print_color "$RED" "Error: Missing required packages: ${absent[*]}"
  echo "Install with: sudo dnf install ${absent[*]}"
  exit 1
}
# Classify a disk from the text printed by `smartctl -i`.
# Arguments: $1 - device path
# Outputs:   "SSD" when the text mentions "Solid State Device", otherwise
#            "HDD" when it mentions "Rotation Rate", otherwise "UNKNOWN"
get_disk_type() {
  local disk=$1
  local identity
  identity=$(smartctl -i "$disk" 2>/dev/null)

  # The SSD pattern is tested first, matching the order of the checks this
  # function has always used.
  case "$identity" in
    *"Solid State Device"*) echo "SSD" ;;
    *"Rotation Rate"*) echo "HDD" ;;
    *) echo "UNKNOWN" ;;
  esac
}
# Convert a SMART write counter into terabytes written (decimal TB, 10^12
# bytes).
# Arguments: $1 - counter expressed in 32 MiB units (Host_Writes_32MiB)
#            $2 - counter expressed in 512-byte sectors (Total_LBAs_Written)
#            The sector counter wins when both are usable.
# Outputs:   TB written with two decimals (truncated), or "0" when neither
#            argument is a positive integer or bc produced no result
calculate_tbw() {
  local raw_value=$1
  local sectors=$2
  local bytes_expr tbw

  # Only plain non-negative integers are accepted; anything else (empty,
  # "123h+45m", ...) is treated as "no data" instead of reaching arithmetic.
  if [[ "$sectors" =~ ^[0-9]+$ && "$sectors" != "0" ]]; then
    bytes_expr="$sectors * 512"
  elif [[ "$raw_value" =~ ^[0-9]+$ && "$raw_value" != "0" ]]; then
    # 32 MiB = 32 * 1048576 bytes (binary units, not 32 * 10^6).
    bytes_expr="$raw_value * 32 * 1048576"
  else
    echo "0"
    return
  fi

  # The multiplication and a single division happen inside bc: no 64-bit
  # shell overflow and no repeated truncation between chained divisions.
  tbw=$(echo "scale=2; ($bytes_expr) / 1000000000000" | bc 2>/dev/null)
  [[ -n "$tbw" ]] || tbw="0"
  # bc prints values below 1 as ".50"; add the leading zero for display.
  [[ "$tbw" == .* ]] && tbw="0${tbw}"
  echo "$tbw"
}
# Guess the rated endurance (TB written) of an SSD from its model string and
# capacity. The figures are fixed heuristics, not vendor specifications.
# Arguments: $1 - model string, matched case-insensitively against a list of
#                 vendor/model keywords that select the "enterprise" table
#            $2 - capacity in GB (integer)
# Outputs:   endurance in TB on stdout
estimate_ssd_endurance() {
  local disk_model=$1
  local capacity_gb=$2
  local -a tiers
  local floor tier

  # Each tier is "minimum capacity in GB:endurance in TB", largest first;
  # "floor" is the value for drives below the smallest listed tier.
  if grep -qi "MTFDDAK480TDS\|MICRON\|INTEL\|SAMSUNG\|KIOXIA" <<< "$disk_model"; then
    tiers=(1000:1200 480:600 240:300)
    floor=150
  else
    tiers=(1000:600 480:300 240:150 120:80)
    floor=40
  fi

  for tier in "${tiers[@]}"; do
    if [[ $capacity_gb -ge ${tier%%:*} ]]; then
      echo "${tier##*:}"
      return
    fi
  done
  echo "$floor"
}
# Estimate how much of an SSD's write endurance is left.
# Arguments: $1 - power-on hours (leading digits are used)
#            $2 - TB written so far
#            $3 - model string   (passed to estimate_ssd_endurance)
#            $4 - capacity in GB (passed to estimate_ssd_endurance)
# Outputs:   one line "PERCENT|TBW_REMAINING|STATUS":
#              "Unknown||Unknown"           power-on hours missing or zero
#              "Unknown|<endurance> TB|New" nothing written yet
#              otherwise colored remaining percentage and TB plus a status
#            Remaining values never go below zero: the endurance figure is
#            only an estimate, so a drive may already have written more.
estimate_ssd_lifespan() {
  local power_on_hours=$1
  local tbw_used=$2
  local disk_model=$3
  local capacity_gb=$4
  local number_re='^[0-9]*[.]?[0-9]+$'

  # Raw SMART values can carry suffixes such as "1234h+56m"; keep the digits.
  power_on_hours=${power_on_hours%%[!0-9]*}
  if [[ -z "$power_on_hours" ]] || (( 10#$power_on_hours == 0 )); then
    echo "Unknown||Unknown"
    return
  fi
  # Anything that is not a plain decimal number is treated as "no writes".
  [[ "$tbw_used" =~ $number_re ]] || tbw_used=0

  local estimated_endurance
  estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")

  if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -ne 1 ]]; then
    echo "Unknown|${estimated_endurance} TB|New"
    return
  fi

  local tbw_remaining lifespan_used lifespan_remaining
  tbw_remaining=$(echo "scale=2; r = $estimated_endurance - $tbw_used; if (r < 0) r = 0; r" | bc 2>/dev/null)
  lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null)
  lifespan_used=${lifespan_used:-0}
  lifespan_remaining=$(echo "scale=1; p = 100 - $lifespan_used; if (p < 0) p = 0; p" | bc 2>/dev/null)

  local color status
  if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then
    color=$RED
    status="High wear"
  elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then
    color=$YELLOW
    status="Moderate wear"
  else
    color=$GREEN
    status="Healthy"
  fi
  echo "${color}${lifespan_remaining:-100}%${NC}|${color}${tbw_remaining:-0} TB${NC}|${status}"
}
# Produce a rough remaining-life verdict for a hard disk.
# Arguments: $1 - power-on hours (empty yields "Unknown")
#            $2 - reallocated sector count (defaults to 0)
#            $3 - pending sector count (defaults to 0)
# Outputs:   one colored verdict line. Pending sectors outrank reallocated
#            sectors, which outrank power-on hours.
estimate_hdd_lifespan() {
  local power_on_hours=$1
  local reallocated_sectors=${2:-0}
  local pending_sectors=${3:-0}
  local verdict

  if [[ -z "$power_on_hours" ]]; then
    verdict="Unknown"
  elif [[ "$pending_sectors" -gt 0 ]]; then
    verdict="${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
  elif [[ "$reallocated_sectors" -gt 100 ]]; then
    verdict="${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)"
  elif [[ "$reallocated_sectors" -gt 10 ]]; then
    verdict="${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)"
  elif [[ "$power_on_hours" -gt 40000 ]]; then
    verdict="${YELLOW}1-2 years${NC} (High usage: $power_on_hours hours)"
  elif [[ "$power_on_hours" -gt 25000 ]]; then
    verdict="${GREEN}2-3 years${NC} (Moderate usage: $power_on_hours hours)"
  else
    verdict="${GREEN}> 3 years${NC} (Low usage: $power_on_hours hours)"
  fi
  echo "$verdict"
}
# Report the SMART state of a disk from the text printed by `smartctl -i`.
# Arguments: $1 - device path
# Outputs:   "unavailable" - no "SMART support is: Available" text
#            "enabled"     - available and "SMART support is: Enabled" present
#            "available"   - available but not reported as enabled
check_smart_capability() {
  local disk=$1
  local identity
  identity=$(smartctl -i "$disk" 2>/dev/null)

  if [[ "$identity" != *"SMART support is: Available"* ]]; then
    echo "unavailable"
  elif [[ "$identity" == *"SMART support is: Enabled"* ]]; then
    echo "enabled"
  else
    echo "available"
  fi
}
# Inspect one disk with smartctl and print a health report.
# Arguments: $1 - block device path
# Outputs:   the report on stdout. The function returns early, after an
#            explanatory message, when the path is not a block device, when
#            SMART is unsupported or disabled, or when smartctl could not
#            talk to the device.
check_disk() {
  local disk=$1
  print_color "$CYAN" "Checking disk: $disk"
  echo "=================================================="

  # Check if disk exists and is accessible
  if [[ ! -b "$disk" ]]; then
    print_color "$RED" "Error: $disk is not a valid block device"
    echo ""
    return
  fi

  # Check SMART capability
  local smart_status
  smart_status=$(check_smart_capability "$disk")
  if [[ "$smart_status" == "unavailable" ]]; then
    print_color "$YELLOW" "SMART not supported on $disk"
    echo ""
    return
  elif [[ "$smart_status" == "available" ]]; then
    print_color "$YELLOW" "SMART available but not enabled on $disk"
    echo "Enable with: smartctl -s on $disk"
    echo ""
    return
  fi

  # Declarations are kept apart from the assignments: "local v=$(cmd)"
  # always yields status 0, which hides the exit status of smartctl.
  local info health attributes smart_rc
  info=$(smartctl -i "$disk" 2>/dev/null)
  health=$(smartctl -H "$disk" 2>/dev/null)
  attributes=$(smartctl -A "$disk" 2>/dev/null)
  smart_rc=$?

  # smartctl's exit status is a bit mask. Bits 0 and 1 mean the command line
  # was rejected or the device could not be opened; the higher bits describe
  # the disk's condition and must not suppress the report.
  if (( smart_rc & 3 )); then
    print_color "$RED" "Error: Cannot read SMART data from $disk"
    echo "You may need to run this script as root"
    echo ""
    return
  fi

  # Extract disk information
  local model serial capacity firmware
  model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//')
  serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//')
  capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
  firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//')

  # "capacity" is the text before the "[480 GB]" bracket, i.e. a byte count
  # with thousands separators ("480,103,981,056 bytes "). Strip everything
  # but the digits and convert bytes to decimal GB.
  local capacity_gb=0
  local capacity_digits=${capacity%%bytes*}
  capacity_digits=${capacity_digits//[!0-9]/}
  if [[ -n "$capacity_digits" ]]; then
    capacity_gb=$(( 10#$capacity_digits / 1000000000 ))
  fi

  local disk_type health_status
  disk_type=$(get_disk_type "$disk")
  health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)

  # Extract SMART attributes (column 10 of the attribute table is RAW_VALUE).
  # total_written holds 512-byte LBAs only; Host_Writes_32MiB uses a
  # different unit and is kept in its own variable.
  local power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib
  power_on_hours=$(awk '/Power_On_Hours/ {print $10; exit}' <<< "$attributes")
  reallocated_sectors=$(awk '/Reallocated_Sector_Ct/ {print $10; exit}' <<< "$attributes")
  pending_sectors=$(awk '/Current_Pending_Sector/ {print $10; exit}' <<< "$attributes")
  total_written=$(awk '/Total_LBAs_Written/ {print $10; exit}' <<< "$attributes")
  host_writes_32mib=$(awk '/Host_Writes_32MiB/ {print $10; exit}' <<< "$attributes")

  # Raw values may carry suffixes such as "1234h+56m"; keep the leading
  # digits so later integer comparisons do not fail.
  power_on_hours=${power_on_hours%%[!0-9]*}
  reallocated_sectors=${reallocated_sectors%%[!0-9]*}
  pending_sectors=${pending_sectors%%[!0-9]*}
  total_written=${total_written%%[!0-9]*}
  host_writes_32mib=${host_writes_32mib%%[!0-9]*}

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Capacity: ${capacity:-Unknown}"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"
  echo "Power On Hours: ${power_on_hours:-Unknown}"

  # Disk type specific analysis
  if [[ "$disk_type" == "SSD" ]]; then
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$host_writes_32mib" "")
    fi
    echo "TBW Used: ${tbw_used} TB"

    local lifespan_info lifespan_percent tbw_remaining wear_status _
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb")
    IFS='|' read -r lifespan_percent tbw_remaining wear_status _ <<< "$lifespan_info"
    echo "TBW Remaining: $tbw_remaining"
    echo "Lifespan: $lifespan_percent ($wear_status)"
  elif [[ "$disk_type" == "HDD" ]]; then
    echo "Realloc Sectors: ${reallocated_sectors:-0}"
    echo "Pending Sectors: ${pending_sectors:-0}"
    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
    echo "Lifespan: $lifespan"
  else
    print_color "$YELLOW" "Limited information available for this disk type"
  fi
  echo ""
}
# List the whole-disk block devices present on this system.
# Outputs: device paths on a single line, separated by spaces, in the order
#          SATA/SAS (sdX, sdXX), NVMe namespaces, virtio/Xen (vdX, xvdX).
#          Partitions are never listed: smartctl would report the parent
#          disk a second time for each of them.
detect_disks() {
  local -a disks=()
  local disk

  # Check for SATA/SAS disks
  for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do
    [[ -b "$disk" ]] && disks+=("$disk")
  done

  # Check for NVMe disks: namespaces only (nvme<ctrl>n<ns>), any number of
  # digits; names with a trailing p<N> are partitions and are skipped.
  for disk in /dev/nvme*n*; do
    [[ "$disk" =~ ^/dev/nvme[0-9]+n[0-9]+$ && -b "$disk" ]] && disks+=("$disk")
  done

  # Check for other disk types
  for disk in /dev/vd[a-z] /dev/xvd[a-z]; do
    [[ -b "$disk" ]] && disks+=("$disk")
  done

  echo "${disks[@]}"
}
# Program entry point.
# Arguments: optional device paths. Without any, the disks reported by
#            detect_disks are used.
# Prints a banner, verifies dependencies, builds the list of devices, warns
# when not running as root, and runs check_disk on every device. Exits with
# status 1 when no usable device remains.
main() {
  print_color "$BLUE" "Disk Health Check Script v$VERSION for Alma Linux 9"
  print_color "$BLUE" "===================================================="
  echo ""
  check_dependencies

  local -a targets=()
  local candidate
  if (( $# == 0 )); then
    # Auto-detect disks
    print_color "$CYAN" "Auto-detecting disks..."
    read -ra targets <<< "$(detect_disks)"
  else
    # Explicit device list: keep the block devices, complain about the rest
    for candidate in "$@"; do
      if [[ ! -b "$candidate" ]]; then
        print_color "$RED" "Error: $candidate is not a valid block device"
        continue
      fi
      targets+=("$candidate")
    done
  fi

  if (( ${#targets[@]} == 0 )); then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi
  print_color "$GREEN" "Found ${#targets[@]} disk(s) to check"
  echo ""

  # Warn, but carry on, when not running as root
  if (( EUID != 0 )); then
    print_color "$YELLOW" "Warning: Not running as root. Some disks may not be accessible."
    echo "For complete results, run as: sudo $0"
    echo ""
  fi

  for candidate in "${targets[@]}"; do
    check_disk "$candidate"
  done
  print_color "$BLUE" "Check completed!"
}
# Print the command-line help text on stdout, using SCRIPT_NAME as the
# program name.
usage() {
  printf '%s\n' \
    "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks" \
    "" \
    "Examples:" \
    " $SCRIPT_NAME # Check all auto-detected disks" \
    " sudo $SCRIPT_NAME # Check all disks (as root)" \
    " $SCRIPT_NAME /dev/sda # Check specific disk" \
    " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks"
}
# Command-line entry: the help and version flags are answered here and end
# the script with status 0; any other invocation is forwarded to main with
# the original arguments.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi

536
alma-v2.4.sh Executable file
View File

@@ -0,0 +1,536 @@
#!/bin/bash
# Disk Health Check Script for Alma Linux 9
# Enhanced with SAS/PERC H730P controller support
# Checks SSD TBW/lifespan and HDD health status

# Name used in usage/version output; "--" keeps a leading '-' in $0 from
# being parsed as an option by basename.
SCRIPT_NAME=$(basename -- "$0")
VERSION="2.4"

# Color codes (terminal escape sequences looked up through terminfo).
# tput prints a diagnostic on stderr for every call when TERM is unset or
# unknown (cron jobs, systemd units, minimal containers). That noise is
# discarded; the variable then stays empty and the output is simply uncolored.
RED=$(tput setaf 1 2>/dev/null)
GREEN=$(tput setaf 2 2>/dev/null)
YELLOW=$(tput setaf 3 2>/dev/null)
BLUE=$(tput setaf 4 2>/dev/null)
CYAN=$(tput setaf 6 2>/dev/null)
NC=$(tput sgr0 2>/dev/null)
# Print a message wrapped in a color sequence and terminated by the reset
# sequence held in NC.
# Arguments: $1 - color escape sequence (for example $RED)
#            $2 - message text
# Outputs:   one line on stdout; backslash escapes in the text are expanded
print_color() {
  local tint=$1
  local text=$2
  printf '%b\n' "${tint}${text}${NC}"
}
# Tell whether a command name can be resolved by the shell.
# Arguments: $1 - command name to look up
# Returns:   the status of `command -v` (0 when found); prints nothing
command_exists() {
  local tool=$1
  command -v "$tool" &>/dev/null
}
# Verify that the external tools this script relies on are installed.
# Looks up smartctl and bc through command_exists. When either is missing it
# prints the list of packages to install and terminates the script with
# status 1; otherwise it returns 0 without printing anything.
check_dependencies() {
  local -a absent=()
  command_exists smartctl || absent+=("smartmontools")
  command_exists bc || absent+=("bc")

  (( ${#absent[@]} == 0 )) && return 0

  print_color "$RED" "Error: Missing required packages: ${absent[*]}"
  echo "Install with: sudo dnf install ${absent[*]}"
  exit 1
}
# Probe how much SMART data smartctl can obtain for a disk. Nothing on the
# disk is changed; SMART is never switched on here.
# Arguments: $1 - device path
#            $2 - optional smartctl device type, passed as "-d <type>"
# Outputs:   one of
#              no_access          `smartctl -i` returned a non-zero status
#              not_available      no "SMART support is: Available" line
#              disabled           available, no "SMART support is: Enabled"
#              no_attributes      `smartctl -A` printed nothing
#              limited_attributes no power-on-hours value in the attributes
#              full_access        everything above succeeded
test_smart_access() {
  local disk=$1
  local controller=$2

  # The command is kept as an array so that the device path and controller
  # are passed as single words without splitting or globbing.
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  # Test basic SMART access
  local smart_info
  if ! smart_info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null); then
    echo "no_access"
    return
  fi

  # smartctl prints two "SMART support is:" lines (availability, then
  # state). Each line is matched on its own; picking a column from both
  # lines at once yields a two-line value that never equals "Available".
  if ! grep -q "SMART support is:[[:space:]]*Available" <<< "$smart_info"; then
    echo "not_available"
    return
  fi
  if ! grep -q "SMART support is:[[:space:]]*Enabled" <<< "$smart_info"; then
    echo "disabled"
    return
  fi

  # Test attribute reading
  local attributes
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  if [[ -z "$attributes" ]]; then
    echo "no_attributes"
    return
  fi

  # Check if we have basic attributes (column 10 is RAW_VALUE)
  local power_on_hours
  power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours\|Power-On" | awk '{print $10}' | head -1)
  if [[ -z "$power_on_hours" ]]; then
    echo "limited_attributes"
    return
  fi
  echo "full_access"
}
# Collect identity, health and SMART attribute values for one disk.
# Arguments: $1 - device path
#            $2 - optional smartctl device type, passed as "-d <type>"
# Outputs:   a single '|'-separated record with twelve fields:
#              model|serial|capacity|firmware|health_status|disk_type|
#              power_on_hours|reallocated_sectors|pending_sectors|
#              total_written|host_writes_32mib|media_wearout
#            Fields that could not be found are empty.
get_disk_info() {
  local disk=$1
  local controller=$2

  # Array form keeps the device path and controller as single words.
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  local info attributes health
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  health=$("${smart_cmd[@]}" -H "$disk" 2>/dev/null)

  # Extract information with multiple fallbacks for SAS drives
  local model vendor serial capacity firmware health_status
  model=$(echo "$info" | grep -i "Device Model:\|Product:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  vendor=$(echo "$info" | grep -i "Vendor:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model"
  serial=$(echo "$info" | grep -i "Serial Number:\|Serial number:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  capacity=$(echo "$info" | grep -i "User Capacity:\|Total NVM Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1 | head -1)
  firmware=$(echo "$info" | grep -i "Firmware Version:\|Firmware revision:\|Revision:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  # ATA drives report "...test result: PASSED"; SCSI/SAS drives report
  # "SMART Health Status: OK".
  health_status=$(echo "$health" | grep -i "result:\|SMART overall-health\|SMART Health Status:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep -i "SMART overall-health" | awk -F'[' '{print $2}' | cut -d']' -f1)

  # Get disk type with SAS support
  local disk_type="UNKNOWN"
  if echo "$info" | grep -qi "Solid State Device"; then
    disk_type="SSD"
  elif echo "$info" | grep -qi "Rotation Rate"; then
    disk_type="HDD"
  elif echo "$info" | grep -qi "SCSI\|SAS"; then
    # SAS drives often don't specify, check rotation rate
    if echo "$info" | grep -qi "15000\|10000\|7200"; then
      disk_type="HDD"
    else
      disk_type="SSD"
    fi
  fi

  # Extract SMART attributes; the last column of a matching line is taken
  # as the raw value.
  local power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib media_wearout
  power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours\|Power-On" | awk '{print $NF}' | head -1)
  [[ -z "$power_on_hours" ]] && power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours_and_Msec" | awk '{print $10}' | head -1)
  reallocated_sectors=$(echo "$attributes" | grep -i "Reallocated_Sector_Ct\|Reallocated_Event_Count" | awk '{print $NF}' | head -1)
  pending_sectors=$(echo "$attributes" | grep -i "Current_Pending_Sector" | awk '{print $NF}' | head -1)
  # Host_Writes_32MiB counts 32 MiB units and has its own field; it must not
  # land in total_written, which the caller converts as 512-byte sectors.
  # NOTE(review): "Lifetime_Writes" is kept as before, but attributes named
  # Lifetime_Writes_GiB are presumably in GiB, not sectors - confirm.
  total_written=$(echo "$attributes" | grep -i "Total_LBAs_Written\|Lifetime_Writes" | awk '{print $NF}' | head -1)
  host_writes_32mib=$(echo "$attributes" | grep -i "Host_Writes_32MiB" | awk '{print $NF}' | head -1)
  # For SAS drives, try to get media wearout for SSDs
  media_wearout=$(echo "$attributes" | grep -i "Media_Wearout_Indicator\|Wear_Leveling_Count" | awk '{print $NF}' | head -1)

  echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout"
}
# Convert a SMART write counter into terabytes written (decimal TB, 10^12
# bytes).
# Arguments: $1 - counter expressed in 32 MiB units (Host_Writes_32MiB)
#            $2 - counter expressed in 512-byte sectors (Total_LBAs_Written)
#            The sector counter wins when both are usable.
# Outputs:   TB written with two decimals (truncated), or "0" when neither
#            argument is a positive integer or bc produced no result
calculate_tbw() {
  local raw_value=$1
  local sectors=$2
  local bytes_expr tbw

  # Only plain non-negative integers are accepted; anything else (empty,
  # "123h+45m", ...) is treated as "no data" instead of reaching arithmetic.
  if [[ "$sectors" =~ ^[0-9]+$ && "$sectors" != "0" ]]; then
    bytes_expr="$sectors * 512"
  elif [[ "$raw_value" =~ ^[0-9]+$ && "$raw_value" != "0" ]]; then
    # 32 MiB = 32 * 1048576 bytes (binary units, not 32 * 10^6).
    bytes_expr="$raw_value * 32 * 1048576"
  else
    echo "0"
    return
  fi

  # The multiplication and a single division happen inside bc: no 64-bit
  # shell overflow and no repeated truncation between chained divisions.
  tbw=$(echo "scale=2; ($bytes_expr) / 1000000000000" | bc 2>/dev/null)
  [[ -n "$tbw" ]] || tbw="0"
  # bc prints values below 1 as ".50"; add the leading zero for display.
  [[ "$tbw" == .* ]] && tbw="0${tbw}"
  echo "$tbw"
}
# Guess the rated endurance (TB written) of an SSD from its model string and
# capacity. The figures are fixed heuristics, not vendor specifications.
# Arguments: $1 - model string, matched case-insensitively against keyword
#                 lists that select one of three tables: SAS SSD, enterprise
#                 SATA SSD, or (no match) consumer SSD
#            $2 - capacity in GB (integer)
# Outputs:   endurance in TB on stdout
estimate_ssd_endurance() {
  local disk_model=$1
  local capacity_gb=$2
  local -a tiers
  local floor tier

  # Each tier is "minimum capacity in GB:endurance in TB", largest first;
  # "floor" is the value for drives below the smallest listed tier.
  if grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD" <<< "$disk_model"; then
    # Enterprise SAS SSDs
    tiers=(1000:10000 600:6000 400:4000)
    floor=2000
  elif grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA" <<< "$disk_model"; then
    # Enterprise SATA SSDs
    tiers=(1000:1200 480:600 240:300)
    floor=150
  else
    # Consumer SSDs
    tiers=(1000:600 480:300 240:150 120:80)
    floor=40
  fi

  for tier in "${tiers[@]}"; do
    if [[ $capacity_gb -ge ${tier%%:*} ]]; then
      echo "${tier##*:}"
      return
    fi
  done
  echo "$floor"
}
# Estimate how much of an SSD's life is left.
# Arguments: $1 - power-on hours (leading digits are used)
#            $2 - TB written so far
#            $3 - model string   (passed to estimate_ssd_endurance)
#            $4 - capacity in GB (passed to estimate_ssd_endurance)
#            $5 - media wearout value; when it is a non-zero integer it is
#                 reported as the remaining percentage instead of the
#                 TBW-based figure
# Outputs:   one line "PERCENT|TBW_REMAINING|STATUS|SOURCE" where SOURCE is
#            media_wearout, tbw or estimated; the early "unknown" answer is
#            "Unknown||Unknown||Unknown". Remaining values never go below
#            zero: the endurance figure is only an estimate.
estimate_ssd_lifespan() {
  local power_on_hours=$1
  local tbw_used=$2
  local disk_model=$3
  local capacity_gb=$4
  local media_wearout=$5
  local number_re='^[0-9]*[.]?[0-9]+$'

  # Raw SMART values can carry suffixes such as "1234h+56m"; keep the digits.
  power_on_hours=${power_on_hours%%[!0-9]*}
  if [[ -z "$power_on_hours" ]] || (( 10#$power_on_hours == 0 )); then
    echo "Unknown||Unknown||Unknown"
    return
  fi
  # Anything that is not a plain decimal number is treated as "no writes".
  [[ "$tbw_used" =~ $number_re ]] || tbw_used=0

  local estimated_endurance tbw_remaining
  estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")
  tbw_remaining=$(echo "scale=2; r = $estimated_endurance - $tbw_used; if (r < 0) r = 0; r" | bc 2>/dev/null)
  tbw_remaining=${tbw_remaining:-0}

  local color status

  # If we have media wearout indicator, use it for more accurate estimation.
  # The reported percentage is always the indicator itself, including in the
  # critical range.
  if [[ "$media_wearout" =~ ^[0-9]+$ ]] && (( 10#$media_wearout != 0 )); then
    if (( 10#$media_wearout <= 10 )); then
      color=$RED
      status="Critical wear"
    elif (( 10#$media_wearout <= 30 )); then
      color=$YELLOW
      status="High wear"
    elif (( 10#$media_wearout <= 70 )); then
      color=$YELLOW
      status="Moderate wear"
    else
      color=$GREEN
      status="Healthy"
    fi
    echo "${color}${media_wearout}%${NC}|${color}${tbw_remaining} TB${NC}|${color}${status}${NC}|media_wearout"
    return
  fi

  if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -ne 1 ]]; then
    echo "Unknown|${estimated_endurance} TB|New|estimated"
    return
  fi

  local lifespan_used lifespan_remaining
  lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null)
  lifespan_used=${lifespan_used:-0}
  lifespan_remaining=$(echo "scale=1; p = 100 - $lifespan_used; if (p < 0) p = 0; p" | bc 2>/dev/null)

  if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then
    color=$RED
    status="High wear"
  elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then
    color=$YELLOW
    status="Moderate wear"
  else
    color=$GREEN
    status="Healthy"
  fi
  echo "${color}${lifespan_remaining:-100}%${NC}|${color}${tbw_remaining} TB${NC}|${color}${status}${NC}|tbw"
}
# Produce a rough remaining-life verdict for a hard disk.
# Arguments: $1 - power-on hours (empty yields "Unknown")
#            $2 - reallocated sector count (defaults to 0)
#            $3 - pending sector count (defaults to 0)
# Outputs:   one colored verdict line. Pending sectors outrank reallocated
#            sectors, which outrank power-on hours.
estimate_hdd_lifespan() {
  local power_on_hours=$1
  local reallocated_sectors=${2:-0}
  local pending_sectors=${3:-0}
  local verdict

  if [[ -z "$power_on_hours" ]]; then
    verdict="Unknown"
  elif [[ "$pending_sectors" -gt 0 ]]; then
    verdict="${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
  elif [[ "$reallocated_sectors" -gt 100 ]]; then
    verdict="${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)"
  elif [[ "$reallocated_sectors" -gt 10 ]]; then
    verdict="${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)"
  elif [[ "$power_on_hours" -gt 40000 ]]; then
    verdict="${YELLOW}1-2 years${NC} (High usage: $power_on_hours hours)"
  elif [[ "$power_on_hours" -gt 25000 ]]; then
    verdict="${GREEN}2-3 years${NC} (Moderate usage: $power_on_hours hours)"
  else
    verdict="${GREEN}> 3 years${NC} (Low usage: $power_on_hours hours)"
  fi
  echo "$verdict"
}
# Inspect one disk and print a health report.
# Arguments: $1 - device path
#            $2 - optional smartctl device type (forwarded to the helpers)
# Outputs:   the report on stdout. Returns early with an explanation when
#            test_smart_access reports that no usable SMART data is
#            available; "limited_attributes" only adds a note and continues.
check_disk() {
  local disk=$1
  local controller=$2
  print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
  echo "=================================================="

  # Test SMART access level
  local access_level
  access_level=$(test_smart_access "$disk" "$controller")
  case "$access_level" in
    "no_access")
      print_color "$RED" "ERROR: Cannot access disk through controller"
      echo "Possible reasons:"
      echo " - Controller doesn't support SMART passthrough"
      echo " - Disk is part of a hardware RAID array"
      echo " - Insufficient permissions (try running as root)"
      echo " - Controller busy or offline"
      echo ""
      return
      ;;
    "not_available")
      print_color "$YELLOW" "SMART not available on this disk"
      echo "This disk does not support SMART monitoring"
      echo ""
      return
      ;;
    "disabled")
      print_color "$YELLOW" "SMART is disabled on this disk"
      echo "SMART is available but currently disabled"
      echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk"
      echo ""
      return
      ;;
    "no_attributes")
      print_color "$YELLOW" "WARNING: Cannot read SMART attributes"
      echo "This is common with hardware RAID controllers like PERC H730P"
      echo "Try checking through the RAID management interface"
      echo ""
      return
      ;;
    "limited_attributes")
      print_color "$YELLOW" "NOTE: Limited SMART data available"
      echo "Controller is filtering some SMART attributes"
      ;;
  esac

  # Get disk information. The twelve fields are read into locals so that
  # values from one disk cannot leak into globals or into the next call.
  local disk_info
  disk_info=$(get_disk_info "$disk" "$controller")
  local model serial capacity firmware health_status disk_type
  local power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib media_wearout
  IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info"

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Capacity: ${capacity:-Unknown}"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"

  # Only show power on hours if available
  if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
    echo "Power On Hours: $power_on_hours"
  else
    echo "Power On Hours: Unknown"
  fi

  # Disk type specific analysis
  if [[ "$disk_type" == "SSD" ]]; then
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$host_writes_32mib" "")
    fi
    local has_writes=0
    [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]] && has_writes=1
    if (( has_writes )); then
      echo "TBW Used: ${tbw_used} TB"
    fi

    # Capacity in GB for the endurance estimate. "capacity" is the text in
    # front of the "[480 GB]" bracket, i.e. a byte count with thousands
    # separators ("480,103,981,056 bytes "), so it never contains a GB/TB
    # unit. Strip everything but the digits and convert bytes to decimal GB.
    local capacity_gb=0
    local capacity_digits=${capacity%%bytes*}
    capacity_digits=${capacity_digits//[!0-9]/}
    if [[ -n "$capacity_digits" ]]; then
      capacity_gb=$(( 10#$capacity_digits / 1000000000 ))
    fi

    local lifespan_info lifespan_percent tbw_remaining wear_status wear_source _
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout")
    IFS='|' read -r lifespan_percent tbw_remaining wear_status wear_source _ <<< "$lifespan_info"

    if [[ "$wear_source" != "media_wearout" ]] && (( has_writes )); then
      echo "TBW Remaining: $tbw_remaining"
    fi
    echo "Lifespan: $lifespan_percent ($wear_status)"

    # Show wear source if available
    case "$wear_source" in
      media_wearout) echo "Wear Source: Media Wearout Indicator" ;;
      tbw) echo "Wear Source: TBW Calculation" ;;
      estimated) echo "Wear Source: Estimated Endurance" ;;
    esac
  elif [[ "$disk_type" == "HDD" ]]; then
    if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then
      echo "Realloc Sectors: $reallocated_sectors"
    fi
    if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then
      echo "Pending Sectors: $pending_sectors"
    fi
    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
    echo "Lifespan: $lifespan"
  else
    print_color "$YELLOW" "Limited information available for this disk type"
    echo "This is normal for hardware RAID configurations like PERC H730P"
    echo "For detailed SAS drive information, use controller management tools"
  fi
  echo ""
}
# List the whole-disk devices present on this system (no partitions).
# Outputs: device paths on a single line, separated by spaces, in the order
#          SATA/SAS block devices, NVMe namespaces, SCSI generic nodes,
#          virtio/Xen block devices.
detect_disks() {
  local -a disks=()
  local disk sg_name sg_sysfs

  # Check for SATA/SAS disks - only main devices, no partitions
  for disk in /dev/sd[a-z] /dev/sd[a-z][a-z]; do
    [[ -b "$disk" ]] && disks+=("$disk")
  done

  # Check for NVMe disks - namespaces only (nvme<ctrl>n<ns>), any number of
  # digits; names with a trailing p<N> are partitions and are skipped.
  for disk in /dev/nvme*n*; do
    [[ "$disk" =~ ^/dev/nvme[0-9]+n[0-9]+$ && -b "$disk" ]] && disks+=("$disk")
  done

  # Check for SAS disks via SCSI generic. /dev/sg* are character devices,
  # so a block-device test can never match them. To avoid reporting a disk
  # twice and to skip enclosures and similar nodes, an sg node is listed
  # only when sysfs says it is a direct-access device (type 0) that has no
  # block device of its own.
  # NOTE(review): relies on /sys/class/scsi_generic/<sgN>/device/{type,block}
  # - confirm this layout on the target kernels.
  for disk in /dev/sg[0-9]*; do
    [[ -c "$disk" ]] || continue
    sg_name=${disk##*/}
    sg_sysfs="/sys/class/scsi_generic/${sg_name}/device"
    [[ "$(cat "${sg_sysfs}/type" 2>/dev/null)" == "0" ]] || continue
    compgen -G "${sg_sysfs}/block/*" >/dev/null && continue
    disks+=("$disk")
  done

  # Check for other disk types - only main devices
  for disk in /dev/vd[a-z] /dev/xvd[a-z]; do
    [[ -b "$disk" ]] && disks+=("$disk")
  done

  echo "${disks[@]}"
}
# Program entry point.
# Arguments: optional device paths. Without any, the devices reported by
#            detect_disks are used.
# Prints a banner, verifies dependencies, builds the list of devices, warns
# when not running as root, runs check_disk on every device and closes with
# hints for PERC H730P setups. Exits with status 1 when no usable device
# remains.
main() {
  print_color "$BLUE" "Alma Linux 9 Disk Health Check Script v$VERSION"
  print_color "$BLUE" "Enhanced with PERC H730P and SAS Support"
  print_color "$BLUE" "============================================"
  echo ""
  check_dependencies

  local -a disks=()
  local disk
  # If specific disk provided, check only that disk
  if [[ $# -gt 0 ]]; then
    for disk in "$@"; do
      # Block devices (/dev/sdX, /dev/nvmeXnY) and character devices are
      # both accepted: SCSI generic nodes such as /dev/sg0, which the usage
      # text advertises, are character devices.
      if [[ -b "$disk" || -c "$disk" ]]; then
        disks+=("$disk")
      else
        print_color "$RED" "Error: $disk is not a valid block or character device"
      fi
    done
  else
    # Auto-detect disks
    print_color "$CYAN" "Auto-detecting disks (excluding partitions)..."
    read -ra disks <<< "$(detect_disks)"
  fi

  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi
  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check if running as root, warn if not
  if [[ $EUID -ne 0 ]]; then
    print_color "$YELLOW" "Warning: Not running as root."
    print_color "$YELLOW" "Some disks/controllers may show limited information."
    echo "For complete results, run as: sudo $0"
    echo ""
  fi

  # Check each disk
  for disk in "${disks[@]}"; do
    check_disk "$disk"
  done

  print_color "$BLUE" "Check completed!"
  echo ""
  print_color "$CYAN" "Note: For PERC H730P controllers with SAS drives:"
  print_color "$CYAN" " - Install 'storcli' for detailed controller information"
  print_color "$CYAN" " - Use 'smartctl -d sat /dev/sgX' to try direct access"
  print_color "$CYAN" " - Hardware RAID controllers often limit SMART data access"
}
# Print the command-line help text on stdout, using SCRIPT_NAME as the
# program name.
usage() {
  printf '%s\n' \
    "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks" \
    "" \
    "Examples:" \
    " $SCRIPT_NAME # Check all auto-detected disks" \
    " sudo $SCRIPT_NAME # Check all disks (as root)" \
    " $SCRIPT_NAME /dev/sda # Check specific disk" \
    " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" \
    " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks"
}
# Command-line entry: the help and version flags are answered here and end
# the script with status 0; any other invocation is forwarded to main with
# the original arguments.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi

337
harvester-v2.1.sh Executable file
View File

@@ -0,0 +1,337 @@
#!/bin/bash
# Disk Health Check Script for Harvester OS
# Checks SSD TBW/lifespan and HDD health status
# Supports RAID controllers and direct disks
# Script identity, shown by usage() and --version.
# "--" keeps basename from parsing a $0 that starts with a dash as an option.
SCRIPT_NAME=$(basename -- "$0")
VERSION="2.1"
# Color codes (terminal escape sequences obtained from tput).
# tput fails and complains on stderr when TERM is unset or unknown (for
# example under cron); in that case each variable falls back to an empty
# string so the report is simply printed without colors.
RED=$(tput setaf 1 2>/dev/null || true)
GREEN=$(tput setaf 2 2>/dev/null || true)
YELLOW=$(tput setaf 3 2>/dev/null || true)
BLUE=$(tput setaf 4 2>/dev/null || true)
CYAN=$(tput setaf 6 2>/dev/null || true)
NC=$(tput sgr0 2>/dev/null || true)
# Print one line of text wrapped in a color sequence.
# Arguments: $1 - color escape sequence to emit first
#            $2 - message text
# Globals:   NC (read) - sequence appended after the message
# Backslash escapes inside the arguments are interpreted (echo -e).
print_color() {
  local -r tint=$1 text=$2
  echo -e "${tint}${text}${NC}"
}
# Report whether a command name can be resolved by the shell.
# Arguments: $1 - command name to look up
# Returns:   0 when `command -v` finds it, non-zero otherwise; prints nothing
command_exists() {
  command -v "$1" &>/dev/null
}
# Stop right away when smartctl is missing: every check below shells out to it.
command_exists smartctl || {
  print_color $RED "Error: smartctl is not installed. Please install smartmontools package."
  exit 1
}
# Classify a disk from the output of `smartctl -i`.
# Arguments: $1 - device path
#            $2 - optional value for smartctl's -d option (empty = omit -d)
# Outputs:   "SSD", "HDD" or "UNKNOWN" on stdout
get_disk_type() {
  local disk=$1
  local controller=$2
  local identity
  # The expansions are deliberately unquoted so that the arguments are
  # word-split exactly as in a command line assembled as a plain string.
  # shellcheck disable=SC2086
  identity=$(smartctl ${controller:+-d $controller} -i $disk 2>/dev/null)
  # Order matters: the SSD marker is tested before the rotation-rate marker.
  case "$identity" in
    *"Solid State Device"*) echo "SSD" ;;
    *"Rotation Rate"*) echo "HDD" ;;
    *) echo "UNKNOWN" ;;
  esac
}
# Function to calculate TBW (terabytes written, decimal TB) for an SSD.
# Arguments: $1 - write counter expressed in units of 32 MiB
#                 (callers pass the Host_Writes_32MiB raw value); used only
#                 when $2 is missing, zero or not a number
#            $2 - write counter expressed in 512-byte sectors (preferred)
# Outputs:   the amount written in TB with two decimals (truncated, with a
#            leading zero, e.g. "0.51"), or "0" when neither counter is usable
# Only shell integer arithmetic is used, so no external tool is required.
# Counters large enough to overflow 64-bit arithmetic are not handled.
calculate_tbw() {
  local raw_value=$1
  local sectors=$2
  local bytes=0
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  if [[ "$sectors" =~ ^[0-9]+$ ]] && (( 10#$sectors > 0 )); then
    bytes=$(( 10#$sectors * 512 ))
  elif [[ "$raw_value" =~ ^[0-9]+$ ]] && (( 10#$raw_value > 0 )); then
    # 32 MiB = 32 * 1024 * 1024 bytes (binary units, not 32,000,000 bytes).
    bytes=$(( 10#$raw_value * 33554432 ))
  fi
  if (( bytes <= 0 )); then
    echo "0"
    return
  fi
  # Work in hundredths of a TB (10^10 bytes) to get two decimals from
  # integer math.
  local hundredths=$(( bytes / 10000000000 ))
  printf '%d.%02d\n' "$(( hundredths / 100 ))" "$(( hundredths % 100 ))"
}
# Function to estimate SSD endurance based on model and capacity.
# Arguments: $1 - model string (matched case-insensitively against a short
#                 list of names that select the higher "enterprise" table)
#            $2 - capacity in GB; a fractional part is ignored and an empty
#                 or non-numeric value counts as 0
# Outputs:   estimated endurance in TB written, as an integer, on stdout
# The figures are rough rules of thumb hard-coded below, not vendor ratings.
estimate_ssd_endurance() {
  local disk_model=$1
  local capacity_gb=$2
  # Reduce the capacity to a plain base-10 integer so the comparisons below
  # cannot fail with an arithmetic error on input such as "480.1".
  capacity_gb=${capacity_gb%%.*}
  [[ "$capacity_gb" =~ ^[0-9]+$ ]] || capacity_gb=0
  capacity_gb=$(( 10#$capacity_gb ))
  # Enterprise SSDs typically have higher endurance
  if grep -qi "MTFDDAK480TDS\|MICRON\|INTEL\|SAMSUNG" <<< "$disk_model"; then
    # Enterprise SSDs
    if (( capacity_gb >= 1000 )); then
      echo "1200" # 1.2PB for 1TB enterprise
    elif (( capacity_gb >= 480 )); then
      echo "600" # 600TB for 480GB enterprise
    elif (( capacity_gb >= 240 )); then
      echo "300" # 300TB for 240GB enterprise
    else
      echo "150" # 150TB for smaller enterprise
    fi
  else
    # Consumer SSDs
    if (( capacity_gb >= 1000 )); then
      echo "600" # 600TB for 1TB consumer
    elif (( capacity_gb >= 480 )); then
      echo "300" # 300TB for 480GB consumer
    elif (( capacity_gb >= 240 )); then
      echo "150" # 150TB for 240GB consumer
    elif (( capacity_gb >= 120 )); then
      echo "80" # 80TB for 120GB consumer
    else
      echo "40" # 40TB for smaller drives
    fi
  fi
}
# Estimate remaining SSD life from the amount of data written.
# Arguments: $1 - power-on hours (empty or 0 short-circuits to "Unknown")
#            $2 - TB written so far
#            $3 - model string, forwarded to estimate_ssd_endurance
#            $4 - capacity in GB, forwarded to estimate_ssd_endurance
# Globals:   RED, YELLOW, GREEN, NC (read)
# Outputs:   one "percent|TB remaining|status" record on stdout
# The arithmetic is delegated to bc.
estimate_ssd_lifespan() {
  local power_on_hours=$1
  local tbw_used=$2
  local disk_model=$3
  local capacity_gb=$4
  if [[ -z "$power_on_hours" || "$power_on_hours" -eq 0 ]]; then
    echo "Unknown||Unknown"
    return
  fi
  local endurance_tb remaining_tb
  endurance_tb=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")
  remaining_tb=$(bc -l 2>/dev/null <<< "scale=2; $endurance_tb - $tbw_used" || echo "0")
  # Without a positive write counter only the rated endurance can be reported.
  [[ $(bc -l 2>/dev/null <<< "$tbw_used > 0") -eq 1 ]] || {
    echo "Unknown|${endurance_tb} TB|New"
    return
  }
  local used_pct left_pct tone verdict
  used_pct=$(bc -l 2>/dev/null <<< "scale=1; $tbw_used * 100 / $endurance_tb" || echo "0")
  left_pct=$(bc -l 2>/dev/null <<< "scale=1; 100 - $used_pct" || echo "100")
  # Thresholds apply to the share of endurance already consumed.
  if [[ $(bc -l <<< "$used_pct >= 80") -eq 1 ]]; then
    tone=$RED
    verdict="High wear"
  elif [[ $(bc -l <<< "$used_pct >= 50") -eq 1 ]]; then
    tone=$YELLOW
    verdict="Moderate wear"
  else
    tone=$GREEN
    verdict="Healthy"
  fi
  echo "${tone}${left_pct}%${NC}|${tone}${remaining_tb} TB${NC}|${verdict}"
}
# Rough HDD life expectancy from three SMART counters.
# Arguments: $1 - power-on hours (empty yields "Unknown")
#            $2 - reallocated sector count
#            $3 - current pending sector count
# Globals:   RED, YELLOW, GREEN, NC (read)
# Outputs:   a colored verdict followed by the reason in parentheses
estimate_hdd_lifespan() {
  local power_on_hours=$1 reallocated_sectors=$2 pending_sectors=$3
  if [[ -z "$power_on_hours" ]]; then
    echo "Unknown"
    return
  fi
  local tone verdict reason
  # Checked from most to least alarming; the first matching rule wins.
  if [[ "$pending_sectors" -gt 0 ]]; then
    tone=$RED
    verdict="CRITICAL"
    reason="Pending sectors: $pending_sectors"
  elif [[ "$reallocated_sectors" -gt 100 ]]; then
    tone=$RED
    verdict="< 6 months"
    reason="High reallocated sectors: $reallocated_sectors"
  elif [[ "$reallocated_sectors" -gt 10 ]]; then
    tone=$YELLOW
    verdict="6-12 months"
    reason="Reallocated sectors: $reallocated_sectors"
  elif [[ "$power_on_hours" -gt 40000 ]]; then
    tone=$YELLOW
    verdict="1-2 years"
    reason="High usage: $power_on_hours hours"
  elif [[ "$power_on_hours" -gt 25000 ]]; then
    tone=$GREEN
    verdict="2-3 years"
    reason="Moderate usage: $power_on_hours hours"
  else
    tone=$GREEN
    verdict="> 3 years"
    reason="Low usage: $power_on_hours hours"
  fi
  echo "${tone}${verdict}${NC} (${reason})"
}
# Function to check a single disk and print its health report to stdout.
# Arguments: $1 - device path
#            $2 - value for smartctl's -d option; empty or the placeholder
#                 "direct" means the disk is queried without -d
# Globals:   CYAN, YELLOW (read)
# Calls:     smartctl, get_disk_type, calculate_tbw, estimate_ssd_lifespan,
#            estimate_hdd_lifespan, print_color
check_disk() {
  local disk=$1
  local controller=$2
  # main() tags non-RAID disks as "<dev>:direct"; "direct" is only a label,
  # not a smartctl device type, so it must not be passed to -d.
  [[ "$controller" == "direct" ]] && controller=""
  # An array keeps every argument intact regardless of its content.
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")
  print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
  echo "=================================================="
  # Get basic disk information
  local info health attributes
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  health=$("${smart_cmd[@]}" -H "$disk" 2>/dev/null)
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  # Extract disk information
  local model serial capacity firmware
  model=$(echo "$info" | grep "Device Model:" | cut -d: -f2 | sed 's/^[ \t]*//')
  serial=$(echo "$info" | grep "Serial Number:" | cut -d: -f2 | sed 's/^[ \t]*//')
  capacity=$(echo "$info" | grep "User Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1)
  firmware=$(echo "$info" | grep "Firmware Version:" | cut -d: -f2 | sed 's/^[ \t]*//')
  # Extract capacity in GB for endurance calculation.
  # $capacity holds only the part before "[", i.e. the byte count with
  # thousands separators, so the GB figure is derived from the bytes.
  local capacity_bytes=${capacity//[!0-9]/}
  local capacity_gb=0
  [[ -n "$capacity_bytes" ]] && capacity_gb=$(( 10#$capacity_bytes / 1000000000 ))
  local disk_type health_status
  disk_type=$(get_disk_type "$disk" "$controller")
  health_status=$(echo "$health" | grep "result:" | cut -d: -f2 | sed 's/^[ \t]*//')
  # Extract SMART attributes (column 10 is RAW_VALUE); "exit" keeps only the
  # first matching row so each variable holds a single value.
  local power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib
  power_on_hours=$(awk '/Power_On_Hours/ {print $10; exit}' <<< "$attributes")
  # Some drives report e.g. "1234h+56m+07.890s"; keep the leading hours.
  power_on_hours=${power_on_hours%%[!0-9]*}
  reallocated_sectors=$(awk '/Reallocated_Sector_Ct/ {print $10; exit}' <<< "$attributes")
  pending_sectors=$(awk '/Current_Pending_Sector/ {print $10; exit}' <<< "$attributes")
  # Sector counter only: Host_Writes_32MiB uses a different unit and is
  # handled separately below.
  total_written=$(awk '/Total_LBAs_Written/ {print $10; exit}' <<< "$attributes")
  # For SSDs with Host_Writes_32MiB
  host_writes_32mib=$(awk '/Host_Writes_32MiB/ {print $10; exit}' <<< "$attributes")
  # Display basic information
  echo "Model: $model"
  echo "Serial: $serial"
  echo "Type: $disk_type"
  echo "Capacity: $capacity"
  echo "Firmware: $firmware"
  echo "Health: $health_status"
  echo "Power On Hours: $power_on_hours"
  # Disk type specific analysis
  if [[ "$disk_type" == "SSD" ]]; then
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$host_writes_32mib" "")
    fi
    echo "TBW Used: ${tbw_used} TB"
    local lifespan_info lifespan_percent tbw_remaining wear_status
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb")
    lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1)
    tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2)
    wear_status=$(echo "$lifespan_info" | cut -d'|' -f3)
    echo "TBW Remaining: $tbw_remaining"
    echo "Lifespan: $lifespan_percent ($wear_status)"
  elif [[ "$disk_type" == "HDD" ]]; then
    echo "Realloc Sectors: ${reallocated_sectors:-0}"
    echo "Pending Sectors: ${pending_sectors:-0}"
    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
    echo "Lifespan: $lifespan"
  else
    print_color "$YELLOW" "Unknown disk type - limited information available"
  fi
  echo ""
}
# Build the list of disks to examine.
# Outputs: one line of space-separated "<device>:<controller>" entries, where
#          <controller> is "direct" or a "<type>,<index>" pair that smartctl
#          accepted for /dev/sda
detect_raid_disks() {
  local -a found=()
  local dev probe
  # Check for direct disks first
  for dev in /dev/sd[a-z] /dev/nvme[0-9]n[0-9]; do
    # An unmatched glob stays literal and is rejected by the -b test.
    [[ -b "$dev" ]] && found+=("$dev:direct")
  done
  # Check for RAID controllers: brace expansion yields every "<type>,<index>"
  # combination, controller type by controller type, indexes 0 through 15.
  for probe in {megaraid,cciss,areca,3ware,hpt},{0..15}; do
    if smartctl -d "$probe" -i /dev/sda >/dev/null 2>&1; then
      found+=("/dev/sda:$probe")
    fi
  done
  echo "${found[@]}"
}
# Main function: collect the disks to examine and report on each of them.
# Arguments: optional device paths; with none, detect_raid_disks is used
# Globals:   VERSION, BLUE, CYAN, GREEN, RED (read)
# Exits with status 1 when no usable disk is found.
main() {
  print_color "$BLUE" "Disk Health Check Script v$VERSION for Harvester OS"
  print_color "$BLUE" "===================================================="
  echo ""
  local disks=()
  local disk disk_info controller
  # If specific disk provided, check only that disk
  if [[ $# -gt 0 ]]; then
    for disk in "$@"; do
      if [[ -b "$disk" ]]; then
        disks+=("$disk:direct")
      else
        print_color "$RED" "Error: $disk is not a valid block device"
      fi
    done
  else
    # Auto-detect disks
    print_color "$CYAN" "Auto-detecting disks..."
    read -ra disks <<< "$(detect_raid_disks)"
  fi
  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$RED" "No disks found or accessible"
    exit 1
  fi
  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""
  # Check each disk; entries have the form "<device>:<controller>".
  for disk_info in "${disks[@]}"; do
    IFS=':' read -r disk controller <<< "$disk_info"
    # "direct" only marks a disk that needs no -d option. Passing it on
    # would make check_disk run "smartctl -d direct", which smartctl rejects.
    [[ "$controller" == "direct" ]] && controller=""
    check_disk "$disk" "$controller"
  done
  print_color "$BLUE" "Check completed!"
}
# Usage information: print the command-line help text to stdout.
# Globals: SCRIPT_NAME (read) - name shown in the synopsis and examples
usage() {
  # One argument per output line; printf repeats the format for each of them.
  printf '%s\n' \
    "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks and RAID arrays" \
    "" \
    "Examples:" \
    " $SCRIPT_NAME # Check all auto-detected disks" \
    " $SCRIPT_NAME /dev/sda # Check specific disk" \
    " $SCRIPT_NAME /dev/sda /dev/sdb # Check multiple disks"
}
# Parse command line arguments: only the first argument is inspected.
# -h/--help and -v/--version print and exit 0; anything else (including no
# arguments at all) is handed to main unchanged.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi

542
harvester-v2.4.sh Executable file
View File

@@ -0,0 +1,542 @@
#!/bin/bash
# Disk Health Check Script for Harvester OS
# Enhanced with SAS/PERC H730P controller support
# Checks SSD TBW/lifespan and HDD health status
# Script identity, shown by usage() and --version.
# "--" keeps basename from parsing a $0 that starts with a dash as an option.
SCRIPT_NAME=$(basename -- "$0")
VERSION="2.4"
# Color codes (terminal escape sequences obtained from tput).
# tput fails and complains on stderr when TERM is unset or unknown (for
# example under cron); in that case each variable falls back to an empty
# string so the report is simply printed without colors.
RED=$(tput setaf 1 2>/dev/null || true)
GREEN=$(tput setaf 2 2>/dev/null || true)
YELLOW=$(tput setaf 3 2>/dev/null || true)
BLUE=$(tput setaf 4 2>/dev/null || true)
CYAN=$(tput setaf 6 2>/dev/null || true)
NC=$(tput sgr0 2>/dev/null || true)
# Print one line of text wrapped in a color sequence.
# Arguments: $1 - color escape sequence to emit first
#            $2 - message text
# Globals:   NC (read) - sequence appended after the message
# Backslash escapes inside the arguments are interpreted (echo -e).
print_color() {
  local -r tint=$1 text=$2
  echo -e "${tint}${text}${NC}"
}
# Report whether a command name can be resolved by the shell.
# Arguments: $1 - command name to look up
# Returns:   0 when `command -v` finds it, non-zero otherwise; prints nothing
command_exists() {
  command -v "$1" &>/dev/null
}
# Stop right away when smartctl is missing: every check below shells out to it.
command_exists smartctl || {
  print_color $RED "Error: smartctl is not installed. Please install smartmontools package."
  exit 1
}
# Function to test SMART access and classify how much data is readable.
# Arguments: $1 - device path
#            $2 - value for smartctl's -d option; empty or the placeholder
#                 "direct" means the disk is queried without -d
# Outputs:   exactly one of these words on stdout:
#              no_access          - "smartctl -i" failed
#              not_available      - no "SMART support is: Available" line
#              disabled           - SMART available but not enabled
#              no_attributes      - "smartctl -A" printed nothing
#              limited_attributes - no Power_On_Hours / Power-On row found
#              full_access        - all checks passed
# SMART is never switched on here; the state is only inspected.
test_smart_access() {
  local disk=$1
  local controller=$2
  # "direct" is the label used for non-RAID disks, not a smartctl type.
  [[ "$controller" == "direct" ]] && controller=""
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")
  # Test basic SMART access and keep the output for the checks below.
  local smart_info
  if ! smart_info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null); then
    echo "no_access"
    return
  fi
  # The "SMART support is:" prefix can appear on two lines, one reporting
  # availability and one reporting the enabled state, so the lines are
  # searched for the keywords instead of being compared as a single value.
  local support_lines
  support_lines=$(grep "SMART support is:" <<< "$smart_info")
  if [[ "$support_lines" != *"Available"* ]]; then
    echo "not_available"
    return
  fi
  if [[ "$support_lines" != *"Enabled"* ]]; then
    echo "disabled"
    return
  fi
  # Test attribute reading
  local attributes
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  if [[ -z "$attributes" ]]; then
    echo "no_attributes"
    return
  fi
  # Check if we have basic attributes
  local power_on_hours
  power_on_hours=$(echo "$attributes" | grep -i "Power_On_Hours\|Power-On" | awk '{print $10}' | head -1)
  if [[ -z "$power_on_hours" ]]; then
    echo "limited_attributes"
    return
  fi
  echo "full_access"
}
# Function to get disk information with enhanced SAS support.
# Arguments: $1 - device path
#            $2 - value for smartctl's -d option; empty or the placeholder
#                 "direct" means the disk is queried without -d
# Outputs:   one '|'-separated record on stdout:
#            model|serial|capacity|firmware|health|type|power_on_hours|
#            reallocated_sectors|pending_sectors|total_written|
#            host_writes_32mib|media_wearout
#            - total_written is a count of 512-byte sectors
#            - host_writes_32mib is a count of 32 MiB units
#            - media_wearout is the normalized VALUE column of the wear
#              attribute (0-100 scale on the drives this was written for;
#              NOTE(review): confirm for other vendors)
get_disk_info() {
  local disk=$1
  local controller=$2
  # "direct" is the label used for non-RAID disks, not a smartctl type.
  [[ "$controller" == "direct" ]] && controller=""
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")
  local info attributes health
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  health=$("${smart_cmd[@]}" -H "$disk" 2>/dev/null)
  # Extract information with multiple fallbacks for SAS drives
  local model vendor serial capacity firmware health_status
  model=$(echo "$info" | grep -i "Device Model:\|Product:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  vendor=$(echo "$info" | grep -i "Vendor:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model"
  serial=$(echo "$info" | grep -i "Serial Number:\|Serial number:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  capacity=$(echo "$info" | grep -i "User Capacity:\|Total NVM Capacity:" | cut -d: -f2 | sed 's/^[ \t]*//' | cut -d'[' -f1 | head -1)
  firmware=$(echo "$info" | grep -i "Firmware Version:\|Firmware revision:\|Revision:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  # "SMART Health Status:" is the wording smartctl uses for SCSI/SAS devices.
  health_status=$(echo "$health" | grep -i "result:\|SMART overall-health\|SMART Health Status:" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1)
  [[ -z "$health_status" ]] && health_status=$(echo "$health" | grep -i "SMART overall-health" | awk -F'[' '{print $2}' | cut -d']' -f1)
  # Get disk type with SAS support
  local disk_type="UNKNOWN"
  if echo "$info" | grep -qi "Solid State Device"; then
    disk_type="SSD"
  elif echo "$info" | grep -qi "Rotation Rate"; then
    disk_type="HDD"
  elif echo "$info" | grep -qi "SCSI\|SAS"; then
    # SAS drives often don't specify, check rotation rate
    if echo "$info" | grep -qi "15000\|10000\|7200"; then
      disk_type="HDD"
    else
      disk_type="SSD"
    fi
  fi
  # awk program: raw value of the first row matching `pat` (lower-case ERE,
  # matched case-insensitively). Rows of the ATA attribute table start with
  # a numeric ID and carry RAW_VALUE in column 10; any other layout falls
  # back to the last column.
  local raw_of='tolower($0) ~ pat { print (($1 ~ /^[0-9]+$/ && NF >= 10) ? $10 : $NF); exit }'
  # awk program: normalized VALUE (column 4) of the first matching attribute
  # table row, printed as a plain number without leading zeros.
  local norm_of='tolower($0) ~ pat && $1 ~ /^[0-9]+$/ && NF >= 10 { print $4 + 0; exit }'
  local power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib lifetime_gib media_wearout
  power_on_hours=$(awk -v pat="power_on_hours|power-on" "$raw_of" <<< "$attributes")
  # Some drives report e.g. "1234h+56m+07.890s"; keep the leading hours.
  power_on_hours=${power_on_hours%%[!0-9]*}
  reallocated_sectors=$(awk -v pat="reallocated_sector_ct|reallocated_event_count" "$raw_of" <<< "$attributes")
  pending_sectors=$(awk -v pat="current_pending_sector" "$raw_of" <<< "$attributes")
  # Only a sector counter may end up in total_written; counters in other
  # units are converted or reported in their own field.
  total_written=$(awk -v pat="total_lbas_written" "$raw_of" <<< "$attributes")
  if [[ -z "$total_written" ]]; then
    lifetime_gib=$(awk -v pat="lifetime_writes_gib" "$raw_of" <<< "$attributes")
    # 1 GiB = 2^30 bytes = 2097152 sectors of 512 bytes.
    [[ "$lifetime_gib" =~ ^[0-9]+$ ]] && total_written=$(( 10#$lifetime_gib * 2097152 ))
  fi
  host_writes_32mib=$(awk -v pat="host_writes_32mib" "$raw_of" <<< "$attributes")
  # For SSDs, report the normalized wear value; estimate_ssd_lifespan treats
  # this field as a percentage of life remaining.
  media_wearout=$(awk -v pat="media_wearout_indicator|wear_leveling_count" "$norm_of" <<< "$attributes")
  echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout"
}
# Function to calculate TBW (terabytes written, decimal TB) for an SSD.
# Arguments: $1 - write counter expressed in units of 32 MiB
#                 (callers pass the Host_Writes_32MiB raw value); used only
#                 when $2 is missing, zero or not a number
#            $2 - write counter expressed in 512-byte sectors (preferred)
# Outputs:   the amount written in TB with two decimals (truncated, with a
#            leading zero, e.g. "0.51"), or "0" when neither counter is usable
# Only shell integer arithmetic is used, so no external tool is required.
# Counters large enough to overflow 64-bit arithmetic are not handled.
calculate_tbw() {
  local raw_value=$1
  local sectors=$2
  local bytes=0
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  if [[ "$sectors" =~ ^[0-9]+$ ]] && (( 10#$sectors > 0 )); then
    bytes=$(( 10#$sectors * 512 ))
  elif [[ "$raw_value" =~ ^[0-9]+$ ]] && (( 10#$raw_value > 0 )); then
    # 32 MiB = 32 * 1024 * 1024 bytes (binary units, not 32,000,000 bytes).
    bytes=$(( 10#$raw_value * 33554432 ))
  fi
  if (( bytes <= 0 )); then
    echo "0"
    return
  fi
  # Work in hundredths of a TB (10^10 bytes) to get two decimals from
  # integer math.
  local hundredths=$(( bytes / 10000000000 ))
  printf '%d.%02d\n' "$(( hundredths / 100 ))" "$(( hundredths % 100 ))"
}
# Function to estimate SSD endurance based on model and capacity.
# Arguments: $1 - model string, matched case-insensitively first against the
#                 SAS pattern and then against the enterprise SATA names
#            $2 - capacity in GB; a fractional part is ignored and an empty
#                 or non-numeric value counts as 0
# Outputs:   estimated endurance in TB written, as an integer, on stdout
# The figures are rough rules of thumb hard-coded below, not vendor ratings.
estimate_ssd_endurance() {
  local disk_model=$1
  local capacity_gb=$2
  # Reduce the capacity to a plain base-10 integer so the comparisons below
  # cannot fail with an arithmetic error on input such as "480.1".
  capacity_gb=${capacity_gb%%.*}
  [[ "$capacity_gb" =~ ^[0-9]+$ ]] || capacity_gb=0
  capacity_gb=$(( 10#$capacity_gb ))
  # SAS SSDs typically have very high endurance
  if grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD" <<< "$disk_model"; then
    # Enterprise SAS SSDs - very high endurance
    if (( capacity_gb >= 1000 )); then
      echo "10000" # 10PB for 1TB+ enterprise SAS SSD
    elif (( capacity_gb >= 600 )); then
      echo "6000" # 6PB for 600GB enterprise SAS SSD
    elif (( capacity_gb >= 400 )); then
      echo "4000" # 4PB for 400GB enterprise SAS SSD
    else
      echo "2000" # 2PB for smaller enterprise SAS SSD
    fi
  elif grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA" <<< "$disk_model"; then
    # Enterprise SATA SSDs
    if (( capacity_gb >= 1000 )); then
      echo "1200" # 1.2PB for 1TB enterprise
    elif (( capacity_gb >= 480 )); then
      echo "600" # 600TB for 480GB enterprise
    elif (( capacity_gb >= 240 )); then
      echo "300" # 300TB for 240GB enterprise
    else
      echo "150" # 150TB for smaller enterprise
    fi
  else
    # Consumer SSDs
    if (( capacity_gb >= 1000 )); then
      echo "600" # 600TB for 1TB consumer
    elif (( capacity_gb >= 480 )); then
      echo "300" # 300TB for 480GB consumer
    elif (( capacity_gb >= 240 )); then
      echo "150" # 150TB for 240GB consumer
    elif (( capacity_gb >= 120 )); then
      echo "80" # 80TB for 120GB consumer
    else
      echo "40" # 40TB for smaller drives
    fi
  fi
}
# Function to estimate SSD lifespan with TBW remaining.
# Arguments: $1 - power-on hours (empty or zero short-circuits to "Unknown")
#            $2 - TB written so far
#            $3 - model string, forwarded to estimate_ssd_endurance
#            $4 - capacity in GB, forwarded to estimate_ssd_endurance
#            $5 - optional wear indicator, read as percent of life remaining
# Globals:   RED, YELLOW, GREEN, NC (read)
# Outputs:   one "percent|TB remaining|status|source" record on stdout, where
#            source is "media_wearout", "tbw" or "estimated"
# The arithmetic on TB figures is delegated to bc.
estimate_ssd_lifespan() {
  local power_on_hours=$1
  local tbw_used=$2
  local disk_model=$3
  local capacity_gb=$4
  local media_wearout=$5
  # A pattern match avoids the arithmetic error that -eq raises when the
  # value is not a plain number.
  if [[ -z "$power_on_hours" || "$power_on_hours" =~ ^0+$ ]]; then
    echo "Unknown||Unknown||Unknown"
    return
  fi
  local estimated_endurance tbw_remaining
  estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")
  tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc -l 2>/dev/null || echo "0")
  # If we have media wearout indicator, use it for more accurate estimation.
  # Anything that is not a positive integer is ignored and the TBW path
  # below is used instead.
  if [[ "$media_wearout" =~ ^[0-9]+$ ]] && (( 10#$media_wearout != 0 )); then
    local wear_pct=$(( 10#$media_wearout ))
    if (( wear_pct <= 10 )); then
      # Report the measured value, like every other branch does.
      echo "${RED}${wear_pct}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}Critical wear${NC}|media_wearout"
    elif (( wear_pct <= 30 )); then
      echo "${YELLOW}${wear_pct}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}High wear${NC}|media_wearout"
    elif (( wear_pct <= 70 )); then
      echo "${YELLOW}${wear_pct}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|media_wearout"
    else
      echo "${GREEN}${wear_pct}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|media_wearout"
    fi
    return
  fi
  if [[ $(echo "$tbw_used > 0" | bc -l 2>/dev/null) -eq 1 ]]; then
    local lifespan_used lifespan_remaining
    lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc -l 2>/dev/null || echo "0")
    lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc -l 2>/dev/null || echo "100")
    if [[ $(echo "$lifespan_used >= 80" | bc -l) -eq 1 ]]; then
      echo "${RED}${lifespan_remaining}%${NC}|${RED}${tbw_remaining} TB${NC}|${RED}High wear${NC}|tbw"
    elif [[ $(echo "$lifespan_used >= 50" | bc -l) -eq 1 ]]; then
      echo "${YELLOW}${lifespan_remaining}%${NC}|${YELLOW}${tbw_remaining} TB${NC}|${YELLOW}Moderate wear${NC}|tbw"
    else
      echo "${GREEN}${lifespan_remaining}%${NC}|${GREEN}${tbw_remaining} TB${NC}|${GREEN}Healthy${NC}|tbw"
    fi
  else
    echo "Unknown|${estimated_endurance} TB|New|estimated"
  fi
}
# Rough HDD life expectancy from three SMART counters.
# Arguments: $1 - power-on hours (empty yields "Unknown")
#            $2 - reallocated sector count (empty counts as 0)
#            $3 - current pending sector count (empty counts as 0)
# Globals:   RED, YELLOW, GREEN, NC (read)
# Outputs:   a colored verdict followed by the reason in parentheses
estimate_hdd_lifespan() {
  local power_on_hours=$1 reallocated_sectors=$2 pending_sectors=$3
  [[ -n "$power_on_hours" ]] || {
    echo "Unknown"
    return
  }
  # Missing counters are treated as zero.
  : "${reallocated_sectors:=0}" "${pending_sectors:=0}"
  local tone verdict reason
  # Checked from most to least alarming; the first matching rule wins.
  if [[ "$pending_sectors" -gt 0 ]]; then
    tone=$RED
    verdict="CRITICAL"
    reason="Pending sectors: $pending_sectors"
  elif [[ "$reallocated_sectors" -gt 100 ]]; then
    tone=$RED
    verdict="< 6 months"
    reason="High reallocated sectors: $reallocated_sectors"
  elif [[ "$reallocated_sectors" -gt 10 ]]; then
    tone=$YELLOW
    verdict="6-12 months"
    reason="Reallocated sectors: $reallocated_sectors"
  elif [[ "$power_on_hours" -gt 40000 ]]; then
    tone=$YELLOW
    verdict="1-2 years"
    reason="High usage: $power_on_hours hours"
  elif [[ "$power_on_hours" -gt 25000 ]]; then
    tone=$GREEN
    verdict="2-3 years"
    reason="Moderate usage: $power_on_hours hours"
  else
    tone=$GREEN
    verdict="> 3 years"
    reason="Low usage: $power_on_hours hours"
  fi
  echo "${tone}${verdict}${NC} (${reason})"
}
# Function to check a single disk with enhanced error handling.
# Prints a health report for the disk to stdout.
# Arguments: $1 - device path
#            $2 - value for smartctl's -d option; empty or the placeholder
#                 "direct" means the disk is queried without -d
# Globals:   CYAN, RED, YELLOW (read)
# Calls:     test_smart_access, get_disk_info, calculate_tbw,
#            estimate_ssd_lifespan, estimate_hdd_lifespan, print_color, bc
check_disk() {
  local disk=$1
  local controller=$2
  # main()/detect_raid_disks() tag non-RAID disks as "<dev>:direct";
  # "direct" is only a label, so the helpers are called without a type.
  [[ "$controller" == "direct" ]] && controller=""
  print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
  echo "=================================================="
  # Test SMART access level
  local access_level
  access_level=$(test_smart_access "$disk" "$controller")
  case $access_level in
    "no_access")
      print_color "$RED" "ERROR: Cannot access disk through controller"
      echo "Possible reasons:"
      echo " - Controller doesn't support SMART passthrough"
      echo " - Disk is part of a hardware RAID array"
      echo " - Insufficient permissions (try running as root)"
      echo " - Controller busy or offline"
      echo ""
      return
      ;;
    "not_available")
      print_color "$YELLOW" "SMART not available on this disk"
      echo "This disk does not support SMART monitoring"
      echo ""
      return
      ;;
    "disabled")
      print_color "$YELLOW" "SMART is disabled on this disk"
      echo "SMART is available but currently disabled"
      echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk"
      echo ""
      return
      ;;
    "no_attributes")
      print_color "$YELLOW" "WARNING: Cannot read SMART attributes"
      echo "This is common with hardware RAID controllers like PERC H730P"
      echo "Try checking through the RAID management interface"
      echo ""
      return
      ;;
    "limited_attributes")
      # Not fatal: report whatever can still be read.
      print_color "$YELLOW" "NOTE: Limited SMART data available"
      echo "Controller is filtering some SMART attributes"
      ;;
  esac
  # Get disk information; the fields are declared local so that `read`
  # does not create or overwrite global variables.
  local disk_info
  local model serial capacity firmware health_status disk_type
  local power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib media_wearout
  disk_info=$(get_disk_info "$disk" "$controller")
  IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info"
  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Capacity: ${capacity:-Unknown}"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"
  # Only show power on hours if available
  if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
    echo "Power On Hours: $power_on_hours"
  else
    echo "Power On Hours: Unknown"
  fi
  # Disk type specific analysis
  if [[ "$disk_type" == "SSD" ]]; then
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "" "$total_written")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$host_writes_32mib" "")
    fi
    if [[ $(echo "$tbw_used > 0" | bc -l 2>/dev/null) -eq 1 ]]; then
      echo "TBW Used: ${tbw_used} TB"
    fi
    # Estimate capacity for endurance calculation.
    # get_disk_info returns only the text before "[", which is the byte
    # count with thousands separators, so GB is derived from the bytes. A
    # short "<n> GB" / "<n> TB" style value is still understood as well.
    local capacity_gb=0
    local capacity_digits=${capacity//[!0-9]/}
    if (( ${#capacity_digits} >= 10 )); then
      capacity_gb=$(( 10#$capacity_digits / 1000000000 ))
    elif echo "$capacity" | grep -qi "GB"; then
      capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. -f1)
    elif echo "$capacity" | grep -qi "TB"; then
      capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{printf "%d", $1 * 1000}')
    fi
    local lifespan_info lifespan_percent tbw_remaining wear_status wear_source
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout")
    lifespan_percent=$(echo "$lifespan_info" | cut -d'|' -f1)
    tbw_remaining=$(echo "$lifespan_info" | cut -d'|' -f2)
    wear_status=$(echo "$lifespan_info" | cut -d'|' -f3)
    wear_source=$(echo "$lifespan_info" | cut -d'|' -f4)
    if [[ "$wear_source" != "media_wearout" && $(echo "$tbw_used > 0" | bc -l 2>/dev/null) -eq 1 ]]; then
      echo "TBW Remaining: $tbw_remaining"
    fi
    echo "Lifespan: $lifespan_percent ($wear_status)"
    # Show wear source if available
    if [[ "$wear_source" == "media_wearout" ]]; then
      echo "Wear Source: Media Wearout Indicator"
    elif [[ "$wear_source" == "tbw" ]]; then
      echo "Wear Source: TBW Calculation"
    elif [[ "$wear_source" == "estimated" ]]; then
      echo "Wear Source: Estimated Endurance"
    fi
  elif [[ "$disk_type" == "HDD" ]]; then
    if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then
      echo "Realloc Sectors: $reallocated_sectors"
    fi
    if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then
      echo "Pending Sectors: $pending_sectors"
    fi
    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
    echo "Lifespan: $lifespan"
  else
    print_color "$YELLOW" "Limited information available for this disk type"
    echo "This is normal for hardware RAID configurations like PERC H730P"
    echo "For detailed SAS drive information, use controller management tools"
  fi
  echo ""
}
# Function to detect RAID controllers and disks with PERC H730P support.
# Outputs: stdout - exactly one line of space-separated
#                   "<device>:<controller>" entries, where <controller> is
#                   "direct" or a "<type>,<index>" pair accepted by smartctl
#          stderr - progress messages
# Callers capture stdout with `read -ra`, which consumes a single line, so
# nothing except the final list may be written there.
detect_raid_disks() {
  # "auto" is not probed: it takes no ",<index>" suffix, so "auto,<n>" can
  # never be accepted by smartctl.
  local controllers=("megaraid" "cciss" "areca" "3ware" "hpt")
  local disks=()
  local disk controller i base_disk
  # Check for direct disks first - only main devices, no partitions
  for disk in /dev/sd[a-z]; do
    if [[ -b "$disk" ]]; then
      disks+=("$disk:direct")
    fi
  done
  # Check for NVMe disks - only main devices, no partitions
  for disk in /dev/nvme[0-9]n[0-9]; do
    if [[ -b "$disk" ]]; then
      disks+=("$disk:direct")
    fi
  done
  # Check for SAS disks directly via SCSI generic. These nodes are
  # character devices, so they are tested with -c rather than -b.
  for disk in /dev/sg[0-9]; do
    if [[ -c "$disk" ]]; then
      disks+=("$disk:direct")
    fi
  done
  # Check for RAID controllers with enhanced detection
  for controller in "${controllers[@]}"; do
    print_color "$BLUE" "Scanning for $controller controllers..." >&2
    for i in {0..31}; do
      # Try different disk devices for each controller
      for base_disk in "/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1"; do
        if [[ -b "$base_disk" || -c "$base_disk" ]]; then
          if smartctl -d "$controller,$i" -i "$base_disk" &>/dev/null; then
            disks+=("$base_disk:$controller,$i")
            print_color "$GREEN" " Found $controller,$i on $base_disk" >&2
            break
          fi
        fi
      done
    done
  done
  # Special detection for PERC H730P
  print_color "$BLUE" "Scanning for PERC H730P controllers..." >&2
  if command_exists storcli; then
    print_color "$GREEN" " storcli detected - checking PERC H730P" >&2
    # No list entry is added here: "perc-h730p" is not a device type that
    # smartctl -d understands, so such an entry could only fail later.
    # NOTE(review): drives behind a PERC are presumably reachable through
    # the megaraid probes above - confirm on real hardware.
  fi
  echo "${disks[@]}"
}
# Main function: collect the disks to examine and report on each of them.
# Arguments: optional device paths (block devices such as /dev/sda or
#            character devices such as /dev/sg0); with none,
#            detect_raid_disks is used
# Globals:   VERSION, EUID, BLUE, CYAN, GREEN, RED, YELLOW (read)
# Exits with status 1 when smartctl is missing or no usable disk is found.
main() {
  print_color "$BLUE" "Harvester OS Disk Health Check Script v$VERSION"
  print_color "$BLUE" "Enhanced with PERC H730P and SAS Support"
  print_color "$BLUE" "============================================"
  echo ""
  if ! command_exists smartctl; then
    print_color "$RED" "Error: smartctl is not installed. Please install smartmontools package."
    exit 1
  fi
  local disks=()
  local disk disk_info controller
  # If specific disk provided, check only that disk
  if [[ $# -gt 0 ]]; then
    for disk in "$@"; do
      # SCSI generic nodes (/dev/sgX), which the usage text advertises, are
      # character devices, so both device kinds are accepted.
      if [[ -b "$disk" || -c "$disk" ]]; then
        disks+=("$disk:direct")
      else
        print_color "$RED" "Error: $disk is not a valid block device"
      fi
    done
  else
    # Auto-detect disks
    print_color "$CYAN" "Auto-detecting disks and RAID controllers..."
    read -ra disks <<< "$(detect_raid_disks)"
  fi
  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root: sudo $0"
    exit 1
  fi
  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""
  # Check if running as root
  if [[ $EUID -ne 0 ]]; then
    print_color "$YELLOW" "Warning: Not running as root."
    print_color "$YELLOW" "Some disks/controllers may show limited information."
    echo ""
  fi
  # Check each disk; entries have the form "<device>:<controller>".
  for disk_info in "${disks[@]}"; do
    IFS=':' read -r disk controller <<< "$disk_info"
    # "direct" only marks a disk that needs no -d option. Passing it on
    # would end up as "smartctl -d direct", which smartctl rejects.
    [[ "$controller" == "direct" ]] && controller=""
    check_disk "$disk" "$controller"
  done
  print_color "$BLUE" "Check completed!"
  echo ""
  print_color "$CYAN" "Note: For PERC H730P controllers with SAS drives:"
  print_color "$CYAN" " - Use 'storcli /c0 show all' for detailed information"
  print_color "$CYAN" " - Use 'storcli /c0/eall/sall show' for physical disk status"
  print_color "$CYAN" " - Hardware RAID controllers often limit SMART data access"
}
# Usage information: print the command-line help text to stdout.
# Globals: SCRIPT_NAME (read) - name shown in the synopsis and examples
usage() {
  # One argument per output line; printf repeats the format for each of them.
  printf '%s\n' \
    "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks and RAID arrays" \
    "" \
    "Examples:" \
    " sudo $SCRIPT_NAME # Check all disks (recommended)" \
    " $SCRIPT_NAME /dev/sda # Check specific disk" \
    " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" \
    " $SCRIPT_NAME /dev/sda /dev/sdb # Check multiple disks"
}
# Parse command line arguments: only the first argument is inspected.
# -h/--help and -v/--version print and exit 0; anything else (including no
# arguments at all) is handed to main unchanged.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi

615
ubuntu-v2.5.sh Executable file
View File

@@ -0,0 +1,615 @@
#!/bin/bash
# Disk Health Check Script for Ubuntu 24.04
# Enhanced with SAS/PERC H730P controller support
# Checks SSD TBW/lifespan and HDD health status
# Script identity, shown in the usage and version output.
SCRIPT_NAME=$(basename "$0")
VERSION="2.5"
# Color codes.
# tput exits non-zero and complains on stderr when TERM is unset or the
# terminal has no color capability (for example under cron). In that case the
# variables fall back to empty strings and the output is simply uncolored.
RED=$(tput setaf 1 2>/dev/null || true)
GREEN=$(tput setaf 2 2>/dev/null || true)
YELLOW=$(tput setaf 3 2>/dev/null || true)
BLUE=$(tput setaf 4 2>/dev/null || true)
CYAN=$(tput setaf 6 2>/dev/null || true)
NC=$(tput sgr0 2>/dev/null || true)
# Write one line of text wrapped in a terminal color sequence.
# Arguments: $1 - color escape sequence, $2 - message text
# Globals:   NC (read) - reset sequence appended after the message
# Backslash escapes in the arguments are interpreted (echo -e).
print_color() {
  local tint=$1 text=$2
  echo -e "${tint}${text}${NC}"
}
# Succeed when "$1" resolves to something `command -v` can find.
# Produces no output; the exit status is the answer.
command_exists() {
  command -v "$1" &>/dev/null
}
# Verify that the external tools the script relies on are installed.
# Prints the missing package names plus an apt install hint and exits with
# status 1 when at least one tool is absent; otherwise does nothing.
# Globals: RED (read)
check_dependencies() {
  local -a absent=()
  local spec
  # Each entry is "<command to look for>:<package that provides it>".
  for spec in "smartctl:smartmontools" "bc:bc"; do
    command_exists "${spec%%:*}" || absent+=("${spec#*:}")
  done
  (( ${#absent[@]} == 0 )) && return
  print_color "$RED" "Error: Missing required packages: ${absent[*]}"
  echo "Install with: sudo apt update && sudo apt install ${absent[*]}"
  exit 1
}
# Probe how much SMART data smartctl can obtain for a disk.
# Arguments: $1 - device path
#            $2 - optional smartctl device type (value for "-d"), may be empty
# Outputs:   exactly one word on stdout:
#              no_access      - "smartctl -i" failed
#              not_available  - no "SMART support is: Available" line
#              disabled       - SMART available but no "Enabled" line
#              no_attributes  - "smartctl -A" printed no attribute table
#              full_access    - attribute table readable
test_smart_access() {
  local disk=$1
  local controller=$2
  # Build the command as an array so the arguments are passed verbatim.
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  # One "-i" call is both the access probe and the data source; the previous
  # code ran the same command twice.
  local smart_info
  if ! smart_info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null); then
    echo "no_access"
    return
  fi

  # smartctl prints one "SMART support is:" line for availability and one for
  # the enabled state; a missing line is treated like "not available".
  local support_lines
  support_lines=$(grep "SMART support is:" <<< "$smart_info")
  if [[ "$support_lines" != *Available* ]]; then
    echo "not_available"
    return
  fi
  if [[ "$support_lines" != *Enabled* ]]; then
    echo "disabled"
    return
  fi

  # The attribute table is recognized by its ATTRIBUTE_NAME column header.
  local attributes
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  if [[ "$attributes" != *ATTRIBUTE_NAME* ]]; then
    echo "no_attributes"
    return
  fi
  echo "full_access"
}
# Helper: print the text after the first ':' of the first line of $1 that
# matches the case-insensitive grep pattern $2, with leading blanks removed.
_smart_field() {
  grep -i -- "$2" <<< "$1" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1
}

# Collect identity, health and wear data for one disk from smartctl.
# Arguments: $1 - device path
#            $2 - optional smartctl device type (value for "-d"), may be empty
# Outputs:   one line with 12 '|'-separated fields:
#            model|serial|capacity|firmware|health_status|disk_type|
#            power_on_hours|reallocated_sectors|pending_sectors|
#            total_written|host_writes_32mib|media_wearout
#            Fields that could not be determined are empty.
get_disk_info() {
  local disk=$1
  local controller=$2
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  local info attributes health
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  health=$("${smart_cmd[@]}" -H "$disk" 2>/dev/null)

  # Identity fields; ATA and SCSI/SAS output use different labels.
  local model vendor serial capacity firmware health_status
  model=$(_smart_field "$info" "Device Model:\|Product:")
  vendor=$(_smart_field "$info" "Vendor:")
  [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model"
  serial=$(_smart_field "$info" "Serial Number:\|Serial number:")
  capacity=$(_smart_field "$info" "User Capacity:\|Total NVM Capacity:")
  capacity=${capacity%%\[*}   # drop the "[480 GB]" suffix
  firmware=$(_smart_field "$info" "Firmware Version:\|Firmware revision:\|Revision:")

  # "SMART Health Status:" is the label used for SCSI/SAS devices; without it
  # those drives always reported an unknown health.
  health_status=$(_smart_field "$health" "result:\|SMART overall-health\|SMART Health Status:")
  if [[ -z "$health_status" ]]; then
    health_status=$(grep -i "SMART overall-health" <<< "$health" | awk -F'[' '{print $2}' | cut -d']' -f1)
  fi

  # Classify the disk from the information section.
  local disk_type="UNKNOWN"
  if grep -qi "Solid State Device" <<< "$info"; then
    disk_type="SSD"
  elif grep -qi "Rotation Rate" <<< "$info"; then
    disk_type="HDD"
  elif grep -qi "SCSI\|SAS" <<< "$info"; then
    # No explicit rotation line: a common spindle speed anywhere in the
    # output is taken as a sign of a rotating disk.
    if grep -qi "15000\|10000\|7200" <<< "$info"; then
      disk_type="HDD"
    else
      disk_type="SSD"
    fi
  fi

  # Column 10 of the attribute table is the raw value.
  local power_on_hours reallocated_sectors pending_sectors
  power_on_hours=$(grep -i "Power_On_Hours" <<< "$attributes" | awk '{print $10}' | head -1)
  # Some drives report e.g. "9876h+12m+30.500s"; keep only the leading hour
  # count so callers can compare it numerically.
  power_on_hours=${power_on_hours%%[^0-9]*}
  reallocated_sectors=$(grep -i "Reallocated_Sector_Ct" <<< "$attributes" | awk '{print $10}' | head -1)
  pending_sectors=$(grep -i "Current_Pending_Sector" <<< "$attributes" | awk '{print $10}' | head -1)

  # Write counters: the first matching attribute wins, so the unit of
  # total_written depends on which attribute the drive exposes.
  local total_written host_writes_32mib media_wearout
  total_written=$(grep -i "Total_LBAs_Written\|Lifetime_Writes_GiB\|Host_Writes_32MiB\|Flash_Writes_GiB" <<< "$attributes" | awk '{print $10}' | head -1)
  host_writes_32mib=$(grep -i "Host_Writes_32MiB" <<< "$attributes" | awk '{print $10}' | head -1)
  # Wear indicator (raw value of the first matching attribute).
  media_wearout=$(grep -i "Media_Wearout_Indicator\|Wear_Leveling_Count\|SSD_Life_Left" <<< "$attributes" | awk '{print $10}' | head -1)

  echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout"
}
# Convert a drive's write counter into terabytes written (decimal TB).
# Arguments: $1 - raw counter: GiB for Kingston models, otherwise units of
#                 32 MiB (Host_Writes_32MiB)
#            $2 - number of 512-byte sectors written (takes precedence over
#                 $1 for non-Kingston models)
#            $3 - disk model string
# Outputs:   TB written, truncated to two decimals, or "0" when no usable
#            counter was supplied.
# The arithmetic is done on exact byte counts with shell integers, so GiB and
# MiB counters are converted with their binary sizes (the previous code
# treated them as decimal GB/MB) and non-numeric input yields "0" instead of
# an arithmetic error.
calculate_tbw() {
  local raw_value=$1
  local sectors=$2
  local disk_model=$3
  local bytes

  # Anything that is not a plain non-negative integer counts as "no data".
  [[ "$raw_value" =~ ^[0-9]+$ ]] || raw_value=0
  [[ "$sectors" =~ ^[0-9]+$ ]] || sectors=0
  raw_value=$((10#$raw_value))   # 10# avoids octal parsing of leading zeros
  sectors=$((10#$sectors))

  if (( raw_value > 0 )) && [[ "${disk_model^^}" == *KINGSTON* ]]; then
    # Kingston counters (Lifetime_Writes_GiB / Flash_Writes_GiB) are in GiB.
    bytes=$(( raw_value * 1073741824 ))
  elif (( sectors > 0 )); then
    bytes=$(( sectors * 512 ))
  elif (( raw_value > 0 )); then
    # Host_Writes_32MiB: one unit is 32 MiB = 33554432 bytes.
    bytes=$(( raw_value * 33554432 ))
  else
    echo "0"
    return
  fi

  # 1 TB = 10^12 bytes; work in hundredths of a TB to keep two decimals.
  local hundredths=$(( bytes / 10000000000 ))
  printf '%d.%02d\n' "$(( hundredths / 100 ))" "$(( hundredths % 100 ))"
}
# Guess the rated write endurance of an SSD from its model and capacity.
# Arguments: $1 - disk model string, $2 - capacity in GB
# Outputs:   estimated endurance in TB written
# The model selects a drive family; within a family the first capacity
# threshold that is met decides the value, otherwise the family floor is used.
estimate_ssd_endurance() {
  local disk_model=$1
  local capacity_gb=$2
  # Each tier is "<minimum capacity in GB>:<endurance in TB>", largest first.
  local -a tiers
  local floor_tb

  if grep -qi "KINGSTON.*SA400" <<< "$disk_model"; then
    # Kingston consumer SSDs
    tiers=(960:300 480:150)
    floor_tb=80
  elif grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD" <<< "$disk_model"; then
    # Enterprise SAS SSDs - very high endurance (values in the PB range)
    tiers=(1000:10000 600:6000 400:4000)
    floor_tb=2000
  elif grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL" <<< "$disk_model"; then
    # Enterprise SATA/NVMe SSDs
    tiers=(1000:1200 480:600 240:300)
    floor_tb=150
  else
    # Consumer SSDs
    tiers=(1000:600 480:300 240:150 120:80)
    floor_tb=40
  fi

  local tier
  for tier in "${tiers[@]}"; do
    if [[ $capacity_gb -ge ${tier%%:*} ]]; then
      echo "${tier#*:}"
      return
    fi
  done
  echo "$floor_tb"
}
# Estimate the remaining life of an SSD.
# Arguments: $1 - power-on hours (a suffix such as "h+12m" is ignored)
#            $2 - TB written so far (defaults to 0)
#            $3 - disk model string
#            $4 - capacity in GB
#            $5 - wear indicator, treated as percent of life left (optional)
# Globals:   RED, YELLOW, GREEN, NC (read)
# Outputs:   one line "percent|tbw_remaining|status|source" where source is
#            media_wearout, tbw, estimated, or empty when nothing is known.
# Changes from the earlier version: the "nothing known" record now has the
# same four fields as every other record (it used to contain doubled
# separators, leaving the tbw_remaining field empty), numeric inputs are
# reduced to their leading digits before being compared, and the identical
# Kingston / non-Kingston branches are merged.
estimate_ssd_lifespan() {
  local power_on_hours=$1
  local tbw_used=${2:-0}
  local disk_model=$3
  local capacity_gb=$4
  local media_wearout=$5

  # Keep only the leading integer part so the numeric tests cannot fail.
  power_on_hours=${power_on_hours%%[^0-9]*}
  media_wearout=${media_wearout%%[^0-9]*}

  if [[ -z "$power_on_hours" ]] || (( 10#$power_on_hours == 0 )); then
    echo "Unknown|Unknown|Unknown|"
    return
  fi

  local estimated_endurance tbw_remaining
  estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")
  tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0")

  local color status
  # A wear indicator, when present and non-zero, takes precedence over the
  # TBW-based estimate.
  if [[ -n "$media_wearout" ]] && (( 10#$media_wearout != 0 )); then
    local life_left=$(( 10#$media_wearout ))
    if (( life_left <= 10 )); then
      color=$RED; status="Critical wear"
    elif (( life_left <= 30 )); then
      color=$YELLOW; status="High wear"
    elif (( life_left <= 70 )); then
      color=$YELLOW; status="Moderate wear"
    else
      color=$GREEN; status="Healthy"
    fi
    echo "${color}${life_left}%${NC}|${color}${tbw_remaining} TB${NC}|${color}${status}${NC}|media_wearout"
    return
  fi

  if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
    local lifespan_used lifespan_remaining
    lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0")
    lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100")
    if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then
      color=$RED; status="High wear"
    elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then
      color=$YELLOW; status="Moderate wear"
    else
      color=$GREEN; status="Healthy"
    fi
    echo "${color}${lifespan_remaining}%${NC}|${color}${tbw_remaining} TB${NC}|${color}${status}${NC}|tbw"
  else
    # No write counter either: report the estimated rating only.
    echo "Unknown|${estimated_endurance} TB|New|estimated"
  fi
}
# Give a rough remaining-life verdict for a rotating disk.
# Arguments: $1 - power-on hours (a suffix such as "h+12m" is ignored)
#            $2 - reallocated sector count (defaults to 0)
#            $3 - pending sector count (defaults to 0)
# Globals:   RED, YELLOW, GREEN, NC (read)
# Outputs:   "Unknown" when no hour count is available, otherwise a colored
#            estimate followed by the reason in parentheses.
# All three inputs are reduced to their leading digits first; a raw value
# such as "45000h+10m" previously made the numeric tests fail and the disk
# was reported as "Low usage".
estimate_hdd_lifespan() {
  local power_on_hours=$1
  local reallocated_sectors=$2
  local pending_sectors=$3

  power_on_hours=${power_on_hours%%[^0-9]*}
  if [[ -z "$power_on_hours" ]]; then
    echo "Unknown"
    return
  fi
  reallocated_sectors=${reallocated_sectors%%[^0-9]*}
  pending_sectors=${pending_sectors%%[^0-9]*}

  # 10# forces base 10 so values with leading zeros are not read as octal.
  power_on_hours=$(( 10#$power_on_hours ))
  reallocated_sectors=$(( 10#${reallocated_sectors:-0} ))
  pending_sectors=$(( 10#${pending_sectors:-0} ))

  # Sector problems outrank age: pending first, then reallocated, then hours.
  if (( pending_sectors > 0 )); then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
  elif (( reallocated_sectors > 100 )); then
    echo "${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)"
  elif (( reallocated_sectors > 10 )); then
    echo "${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)"
  elif (( power_on_hours > 40000 )); then
    echo "${YELLOW}1-2 years${NC} (High usage: $power_on_hours hours)"
  elif (( power_on_hours > 25000 )); then
    echo "${GREEN}2-3 years${NC} (Moderate usage: $power_on_hours hours)"
  else
    echo "${GREEN}> 3 years${NC} (Low usage: $power_on_hours hours)"
  fi
}
# Helper: convert the capacity text produced by get_disk_info into whole GB.
# Accepts "<n> GB", "<n> TB" or a plain byte count such as
# "480,103,981,056 bytes"; prints 0 when no number is present.
_capacity_to_gb() {
  local capacity=$1
  local token=""
  [[ "$capacity" =~ [0-9][0-9.,]* ]] && token=${BASH_REMATCH[0]}
  if [[ -z "$token" ]]; then
    echo "0"
    return
  fi
  case "${capacity,,}" in
    *gb*)
      token=${token//,/}
      token=${token%%.*}
      echo "$(( 10#$token ))"
      ;;
    *tb*)
      awk -v n="${token//,/}" 'BEGIN { printf "%.0f\n", n * 1000 }'
      ;;
    *)
      # No unit suffix: the number is a byte count with digit grouping.
      token=${token//[^0-9]/}
      echo "$(( 10#$token / 1000000000 ))"
      ;;
  esac
}

# Print a health report for one disk.
# Arguments: $1 - device path
#            $2 - optional smartctl device type (value for "-d"), may be empty
# Globals:   CYAN, YELLOW, RED (read)
# Outputs:   human-readable report on stdout. Returns early, after an
#            explanatory message, when SMART data cannot be read.
check_disk() {
  local disk=$1
  local controller=$2
  print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
  echo "=================================================="

  # Test SMART access level
  local access_level
  access_level=$(test_smart_access "$disk" "$controller")
  case "$access_level" in
    "no_access")
      print_color "$RED" "ERROR: Cannot access disk through controller"
      echo "Possible reasons:"
      echo " - Controller doesn't support SMART passthrough"
      echo " - Disk is part of a hardware RAID array"
      echo " - Insufficient permissions (try running as root)"
      echo " - Controller busy or offline"
      echo ""
      return
      ;;
    "not_available")
      print_color "$YELLOW" "SMART not available on this disk"
      echo "This disk does not support SMART monitoring"
      echo ""
      return
      ;;
    "disabled")
      print_color "$YELLOW" "SMART is disabled on this disk"
      echo "SMART is available but currently disabled"
      echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk"
      echo ""
      return
      ;;
    "no_attributes")
      print_color "$YELLOW" "WARNING: Cannot read SMART attributes"
      echo "This is common with hardware RAID controllers like PERC H730P"
      echo "Try checking through the RAID management interface"
      echo ""
      return
      ;;
    "limited_attributes")
      print_color "$YELLOW" "NOTE: Limited SMART data available"
      echo "Controller is filtering some SMART attributes"
      ;;
  esac

  # Get disk information (12 '|'-separated fields, see get_disk_info).
  local disk_info
  disk_info=$(get_disk_info "$disk" "$controller")
  local model serial capacity firmware health_status disk_type power_on_hours
  local reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout
  IFS='|' read -r model serial capacity firmware health_status disk_type power_on_hours reallocated_sectors pending_sectors total_written host_writes_32mib media_wearout <<< "$disk_info"

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Capacity: ${capacity:-Unknown}"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"
  # Only show power on hours if available
  if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
    echo "Power On Hours: $power_on_hours"
  else
    echo "Power On Hours: Unknown"
  fi

  # Disk type specific analysis
  if [[ "$disk_type" == "SSD" ]]; then
    local tbw_used=0
    # get_disk_info fills total_written from the first write counter it finds,
    # which may be the Host_Writes_32MiB attribute itself. When both fields
    # carry the same value the counter is in 32 MiB units, not sectors, so it
    # has to go to calculate_tbw as the raw value.
    # NOTE(review): a Kingston Lifetime_Writes_GiB value still arrives here as
    # total_written and is passed on as a sector count - confirm the intended
    # handling with the author of calculate_tbw.
    if [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]] \
      && [[ -z "$total_written" || "$total_written" == "0" || "$total_written" == "$host_writes_32mib" ]]; then
      tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model")
    elif [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "" "$total_written" "$model")
    fi

    local has_tbw=0
    [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]] && has_tbw=1
    if (( has_tbw )); then
      echo "TBW Used: ${tbw_used} TB"
    fi

    # Capacity for the endurance estimate. The capacity text normally is a
    # byte count ("480,103,981,056 bytes") because get_disk_info strips the
    # bracketed "[480 GB]" part; looking only for GB/TB left this at 0.
    local capacity_gb
    capacity_gb=$(_capacity_to_gb "$capacity")

    local lifespan_info
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout")
    local lifespan_percent tbw_remaining wear_status wear_source _rest
    IFS='|' read -r lifespan_percent tbw_remaining wear_status wear_source _rest <<< "$lifespan_info"

    if [[ "$wear_source" != "media_wearout" ]] && (( has_tbw )); then
      echo "TBW Remaining: $tbw_remaining"
    fi
    echo "Lifespan: $lifespan_percent ($wear_status)"
    # Show wear source if available
    case "$wear_source" in
      media_wearout) echo "Wear Source: Media Wearout Indicator" ;;
      tbw) echo "Wear Source: TBW Calculation" ;;
      estimated) echo "Wear Source: Estimated Endurance" ;;
    esac
  elif [[ "$disk_type" == "HDD" ]]; then
    if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then
      echo "Realloc Sectors: $reallocated_sectors"
    fi
    if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then
      echo "Pending Sectors: $pending_sectors"
    fi
    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
    echo "Lifespan: $lifespan"
  else
    print_color "$YELLOW" "Limited information available for this disk type"
    echo "This is normal for hardware RAID configurations like PERC H730P"
    echo "For detailed SAS drive information, use controller management tools"
  fi
  echo ""
}
# List whole-disk block devices (no partitions).
# Arguments: $1 - directory holding the device nodes (optional, default /dev)
# Outputs:   the device paths on one line, separated by spaces
# Compared with the earlier version the patterns also cover disks beyond the
# 26th SCSI/SATA disk (sdaa, sdab, ...) and NVMe controllers or namespaces
# with more than one digit (nvme10n1, nvme0n12).
detect_disks() {
  local dev_root=${1:-/dev}
  local -a disks=()
  local disk

  # SATA/SAS disks: sda..sdz, then sdaa..sdzz. Unmatched patterns stay
  # literal and are rejected by the -b test.
  for disk in "$dev_root"/sd[a-z] "$dev_root"/sd[a-z][a-z]; do
    [[ -b "$disk" ]] && disks+=("$disk")
  done

  # NVMe namespaces. The glob also matches partitions (nvme0n1p1), so the
  # name is checked against the exact "nvme<N>n<M>" form.
  for disk in "$dev_root"/nvme[0-9]*n[0-9]*; do
    [[ "$disk" =~ /nvme[0-9]+n[0-9]+$ && -b "$disk" ]] && disks+=("$disk")
  done

  # SCSI generic nodes.
  # NOTE(review): /dev/sg* nodes are normally character devices, so this -b
  # test presumably never matches - confirm whether -c was intended (that
  # would list each SCSI disk a second time).
  for disk in "$dev_root"/sg[0-9]; do
    [[ -b "$disk" ]] && disks+=("$disk")
  done

  # Virtual disks (virtio, Xen).
  for disk in "$dev_root"/vd[a-z] "$dev_root"/xvd[a-z]; do
    [[ -b "$disk" ]] && disks+=("$disk")
  done

  echo "${disks[@]}"
}
# Probe for disks that sit behind a hardware RAID controller.
# For every controller type and every slot number 0-31, smartctl is tried
# against a fixed list of device nodes; the first node that answers is
# recorded for that type/slot pair.
# Outputs: space-separated "<device>:<type>,<slot>" entries on one line.
detect_raid_controllers() {
  local -a driver_types=("megaraid" "cciss" "areca" "3ware" "hpt")
  local -a probe_nodes=("/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1")
  local -a found=()
  local driver slot node

  for driver in "${driver_types[@]}"; do
    for (( slot = 0; slot <= 31; slot++ )); do
      for node in "${probe_nodes[@]}"; do
        # Only existing block devices are probed.
        [[ -b "$node" ]] || continue
        smartctl -d "$driver,$slot" -i "$node" &>/dev/null || continue
        found+=("$node:$driver,$slot")
        # One hit per type/slot is enough; move on to the next slot.
        break
      done
    done
  done

  echo "${found[@]}"
}
# Program entry: check the disks named on the command line, or every disk
# that auto-detection finds when no argument is given.
# Arguments: optional device paths
# Globals:   BLUE, CYAN, GREEN, RED, YELLOW, VERSION, EUID (read)
# Exits with status 1 when there is nothing to check.
main() {
  print_color "$BLUE" "Ubuntu 24.04 Disk Health Check Script v$VERSION"
  print_color "$BLUE" "Enhanced with PERC H730P and SAS Support"
  print_color "$BLUE" "============================================"
  echo ""
  check_dependencies

  local -a disks=()
  local disk controller disk_info
  if [[ $# -gt 0 ]]; then
    # Explicit device list. Character devices are accepted as well: the
    # usage text offers /dev/sg0, and SCSI generic nodes are character
    # devices that a block-device-only test would reject.
    for disk in "$@"; do
      if [[ -b "$disk" || -c "$disk" ]]; then
        disks+=("$disk")
      else
        print_color "$RED" "Error: $disk is not a valid block device"
      fi
    done
  else
    # Auto-detect disks; progress messages are printed here, outside the
    # command substitutions, so they cannot end up in the device lists.
    print_color "$CYAN" "Auto-detecting disks (excluding partitions)..."
    local -a direct_disks=()
    read -ra direct_disks <<< "$(detect_disks)"
    print_color "$CYAN" "Scanning for RAID controllers..."
    local -a raid_disks=()
    read -ra raid_disks <<< "$(detect_raid_controllers)"
    # Combine both lists
    disks=("${direct_disks[@]}" "${raid_disks[@]}")
  fi

  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi
  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check if running as root, warn if not
  if [[ $EUID -ne 0 ]]; then
    print_color "$YELLOW" "Warning: Not running as root."
    print_color "$YELLOW" "Some disks/controllers may show limited information."
    echo "For complete results, run as: sudo $0"
    echo ""
  fi

  # Check each disk; RAID entries have the form "<device>:<type>,<slot>".
  for disk_info in "${disks[@]}"; do
    if [[ "$disk_info" == *":"* ]]; then
      IFS=':' read -r disk controller <<< "$disk_info"
      check_disk "$disk" "$controller"
    else
      check_disk "$disk_info"
    fi
  done

  print_color "$BLUE" "Check completed!"
  echo ""
  print_color "$CYAN" "Note: For PERC H730P controllers with SAS drives:"
  print_color "$CYAN" " - Install 'storcli' for detailed controller information"
  print_color "$CYAN" " - Use 'smartctl -d sat /dev/sgX' to try direct access"
  print_color "$CYAN" " - Hardware RAID controllers often limit SMART data access"
  echo ""
  print_color "$CYAN" "Ubuntu-specific tips:"
  print_color "$CYAN" " - Use 'lsblk' to see all available block devices"
  print_color "$CYAN" " - Use 'lshw -class disk' for detailed disk information"
}
# Print the command-line help text to stdout.
# Globals: SCRIPT_NAME (read) - program name shown in the synopsis and examples.
usage() {
  printf '%s\n' \
    "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks" \
    "" \
    "Examples:" \
    " $SCRIPT_NAME # Check all auto-detected disks" \
    " sudo $SCRIPT_NAME # Check all disks (as root)" \
    " $SCRIPT_NAME /dev/sda # Check specific disk" \
    " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" \
    " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" \
    " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks"
}
# Script entry point. Only the first argument is inspected for the
# informational flags; any other invocation hands every argument to main.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi

650
ubuntu-v2.6.sh Executable file
View File

@@ -0,0 +1,650 @@
#!/bin/bash
# Disk Health Check Script for Ubuntu 24.04
# Enhanced with SAS/PERC H730P controller support
# Checks SSD TBW/lifespan and HDD health status
# Script identity, shown in the usage and version output.
SCRIPT_NAME=$(basename "$0")
VERSION="2.6"
# Color codes.
# tput exits non-zero and complains on stderr when TERM is unset or the
# terminal has no color capability (for example under cron). In that case the
# variables fall back to empty strings and the output is simply uncolored.
RED=$(tput setaf 1 2>/dev/null || true)
GREEN=$(tput setaf 2 2>/dev/null || true)
YELLOW=$(tput setaf 3 2>/dev/null || true)
BLUE=$(tput setaf 4 2>/dev/null || true)
CYAN=$(tput setaf 6 2>/dev/null || true)
NC=$(tput sgr0 2>/dev/null || true)
# Write one line of text wrapped in a terminal color sequence.
# Arguments: $1 - color escape sequence, $2 - message text
# Globals:   NC (read) - reset sequence appended after the message
# Backslash escapes in the arguments are interpreted (echo -e).
print_color() {
  local tint=$1 text=$2
  echo -e "${tint}${text}${NC}"
}
# Succeed when "$1" resolves to something `command -v` can find.
# Produces no output; the exit status is the answer.
command_exists() {
  command -v "$1" &>/dev/null
}
# Verify that the external tools the script relies on are installed.
# Prints the missing package names plus an apt install hint and exits with
# status 1 when at least one tool is absent; otherwise does nothing.
# Globals: RED (read)
check_dependencies() {
  local -a absent=()
  local spec
  # Each entry is "<command to look for>:<package that provides it>".
  for spec in "smartctl:smartmontools" "bc:bc"; do
    command_exists "${spec%%:*}" || absent+=("${spec#*:}")
  done
  (( ${#absent[@]} == 0 )) && return
  print_color "$RED" "Error: Missing required packages: ${absent[*]}"
  echo "Install with: sudo apt update && sudo apt install ${absent[*]}"
  exit 1
}
# Probe how much SMART data smartctl can obtain for a disk.
# Arguments: $1 - device path
#            $2 - optional smartctl device type (value for "-d"), may be empty
# Outputs:   exactly one word on stdout:
#              no_access      - "smartctl -i" failed
#              not_available  - no "SMART support is: Available" line
#              disabled       - SMART available but no "Enabled" line
#              no_attributes  - attribute/health data could not be read
#              full_access    - data readable
test_smart_access() {
  local disk=$1
  local controller=$2
  # Build the command as an array so the arguments are passed verbatim.
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  # One "-i" call is both the access probe and the data source; the previous
  # code ran the same command twice.
  local smart_info
  if ! smart_info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null); then
    echo "no_access"
    return
  fi

  # NVMe output has no "SMART support is:" lines; readability of the health
  # report is the test there.
  if grep -qi "NVMe" <<< "$smart_info"; then
    local rc=0
    "${smart_cmd[@]}" -H "$disk" &>/dev/null || rc=$?
    # smartctl's exit status is a bit mask. Bits 0-2 (values 1, 2, 4) report
    # usage, open and command failures; higher bits describe the drive's
    # health, so a failing drive must not be mistaken for an unreadable one.
    if (( rc & 7 )); then
      echo "no_attributes"
    else
      echo "full_access"
    fi
    return
  fi

  # SATA/SAS: one "SMART support is:" line for availability and one for the
  # enabled state; a missing line is treated like "not available".
  local support_lines
  support_lines=$(grep "SMART support is:" <<< "$smart_info")
  if [[ "$support_lines" != *Available* ]]; then
    echo "not_available"
    return
  fi
  if [[ "$support_lines" != *Enabled* ]]; then
    echo "disabled"
    return
  fi

  # The attribute table is recognized by its ATTRIBUTE_NAME column header.
  local attributes
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  if [[ "$attributes" != *ATTRIBUTE_NAME* ]]; then
    echo "no_attributes"
    return
  fi
  echo "full_access"
}
# Helper: print the text after the first ':' of the first line of $1 that
# matches the case-insensitive grep pattern $2, with leading blanks removed.
_smart_field() {
  grep -i -- "$2" <<< "$1" | cut -d: -f2 | sed 's/^[ \t]*//' | head -1
}

# Collect identity, health and wear data for one disk from smartctl.
# Arguments: $1 - device path
#            $2 - optional smartctl device type (value for "-d"), may be empty
# Outputs:   one line with 12 '|'-separated fields:
#            model|serial|capacity|firmware|health_status|disk_type|
#            power_on_hours|reallocated_sectors|pending_sectors|
#            total_written|host_writes_32mib|media_wearout
#            Fields that could not be determined are empty.
get_disk_info() {
  local disk=$1
  local controller=$2
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  local info attributes health
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
  health=$("${smart_cmd[@]}" -H "$disk" 2>/dev/null)

  # Identity fields; ATA, SCSI/SAS and NVMe output use different labels.
  local model vendor serial capacity firmware health_status
  model=$(_smart_field "$info" "Device Model:\|Product:\|Model Number:")
  vendor=$(_smart_field "$info" "Vendor:")
  [[ -n "$vendor" && -n "$model" ]] && model="$vendor $model"
  serial=$(_smart_field "$info" "Serial Number:\|Serial number:")
  capacity=$(_smart_field "$info" "User Capacity:\|Total NVM Capacity:\|Namespace 1 Size/Capacity:")
  capacity=${capacity%%\[*}   # drop the "[480 GB]" suffix
  firmware=$(_smart_field "$info" "Firmware Version:\|Firmware revision:\|Revision:")
  health_status=$(_smart_field "$health" "result:\|SMART overall-health\|Health Status:")
  if [[ -z "$health_status" ]]; then
    health_status=$(grep -i "SMART overall-health" <<< "$health" | awk -F'[' '{print $2}' | cut -d']' -f1)
  fi

  # Classify the disk from the information section.
  local disk_type="UNKNOWN"
  if grep -qi "Solid State Device\|NVMe" <<< "$info"; then
    disk_type="SSD"
  elif grep -qi "Rotation Rate" <<< "$info"; then
    disk_type="HDD"
  elif grep -qi "SCSI\|SAS" <<< "$info"; then
    # No explicit rotation line: a common spindle speed anywhere in the
    # output is taken as a sign of a rotating disk.
    if grep -qi "15000\|10000\|7200" <<< "$info"; then
      disk_type="HDD"
    else
      disk_type="SSD"
    fi
  fi

  # ATA attribute table: column 10 is the raw value.
  local power_on_hours reallocated_sectors pending_sectors
  power_on_hours=$(grep -i "Power_On_Hours" <<< "$attributes" | awk '{print $10}' | head -1)
  # Some drives report e.g. "9876h+12m+30.500s". Keep the leading hour count;
  # deleting every non-digit instead would glue the minutes and seconds onto
  # the hours.
  power_on_hours=${power_on_hours%%[^0-9]*}
  if [[ -z "$power_on_hours" ]]; then
    # NVMe health log is "Label: value" text, e.g. "Power On Hours: 1,234",
    # which has no 10th column.
    power_on_hours=$(_smart_field "$attributes" "^Power On Hours:")
    power_on_hours=${power_on_hours//[^0-9]/}
  fi
  reallocated_sectors=$(grep -i "Reallocated_Sector_Ct" <<< "$attributes" | awk '{print $10}' | head -1)
  pending_sectors=$(grep -i "Current_Pending_Sector" <<< "$attributes" | awk '{print $10}' | head -1)

  # Write counters: the first matching attribute wins, so the unit of
  # total_written depends on which attribute the drive exposes.
  # NOTE(review): the NVMe "Data Units Written" line has no 10th column, so
  # this stays empty for NVMe drives. It is left that way because the unit the
  # consumer of this field expects could not be confirmed from here.
  local total_written host_writes_32mib media_wearout
  total_written=$(grep -i "Total_LBAs_Written\|Lifetime_Writes_GiB\|Host_Writes_32MiB\|Flash_Writes_GiB\|Data Units Written" <<< "$attributes" | awk '{print $10}' | head -1)
  host_writes_32mib=$(grep -i "Host_Writes_32MiB" <<< "$attributes" | awk '{print $10}' | head -1)

  # Wear indicator (raw value of the first matching ATA attribute).
  media_wearout=$(grep -i "Media_Wearout_Indicator\|Wear_Leveling_Count\|SSD_Life_Left\|Percentage Used\|Available Spare" <<< "$attributes" | awk '{print $10}' | head -1)
  if [[ -z "$media_wearout" ]]; then
    # NVMe: "Percentage Used: 3%" counts upwards, while estimate_ssd_lifespan
    # treats the indicator as life left, so report 100 - used. The result is
    # kept at 1 or above because that function ignores a value of 0.
    local pct_used
    pct_used=$(_smart_field "$attributes" "^Percentage Used:")
    pct_used=${pct_used%%[^0-9]*}
    if [[ -n "$pct_used" ]]; then
      media_wearout=$(( 100 - 10#$pct_used ))
      (( media_wearout < 1 )) && media_wearout=1
    fi
  fi

  echo "$model|$serial|$capacity|$firmware|$health_status|$disk_type|$power_on_hours|$reallocated_sectors|$pending_sectors|$total_written|$host_writes_32mib|$media_wearout"
}
# Convert a drive's write counter into terabytes written (decimal TB).
# Arguments: $1 - raw counter: GiB for Kingston models, NVMe data units when
#                 $4 names "Data Units Written", otherwise units of 32 MiB
#            $2 - number of 512-byte sectors written
#            $3 - disk model string
#            $4 - name of the attribute the raw counter came from (optional)
# Outputs:   TB written, truncated to two decimals, or "0" when no usable
#            counter was supplied.
# The arithmetic is done on exact byte counts with shell integers:
#  - one NVMe data unit is 1000 x 512 bytes = 512,000 bytes (the previous
#    code assumed 1,000,000 and overstated the total by about 95%);
#  - GiB and MiB counters are converted with their binary sizes;
#  - non-numeric input yields "0" instead of an arithmetic error.
calculate_tbw() {
  local raw_value=$1
  local sectors=$2
  local disk_model=$3
  local attribute_name=$4
  local bytes

  # Anything that is not a plain non-negative integer counts as "no data".
  [[ "$raw_value" =~ ^[0-9]+$ ]] || raw_value=0
  [[ "$sectors" =~ ^[0-9]+$ ]] || sectors=0
  raw_value=$((10#$raw_value))   # 10# avoids octal parsing of leading zeros
  sectors=$((10#$sectors))

  if (( raw_value > 0 )) && [[ "${disk_model^^}" == *KINGSTON* ]]; then
    # Kingston counters (Lifetime_Writes_GiB / Flash_Writes_GiB) are in GiB.
    bytes=$(( raw_value * 1073741824 ))
  elif (( raw_value > 0 )) && [[ "${attribute_name,,}" == *"data units written"* ]]; then
    # NVMe data units: thousands of 512-byte blocks.
    bytes=$(( raw_value * 512000 ))
  elif (( sectors > 0 )); then
    bytes=$(( sectors * 512 ))
  elif (( raw_value > 0 )); then
    # Host_Writes_32MiB: one unit is 32 MiB = 33554432 bytes.
    bytes=$(( raw_value * 33554432 ))
  else
    echo "0"
    return
  fi

  # 1 TB = 10^12 bytes; work in hundredths of a TB to keep two decimals.
  local hundredths=$(( bytes / 10000000000 ))
  printf '%d.%02d\n' "$(( hundredths / 100 ))" "$(( hundredths % 100 ))"
}
# Guess the rated write endurance of an SSD from its model and capacity.
# Arguments: $1 - disk model string, $2 - capacity in GB
# Outputs:   estimated endurance in TB written
# The model selects a drive family; within a family the first capacity
# threshold that is met decides the value, otherwise the family floor is used.
estimate_ssd_endurance() {
  local disk_model=$1
  local capacity_gb=$2
  # Each tier is "<minimum capacity in GB>:<endurance in TB>", largest first.
  local -a tiers
  local floor_tb

  if grep -qi "KINGSTON.*SA400" <<< "$disk_model"; then
    # Kingston consumer SSDs
    tiers=(960:300 480:150)
    floor_tb=80
  elif grep -qi "NVMe" <<< "$disk_model"; then
    # Models that carry "NVMe" in their name
    tiers=(2000:1200 1000:600 500:300)
    floor_tb=150
  elif grep -qi "ST600MP\|SEAGATE.*SSD\|SAS.*SSD" <<< "$disk_model"; then
    # Enterprise SAS SSDs - very high endurance (values in the PB range)
    tiers=(1000:10000 600:6000 400:4000)
    floor_tb=2000
  elif grep -qi "MTFDDAK\|MICRON\|INTEL\|SAMSUNG\|KIOXIA\|WDC\|WESTERN DIGITAL" <<< "$disk_model"; then
    # Enterprise SATA/NVMe SSDs
    tiers=(1000:1200 480:600 240:300)
    floor_tb=150
  else
    # Consumer SSDs
    tiers=(1000:600 480:300 240:150 120:80)
    floor_tb=40
  fi

  local tier
  for tier in "${tiers[@]}"; do
    if [[ $capacity_gb -ge ${tier%%:*} ]]; then
      echo "${tier#*:}"
      return
    fi
  done
  echo "$floor_tb"
}
# Estimate the remaining life of an SSD.
# Arguments: $1 - power-on hours (a suffix such as "h+12m" is ignored)
#            $2 - TB written so far (defaults to 0)
#            $3 - disk model string
#            $4 - capacity in GB
#            $5 - wear indicator, treated as percent of life left (optional)
# Globals:   RED, YELLOW, GREEN, NC (read)
# Outputs:   one line "percent|tbw_remaining|status|source" where source is
#            media_wearout, tbw, estimated, or empty when nothing is known.
# Changes from the earlier version: the "nothing known" record now has the
# same four fields as every other record (it used to contain doubled
# separators, leaving the tbw_remaining field empty), numeric inputs are
# reduced to their leading digits before being compared, and the identical
# Kingston / non-Kingston branches are merged.
estimate_ssd_lifespan() {
  local power_on_hours=$1
  local tbw_used=${2:-0}
  local disk_model=$3
  local capacity_gb=$4
  local media_wearout=$5

  # Keep only the leading integer part so the numeric tests cannot fail.
  power_on_hours=${power_on_hours%%[^0-9]*}
  media_wearout=${media_wearout%%[^0-9]*}

  if [[ -z "$power_on_hours" ]] || (( 10#$power_on_hours == 0 )); then
    echo "Unknown|Unknown|Unknown|"
    return
  fi

  local estimated_endurance tbw_remaining
  estimated_endurance=$(estimate_ssd_endurance "$disk_model" "$capacity_gb")
  tbw_remaining=$(echo "scale=2; $estimated_endurance - $tbw_used" | bc 2>/dev/null || echo "0")

  local color status
  # A wear indicator, when present and non-zero, takes precedence over the
  # TBW-based estimate.
  if [[ -n "$media_wearout" ]] && (( 10#$media_wearout != 0 )); then
    local life_left=$(( 10#$media_wearout ))
    if (( life_left <= 10 )); then
      color=$RED; status="Critical wear"
    elif (( life_left <= 30 )); then
      color=$YELLOW; status="High wear"
    elif (( life_left <= 70 )); then
      color=$YELLOW; status="Moderate wear"
    else
      color=$GREEN; status="Healthy"
    fi
    echo "${color}${life_left}%${NC}|${color}${tbw_remaining} TB${NC}|${color}${status}${NC}|media_wearout"
    return
  fi

  if [[ $(echo "$tbw_used > 0" | bc 2>/dev/null) -eq 1 ]]; then
    local lifespan_used lifespan_remaining
    lifespan_used=$(echo "scale=1; $tbw_used * 100 / $estimated_endurance" | bc 2>/dev/null || echo "0")
    lifespan_remaining=$(echo "scale=1; 100 - $lifespan_used" | bc 2>/dev/null || echo "100")
    if [[ $(echo "$lifespan_used >= 80" | bc 2>/dev/null) -eq 1 ]]; then
      color=$RED; status="High wear"
    elif [[ $(echo "$lifespan_used >= 50" | bc 2>/dev/null) -eq 1 ]]; then
      color=$YELLOW; status="Moderate wear"
    else
      color=$GREEN; status="Healthy"
    fi
    echo "${color}${lifespan_remaining}%${NC}|${color}${tbw_remaining} TB${NC}|${color}${status}${NC}|tbw"
  else
    # No write counter either: report the estimated rating only.
    echo "Unknown|${estimated_endurance} TB|New|estimated"
  fi
}
# Give a rough remaining-life verdict for a rotating disk.
# Arguments: $1 - power-on hours; everything from the first non-digit on is
#                 ignored
#            $2 - reallocated sector count (defaults to 0)
#            $3 - pending sector count (defaults to 0)
# Globals:   RED, YELLOW, GREEN, NC (read)
# Outputs:   "Unknown" when the hour count is missing or zero, otherwise a
#            colored estimate followed by the reason in parentheses.
estimate_hdd_lifespan() {
  local power_on_hours=$1
  local reallocated_sectors=${2:-0}
  local pending_sectors=${3:-0}

  # Leading digits only, with 0 standing in for "no usable number".
  local hours=${power_on_hours%%[^0-9]*}
  hours=${hours:-0}
  if [[ "$hours" -eq 0 ]]; then
    echo "Unknown"
    return
  fi

  # Sector problems outrank age: pending first, then reallocated, then hours.
  local verdict
  if [[ "$pending_sectors" -gt 0 ]]; then
    verdict="${RED}CRITICAL${NC} (Pending sectors: $pending_sectors)"
  elif [[ "$reallocated_sectors" -gt 100 ]]; then
    verdict="${RED}< 6 months${NC} (High reallocated sectors: $reallocated_sectors)"
  elif [[ "$reallocated_sectors" -gt 10 ]]; then
    verdict="${YELLOW}6-12 months${NC} (Reallocated sectors: $reallocated_sectors)"
  elif [[ "$hours" -gt 40000 ]]; then
    verdict="${YELLOW}1-2 years${NC} (High usage: $hours hours)"
  elif [[ "$hours" -gt 25000 ]]; then
    verdict="${GREEN}2-3 years${NC} (Moderate usage: $hours hours)"
  else
    verdict="${GREEN}> 3 years${NC} (Low usage: $hours hours)"
  fi
  echo "$verdict"
}
#######################################
# Print a health report for a single disk.
# Relies on print_color, test_smart_access, get_disk_info, calculate_tbw,
# estimate_ssd_lifespan and estimate_hdd_lifespan being defined in this file.
# Globals:   RED, YELLOW, CYAN (read) - colours handed to print_color
# Arguments: $1 - device path given to smartctl (e.g. /dev/sda)
#            $2 - optional smartctl device type (e.g. "megaraid,0");
#                 empty means the disk is accessed directly
# Outputs:   the report on stdout, terminated by an empty line
# Returns:   early, after an explanatory message, when test_smart_access
#            reports no_access, not_available, disabled or no_attributes
#######################################
check_disk() {
  local disk=$1
  local controller=$2

  # Fields of the '|'-separated record produced by get_disk_info. Declared
  # local so values from one disk cannot leak into the report of the next.
  local model serial capacity firmware health_status disk_type
  local power_on_hours reallocated_sectors pending_sectors
  local total_written host_writes_32mib media_wearout

  # Device-type arguments for direct smartctl calls; empty for direct access.
  local -a type_args=()
  if [[ -n "$controller" ]]; then
    type_args=(-d "$controller")
  fi

  print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
  echo "=================================================="

  # Test SMART access level
  local access_level
  access_level=$(test_smart_access "$disk" "$controller")
  case "$access_level" in
    "no_access")
      print_color "$RED" "ERROR: Cannot access disk through controller"
      echo "Possible reasons:"
      echo " - Controller doesn't support SMART passthrough"
      echo " - Disk is part of a hardware RAID array"
      echo " - Insufficient permissions (try running as root)"
      echo " - Controller busy or offline"
      echo ""
      return
      ;;
    "not_available")
      print_color "$YELLOW" "SMART not available on this disk"
      echo "This disk does not support SMART monitoring"
      echo ""
      return
      ;;
    "disabled")
      print_color "$YELLOW" "SMART is disabled on this disk"
      echo "SMART is available but currently disabled"
      echo "To enable manually: smartctl -s on ${controller:+-d $controller} $disk"
      echo ""
      return
      ;;
    "no_attributes")
      print_color "$YELLOW" "WARNING: Cannot read SMART attributes"
      echo "This is common with hardware RAID controllers like PERC H730P"
      echo "Try checking through the RAID management interface"
      echo ""
      return
      ;;
    "limited_attributes")
      # Not fatal: continue and report whatever the controller lets through.
      print_color "$YELLOW" "NOTE: Limited SMART data available"
      echo "Controller is filtering some SMART attributes"
      ;;
  esac

  # Get disk information
  local disk_info
  disk_info=$(get_disk_info "$disk" "$controller")
  IFS='|' read -r model serial capacity firmware health_status disk_type \
    power_on_hours reallocated_sectors pending_sectors \
    total_written host_writes_32mib media_wearout <<< "$disk_info"

  # Display basic information
  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Capacity: ${capacity:-Unknown}"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"

  # Only show power on hours if available
  if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
    echo "Power On Hours: $power_on_hours"
  else
    echo "Power On Hours: Unknown"
  fi

  # Disk type specific analysis
  if [[ "$disk_type" == "SSD" ]]; then
    # Get the actual attribute name for TBW calculation. The controller is
    # passed along so a RAID member is queried through the same path as the
    # rest of this report instead of the bare device node.
    local attributes
    attributes=$(smartctl -A "${type_args[@]}" "$disk" 2>/dev/null)
    local tbw_attribute_name
    tbw_attribute_name=$(echo "$attributes" | grep -i "Lifetime_Writes_GiB\|Flash_Writes_GiB\|Data Units Written" | head -1 | awk '{print $2}')

    # Prefer the total-written counter; fall back to the 32MiB host-write
    # counter; otherwise the usage stays at 0.
    local tbw_used=0
    if [[ -n "$total_written" && "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "" "$total_written" "$model" "$tbw_attribute_name")
    elif [[ -n "$host_writes_32mib" && "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$host_writes_32mib" "" "$model" "$tbw_attribute_name")
    fi

    # Always show TBW information for SSDs
    echo "TBW Used: ${tbw_used} TB"

    # Estimate capacity for endurance calculation: the first number in the
    # capacity string, converted to whole GB (TB values are multiplied by 1000).
    local capacity_gb=0
    if echo "$capacity" | grep -qi "GB"; then
      capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | cut -d. -f1)
    elif echo "$capacity" | grep -qi "TB"; then
      capacity_gb=$(echo "$capacity" | grep -o '[0-9.]*' | head -1 | awk '{print $1 * 1000}' | bc 2>/dev/null | cut -d. -f1)
    fi

    # estimate_ssd_lifespan answers with "percent|remaining|status|source".
    local lifespan_info
    lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" "$model" "$capacity_gb" "$media_wearout")
    local lifespan_percent tbw_remaining wear_status wear_source _
    IFS='|' read -r lifespan_percent tbw_remaining wear_status wear_source _ <<< "$lifespan_info"

    echo "TBW Remaining: $tbw_remaining"
    echo "Lifespan: $lifespan_percent ($wear_status)"

    # Show wear source if available
    if [[ "$wear_source" == "media_wearout" ]]; then
      echo "Wear Source: Media Wearout Indicator"
    elif [[ "$wear_source" == "tbw" ]]; then
      echo "Wear Source: TBW Calculation"
    elif [[ "$wear_source" == "estimated" ]]; then
      echo "Wear Source: Estimated Endurance"
    fi
  elif [[ "$disk_type" == "HDD" ]]; then
    # Sector counters are only printed when they are non-zero.
    if [[ -n "$reallocated_sectors" && "$reallocated_sectors" != "0" ]]; then
      echo "Realloc Sectors: $reallocated_sectors"
    fi
    if [[ -n "$pending_sectors" && "$pending_sectors" != "0" ]]; then
      echo "Pending Sectors: $pending_sectors"
    fi
    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "${reallocated_sectors:-0}" "${pending_sectors:-0}")
    echo "Lifespan: $lifespan"
  else
    print_color "$YELLOW" "Limited information available for this disk type"
    echo "This is normal for hardware RAID configurations like PERC H730P"
    echo "For detailed SAS drive information, use controller management tools"
  fi
  echo ""
}
#######################################
# List whole-disk block devices (no partitions).
# Covers SATA/SAS (/dev/sdX), NVMe namespaces (/dev/nvmeXnY) and
# virtualised disks (/dev/vdX, /dev/xvdX), in that order.
# Arguments: none
# Outputs:   the device paths on a single stdout line, separated by spaces
#            (an empty line when nothing is found)
#######################################
detect_disks() {
  local -a disks=()
  local disk
  # Whole-disk names only. Multi-letter suffixes (sdaa, ...) and multi-digit
  # NVMe controller/namespace numbers are accepted; partition names
  # (sda1, nvme0n1p1) are not.
  local -r whole_disk_re='^/dev/((sd|vd|xvd)[a-z]+|nvme[0-9]+n[0-9]+)$'

  # Broad globs gather the candidates and the regex narrows them. A glob that
  # matches nothing stays literal and is discarded by the -b test.
  for disk in /dev/sd* /dev/nvme*n* /dev/vd* /dev/xvd*; do
    [[ -b "$disk" ]] || continue
    if [[ "$disk" =~ $whole_disk_re ]]; then
      disks+=("$disk")
    fi
  done

  # NOTE: /dev/sg* (SCSI generic) nodes are deliberately not listed. They are
  # character devices, so the block-device test applied to them previously
  # could never succeed, and on Linux a disk handled by the sd driver already
  # appears as /dev/sdX - listing its sg twin would report it twice.

  echo "${disks[@]}"
}
#######################################
# Probe for disks that sit behind a hardware RAID controller.
# For every smartctl controller type (megaraid, cciss, areca, 3ware, hpt) and
# every slot 0..31, the device handles are tried in order; the first handle
# for which "smartctl -d TYPE,SLOT -i HANDLE" succeeds is recorded.
# Arguments: [$@] - optional device handles to probe through; defaults to
#                   /dev/sda /dev/sdb /dev/sdc /dev/sg0 /dev/sg1
# Outputs:   space-separated "HANDLE:TYPE,SLOT" entries on one stdout line
#            (an empty line when nothing responds)
#######################################
detect_raid_controllers() {
  local -a controllers=("megaraid" "cciss" "areca" "3ware" "hpt")
  local -a candidates=("$@")
  local -a handles=()
  local -a raid_disks=()
  local controller base_disk i

  if (( ${#candidates[@]} == 0 )); then
    candidates=("/dev/sda" "/dev/sdb" "/dev/sdc" "/dev/sg0" "/dev/sg1")
  fi

  # Resolve the usable handles once instead of re-testing them for every
  # controller/slot pair. /dev/sg* nodes are character devices, so both
  # device kinds are accepted - a block-only test would silently drop them.
  for base_disk in "${candidates[@]}"; do
    if [[ -b "$base_disk" || -c "$base_disk" ]]; then
      handles+=("$base_disk")
    fi
  done

  # Check for RAID controllers (skipped entirely when no handle exists)
  if (( ${#handles[@]} > 0 )); then
    for controller in "${controllers[@]}"; do
      for i in {0..31}; do
        for base_disk in "${handles[@]}"; do
          if smartctl -d "$controller,$i" -i "$base_disk" &>/dev/null; then
            raid_disks+=("$base_disk:$controller,$i")
            break  # slot found; do not record it again through another handle
          fi
        done
      done
    done
  fi

  echo "${raid_disks[@]}"
}
#######################################
# Script entry point: collect the disks to inspect and report on each one.
# Globals:   VERSION, EUID, BLUE, CYAN, RED, GREEN, YELLOW (read)
# Arguments: [$@] - optional device paths; when none are given the disks are
#                   auto-detected via detect_disks and detect_raid_controllers
# Outputs:   banner, per-disk reports and closing tips on stdout
# Returns:   exits with status 1 when no usable disk is found
#######################################
main() {
  print_color "$BLUE" "Ubuntu 24.04 Disk Health Check Script v$VERSION"
  print_color "$BLUE" "Enhanced with PERC H730P and SAS Support"
  print_color "$BLUE" "============================================"
  echo ""
  check_dependencies

  local -a disks=()
  local disk controller disk_info

  # If specific disk provided, check only that disk
  if [[ $# -gt 0 ]]; then
    for disk in "$@"; do
      # Character devices are accepted as well: SCSI generic nodes such as
      # /dev/sg0 (see the usage examples) are not block devices.
      if [[ -b "$disk" || -c "$disk" ]]; then
        disks+=("$disk")
      else
        print_color "$RED" "Error: $disk is not a valid block or character device"
      fi
    done
  else
    # Auto-detect disks
    print_color "$CYAN" "Auto-detecting disks (excluding partitions)..."
    local -a direct_disks=()
    read -ra direct_disks <<< "$(detect_disks)"
    print_color "$CYAN" "Scanning for RAID controllers..."
    local -a raid_disks=()
    read -ra raid_disks <<< "$(detect_raid_controllers)"
    # Combine both lists
    disks=("${direct_disks[@]}" "${raid_disks[@]}")
  fi

  if [[ ${#disks[@]} -eq 0 ]]; then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi
  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  # Check if running as root, warn if not
  if [[ $EUID -ne 0 ]]; then
    print_color "$YELLOW" "Warning: Not running as root."
    print_color "$YELLOW" "Some disks/controllers may show limited information."
    echo "For complete results, run as: sudo $0"
    echo ""
  fi

  # Check each disk
  for disk_info in "${disks[@]}"; do
    # Entries of the form "DEVICE:TYPE,SLOT" come from the RAID scan; split
    # them so the controller can be passed to check_disk separately.
    if [[ "$disk_info" == *":"* ]]; then
      IFS=':' read -r disk controller <<< "$disk_info"
      check_disk "$disk" "$controller"
    else
      check_disk "$disk_info"
    fi
  done

  print_color "$BLUE" "Check completed!"
  echo ""
  print_color "$CYAN" "Note: For PERC H730P controllers with SAS drives:"
  print_color "$CYAN" " - Install 'storcli' for detailed controller information"
  print_color "$CYAN" " - Use 'smartctl -d sat /dev/sgX' to try direct access"
  print_color "$CYAN" " - Hardware RAID controllers often limit SMART data access"
  echo ""
  print_color "$CYAN" "Ubuntu-specific tips:"
  print_color "$CYAN" " - Use 'lsblk' to see all available block devices"
  print_color "$CYAN" " - Use 'lshw -class disk' for detailed disk information"
}
#######################################
# Print the command-line help text.
# Globals:   SCRIPT_NAME (read) - name shown in the synopsis and examples
# Arguments: none
# Outputs:   the help text on stdout
#######################################
usage() {
  # One printf call; every argument becomes one output line.
  printf '%s\n' \
    "Usage: $SCRIPT_NAME [DISK1 DISK2 ...]" \
    "" \
    "If no disks specified, auto-detects all available disks" \
    "" \
    "Examples:" \
    " $SCRIPT_NAME # Check all auto-detected disks" \
    " sudo $SCRIPT_NAME # Check all disks (as root)" \
    " $SCRIPT_NAME /dev/sda # Check specific disk" \
    " $SCRIPT_NAME /dev/nvme0n1 # Check NVMe disk" \
    " $SCRIPT_NAME /dev/sg0 # Check SAS disk directly" \
    " $SCRIPT_NAME /dev/sda /dev/nvme0n1 # Check multiple disks"
}
# Command-line dispatch: only the first argument is inspected for an option.
# -h/--help prints the usage text and -v/--version prints the version, both
# exiting with status 0; anything else (including no arguments at all) is
# handed to main unchanged.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
  exit 0
elif [[ "${1:-}" == "-v" || "${1:-}" == "--version" ]]; then
  echo "$SCRIPT_NAME version $VERSION"
  exit 0
else
  main "$@"
fi