#!/bin/bash
#
# Disk Health Check Script for Harvester OS
# Checks SATA HDD, SATA SSD, SAS, NVMe, RAID controllers, and soft-raid.
# Supports consumer and enterprise disk classification.
#
# This first part of the script holds the configuration tables and the helper
# functions that classify a disk and turn raw SMART values into estimates.
# Requires bash 4+ (associative arrays) and smartctl (smartmontools).

SCRIPT_NAME=$(basename -- "$0")
VERSION="3.8"

# Color codes. tput fails when TERM is unset or unknown (cron, CI, pipes);
# in that case the variables stay empty and the output is simply uncoloured.
RED=$(tput setaf 1 2>/dev/null || true)
GREEN=$(tput setaf 2 2>/dev/null || true)
YELLOW=$(tput setaf 3 2>/dev/null || true)
BLUE=$(tput setaf 4 2>/dev/null || true)
CYAN=$(tput setaf 6 2>/dev/null || true)
MAGENTA=$(tput setaf 5 2>/dev/null || true)
NC=$(tput sgr0 2>/dev/null || true)

#######################################
# Print a message wrapped in a colour escape sequence.
# Arguments: $1 - colour sequence (may be empty), $2 - message
# Outputs:   the message followed by the reset sequence, on stdout
#######################################
print_color() {
  local color=${1-}
  local message=${2-}
  printf '%b\n' "${color}${message}${NC}"
}

#######################################
# Test whether a command is available on PATH.
# Arguments: $1 - command name
# Returns:   0 if found, non-zero otherwise
#######################################
command_exists() {
  command -v "$1" >/dev/null 2>&1
}

# smartctl is mandatory for every check, but --help/--version must keep
# working on a machine where it is not installed yet.
case "${1:-}" in
  -h | --help | -v | --version) ;;
  *)
    if ! command_exists smartctl; then
      print_color "$RED" "Error: smartctl is not installed. Please install smartmontools package." >&2
      exit 1
    fi
    ;;
esac

# Known model capacities (GB), used when the size cannot be read from the
# block device.
declare -A MODEL_CAPACITIES=(
  ["ST91000640NS"]="1000"
  ["ST2000NM0033"]="2000"
  ["ST4000NM0033"]="4000"
  ["MB1000GCWCV"]="1000"
  ["MB2000GCWDB"]="2000"
  ["AL15SEB120N"]="1200"
  ["AL15SEB600N"]="600"
  ["HUC101212CSS600"]="1200"
  ["HUC103012CSS600"]="3000"
  ["HUC109090CSS600"]="900"
  ["MAX3147RC"]="147"
  ["ST3146356SS"]="146"
  ["ST3146855SS"]="146"
  ["ST33000650SS"]="3000"
  ["ST3600057SS"]="600"
  ["ST9146803SS"]="146"
  ["ST973451SS"]="73"
  ["AL13SXB300N"]="300"
  ["KPM6XRUG960G"]="960"
  ["MZILT3T8HBLS0D3"]="3840"
  ["MZILT960HBHQ0D3"]="960"
  # Add more models as encountered
)

# TBW endurance standards per capacity tier (GB -> TB written), using the
# lowest numbers.
declare -A CONSUMER_TBW=(
  ["250"]=150
  ["500"]=300
  ["1000"]=600
  ["2000"]=1200
  ["4000"]=2400
  ["8000"]=4800
)

declare -A ENTERPRISE_TBW=(
  ["250"]=450
  ["500"]=900
  ["1000"]=1800
  ["2000"]=3600
  ["4000"]=7200
  ["8000"]=14400
)

#######################################
# Normalise arbitrary text to a non-negative base-10 integer.
# Only the first line is considered; leading whitespace is skipped and the
# leading run of digits is kept ("12345h+30m" -> 12345, "08" -> 8).
# Arguments: $1 - raw text
# Outputs:   the integer, or 0 when the text does not start with a digit
#######################################
to_uint() {
  local value=${1-}
  value=${value%%$'\n'*}
  value=${value#"${value%%[![:space:]]*}"}
  value=${value%%[!0-9]*}
  if [[ -n "$value" ]]; then
    # 10# forces decimal so zero-padded values are not read as octal.
    printf '%d\n' "$((10#$value))"
  else
    printf '0\n'
  fi
}

#######################################
# Extract the value of the first "Label: value" line matching a label.
# Arguments: $1 - multi-line text, $2 - label pattern (case-insensitive)
# Outputs:   the text after the first colon, leading whitespace removed
#######################################
smart_field() {
  printf '%s\n' "$1" | grep -i -m1 -- "$2" | cut -d: -f2- | sed 's/^[[:space:]]*//'
}

# Print the first number on the first line of $1 matching pattern $2.
_first_number() {
  printf '%s\n' "$1" | grep -i -m1 -- "$2" | grep -oE '[0-9]+' | head -n1
}

# Print the last whitespace-separated field of the first line of $1 matching $2.
_last_field() {
  printf '%s\n' "$1" | grep -i -m1 -- "$2" | awk '{ print $NF }'
}

#######################################
# Map a capacity to the smallest endurance tier that can hold it.
# Arguments: $1 - capacity in GB (non-numeric input is treated as 0)
# Outputs:   one of 250 500 1000 2000 4000 8000
#######################################
get_capacity_tier() {
  local capacity_gb
  local tier
  capacity_gb=$(to_uint "${1-}")

  for tier in 250 500 1000 2000 4000 8000; do
    if ((capacity_gb <= tier)); then
      printf '%s\n' "$tier"
      return 0
    fi
  done
  printf '8000\n'
}

#######################################
# Extract numeric hours from a power-on-hours field.
# Thousands separators are removed first ("1,234" -> 1234).
# Arguments: $1 - raw field, e.g. "12345" or "12345h+30m+10.5s"
# Outputs:   the hour count, or 0 when none can be parsed
#######################################
extract_numeric_hours() {
  local power_on_hours=${1-}
  to_uint "${power_on_hours//,/}"
}

#######################################
# Classify a disk from `smartctl -i` output.
# The enterprise flag is a keyword heuristic. Keywords are matched as whole
# words so that e.g. "Sep" in the timestamp line or "WDC" in a model name do
# not count as "EP" / "DC".
# Arguments: $1 - device path
#            $2 - optional smartctl -d argument ("" or "direct" = none)
# Outputs:   "disk_type|transport|is_enterprise"
#######################################
get_disk_info() {
  local disk=$1
  local controller=${2-}
  local -a cmd=(smartctl)
  if [[ -n "$controller" && "$controller" != "direct" ]]; then
    cmd+=(-d "$controller")
  fi

  local info
  info=$("${cmd[@]}" -i "$disk" 2>/dev/null)

  local transport=""
  local disk_type="UNKNOWN"
  local is_enterprise=false
  local model
  model=$(smart_field "$info" "Device Model:")

  if [[ "$disk" == /dev/nvme* ]] || grep -qi "NVMe" <<<"$info"; then
    disk_type="NVMe"
    transport="NVMe"
  elif grep -qiw "SAS" <<<"$info"; then
    transport="SAS"
    is_enterprise=true
    # Anything on SAS that does not announce itself as solid state is
    # treated as a spinning disk.
    if grep -qiE "Solid State Device|SSD" <<<"$info"; then
      disk_type="SAS SSD"
    else
      disk_type="SAS HDD"
    fi
  elif grep -qiE "Solid State Device|SSD" <<<"$info"; then
    disk_type="SATA SSD"
    transport="SATA"
  elif grep -qi "Rotation Rate" <<<"$info"; then
    disk_type="SATA HDD"
    transport="SATA"
  fi

  # Enterprise keywords anywhere in the identity output.
  if grep -qiwE "ENTERPRISE|EP|SAS|Xeon|Xeons|DualPort|PowerLoss|PLP" <<<"$info"; then
    is_enterprise=true
  fi

  # Enterprise / SSD hints in the model name.
  if [[ -n "$model" ]]; then
    if grep -qiwE "PRO|EP|DC|ENT|ENTERPRISE" <<<"$model"; then
      is_enterprise=true
    fi
    if [[ "$disk_type" == "UNKNOWN" ]] && grep -qiE "SSD|Solid State" <<<"$model"; then
      disk_type="SSD"
      [[ -z "$transport" ]] && transport="SATA"
    fi
  fi

  if [[ "$disk_type" == "UNKNOWN" ]]; then
    disk_type="Unknown"
  fi

  echo "$disk_type|$transport|$is_enterprise"
}

#######################################
# Collect health attributes of a SAS disk from `smartctl -x` (or -a).
# Arguments: $1 - device path
#            $2 - optional smartctl -d argument ("" or "direct" = none)
#            $3 - disk type, "SAS HDD" or "SAS SSD"
# Outputs:   13 '|'-separated fields:
#            power_on_hours|reallocated|pending|start_stop|load_cycle|
#            temperature|model|serial|firmware|total_written|media_wearout|
#            percent_lifetime_used|has_write_data
#            pending is always empty: no source for it is parsed here.
#######################################
get_sas_attributes() {
  local disk=$1
  local controller=${2-}
  local disk_type=${3-}
  local -a cmd=(smartctl)
  if [[ -n "$controller" && "$controller" != "direct" ]]; then
    cmd+=(-d "$controller")
  fi

  local power_on_hours=""
  local reallocated_sectors=""
  local pending_sectors=""
  local start_stop_count=""
  local load_cycle_count=""
  local total_written=""
  local temperature=""
  local model=""
  local serial=""
  local firmware=""
  local media_wearout=""
  local percent_lifetime_used=""
  local has_write_data=false

  # Extended output first; fall back to the standard report.
  local attributes
  attributes=$("${cmd[@]}" -x "$disk" 2>/dev/null)
  if [[ -z "$attributes" ]]; then
    attributes=$("${cmd[@]}" -a "$disk" 2>/dev/null)
  fi

  if [[ -n "$attributes" ]]; then
    model=$(smart_field "$attributes" "Product:")
    [[ -z "$model" ]] && model=$(smart_field "$attributes" "Device Model:")
    serial=$(smart_field "$attributes" "Serial number:")
    firmware=$(smart_field "$attributes" "Revision:")

    # "Accumulated power on time, hours:minutes H:M" -> keep the hours.
    power_on_hours=$(printf '%s\n' "$attributes" \
      | grep -i -m1 "Accumulated power on time" \
      | grep -oE '[0-9]+:[0-9]+' | head -n1)
    if [[ -n "$power_on_hours" ]]; then
      power_on_hours=${power_on_hours%%:*}
    else
      power_on_hours=$(_last_field "$attributes" "Power_On_Hours")
    fi

    temperature=$(_first_number "$attributes" "Current Drive Temperature")
    if [[ -z "$temperature" ]]; then
      temperature=$(printf '%s\n' "$attributes" \
        | grep -i -m1 "Temperature_Celsius" | awk '{ print $10 }')
    fi

    # Mechanical counters only exist on spinning disks.
    if [[ "$disk_type" == "SAS HDD" ]]; then
      start_stop_count=$(_first_number "$attributes" "Accumulated start-stop cycles")
      load_cycle_count=$(_first_number "$attributes" "Accumulated load-unload cycles")
    fi

    # The grown defect list is reported as the reallocated-sector count.
    reallocated_sectors=$(_first_number "$attributes" "Elements in grown defect list")

    if [[ "$disk_type" == "SAS SSD" ]]; then
      local pattern
      for pattern in Total_LBAs_Written Host_Writes_32MiB Lifetime_Writes NAND_Writes; do
        total_written=$(_last_field "$attributes" "$pattern")
        [[ -n "$total_written" ]] && break
      done
      if [[ -n "$total_written" && "$total_written" != "0" ]]; then
        has_write_data=true
      fi

      media_wearout=$(_last_field "$attributes" "Media_Wearout_Indicator")
      # SCSI SSDs report wear as "Percentage used endurance indicator: N%".
      percent_lifetime_used=$(_first_number "$attributes" "Percentage used endurance indicator")
      [[ -z "$percent_lifetime_used" ]] \
        && percent_lifetime_used=$(_last_field "$attributes" "Percent_Lifetime_Used")
      [[ -z "$percent_lifetime_used" ]] \
        && percent_lifetime_used=$(_last_field "$attributes" "Wear_Leveling_Count")
    fi
  fi

  echo "$power_on_hours|$reallocated_sectors|$pending_sectors|$start_stop_count|$load_cycle_count|$temperature|$model|$serial|$firmware|$total_written|$media_wearout|$percent_lifetime_used|$has_write_data"
}

#######################################
# Convert a write counter to whole terabytes written (10^12 bytes).
# Arguments: $1 - disk type
#            $2 - raw counter: NVMe "data units" (1000 x 512 bytes each) when
#                 $1 is "NVMe", otherwise units of 32 MiB
#            $3 - sector count (512-byte sectors); takes precedence over $2
# Outputs:   integer TB written, truncated (0 when nothing usable is given)
#######################################
calculate_tbw() {
  local disk_type=${1-}
  local raw_value
  local sectors
  local tbw=0
  raw_value=$(to_uint "${2-}")
  sectors=$(to_uint "${3-}")

  if ((sectors > 0)); then
    tbw=$((sectors * 512 / 1000000000000))
  elif ((raw_value > 0)); then
    if [[ "$disk_type" == "NVMe" ]]; then
      tbw=$((raw_value * 512000 / 1000000000000))
    else
      # 32 MiB = 33554432 bytes (not 32 * 10^6).
      tbw=$((raw_value * 33554432 / 1000000000000))
    fi
  fi

  echo "$tbw"
}

#######################################
# Estimate the endurance rating (TBW) from capacity and disk class.
# Arguments: $1 - capacity in GB, $2 - "true" for enterprise disks,
#            $3 - disk type, $4 - "true"/"false": write statistics available
# Outputs:   TB figure, "N/A" for HDDs, or "UNKNOWN" for a SAS SSD that
#            exposes no write statistics
#######################################
get_estimated_endurance() {
  local capacity_gb=${1-}
  local is_enterprise=${2-}
  local disk_type=${3-}
  local has_write_data=${4-}

  case "$disk_type" in
    "SATA HDD" | "SAS HDD" | "HDD")
      echo "N/A"
      return
      ;;
  esac

  # Without write data an estimate for a SAS SSD would be meaningless.
  if [[ "$disk_type" == "SAS SSD" && "$has_write_data" == "false" ]]; then
    echo "UNKNOWN"
    return
  fi

  local capacity_tier
  capacity_tier=$(get_capacity_tier "$capacity_gb")

  if [[ "$is_enterprise" == "true" ]]; then
    echo "${ENTERPRISE_TBW[$capacity_tier]}"
  else
    echo "${CONSUMER_TBW[$capacity_tier]}"
  fi
}

#######################################
# Estimate remaining SSD life from TB written versus estimated endurance.
# Arguments: $1 - power-on hours, $2 - TB written, $3 - endurance in TB
#            (or "N/A"/"UNKNOWN"), $4 - disk type,
#            $5 - percent lifetime used (accepted, currently not used),
#            $6 - "true"/"false": write statistics available
# Outputs:   "remaining_percent|remaining_tb|status"; the first two fields
#            may carry colour escape sequences
#######################################
estimate_ssd_lifespan() {
  local power_on_hours=${1-}
  local tbw_used=${2-}
  local estimated_endurance=${3-}
  local disk_type=${4-}
  local percent_lifetime_used=${5-}
  local has_write_data=${6-}

  if [[ "$disk_type" == "SAS SSD" && "$has_write_data" == "false" ]]; then
    echo "N/A|N/A|Cannot determine - SAS SSD does not expose write statistics"
    return
  fi

  if (($(to_uint "$power_on_hours") == 0)); then
    echo "Unknown|Unknown|New drive"
    return
  fi

  if [[ "$estimated_endurance" == "N/A" ]]; then
    echo "N/A|N/A|HDD - no endurance rating"
    return
  fi

  # Guard the division below against "UNKNOWN", empty or zero endurance.
  if [[ ! "$estimated_endurance" =~ ^[0-9]+$ ]] || ((10#$estimated_endurance == 0)); then
    echo "Unknown|Unknown|No endurance estimate available"
    return
  fi

  local endurance=$((10#$estimated_endurance))
  local clean_tbw_used
  # Strip units/colour codes, then keep the integer part only.
  clean_tbw_used=$(to_uint "${tbw_used//[^0-9.]/}")

  local tbw_remaining=$((endurance - clean_tbw_used))
  ((tbw_remaining < 0)) && tbw_remaining=0

  if ((clean_tbw_used > 0)); then
    local lifespan_used=$((clean_tbw_used * 100 / endurance))
    local lifespan_remaining=$((100 - lifespan_used))
    # A drive written past its estimate has 0% left, not a negative value.
    ((lifespan_remaining < 0)) && lifespan_remaining=0

    local color status
    if ((lifespan_used >= 80)); then
      color=$RED
      status="High wear"
    elif ((lifespan_used >= 50)); then
      color=$YELLOW
      status="Moderate wear"
    else
      color=$GREEN
      status="Healthy"
    fi
    echo "${color}${lifespan_remaining}%${NC}|${color}${tbw_remaining} TB${NC}|${status}"
  else
    echo "Unknown|${estimated_endurance} TB|New"
  fi
}

#######################################
# Estimate remaining HDD life from a severity score built from reallocated
# sectors, temperature, power-on hours and mechanical counters.
# Arguments: $1 - power-on hours, $2 - reallocated sectors,
#            $3 - pending sectors, $4 - start/stop count,
#            $5 - load cycle count, $6 - disk type, $7 - temperature (C)
#            Non-numeric or empty values count as 0.
# Outputs:   a one-line verdict, possibly with colour escape sequences
#######################################
estimate_hdd_lifespan() {
  local numeric_hours reallocated pending start_stop load_cycle temperature
  local disk_type=${6-}
  numeric_hours=$(extract_numeric_hours "${1-}")
  reallocated=$(to_uint "${2-}")
  pending=$(to_uint "${3-}")
  start_stop=$(to_uint "${4-}")
  load_cycle=$(to_uint "${5-}")
  temperature=$(to_uint "${7-}")

  if ((numeric_hours == 0)); then
    echo "Unknown"
    return
  fi

  local severity=0

  # Any pending sector is reported as critical immediately.
  if ((pending > 0)); then
    echo "${RED}CRITICAL${NC} (Pending sectors: $pending)"
    return
  elif ((reallocated > 100)); then
    severity=$((severity + 3))
  elif ((reallocated > 10)); then
    severity=$((severity + 2))
  elif ((reallocated > 0)); then
    severity=$((severity + 1))
  fi

  # Temperature warning
  if ((temperature > 50)); then
    severity=$((severity + 2))
  elif ((temperature > 40)); then
    severity=$((severity + 1))
  fi

  # Usage-based assessment
  if ((numeric_hours > 50000)); then
    severity=$((severity + 3))
  elif ((numeric_hours > 30000)); then
    severity=$((severity + 2))
  elif ((numeric_hours > 15000)); then
    severity=$((severity + 1))
  fi

  # Mechanical wear (for HDDs)
  if [[ "$disk_type" == "SATA HDD" || "$disk_type" == "SAS HDD" ]]; then
    if ((start_stop > 50000)); then
      severity=$((severity + 2))
    elif ((start_stop > 20000)); then
      severity=$((severity + 1))
    fi

    if ((load_cycle > 500000)); then
      severity=$((severity + 2))
    elif ((load_cycle > 200000)); then
      severity=$((severity + 1))
    fi
  fi

  if ((severity >= 5)); then
    echo "${RED}< 6 months${NC} (Multiple risk factors)"
  elif ((severity >= 3)); then
    echo "${YELLOW}6-18 months${NC} (Moderate wear)"
  elif ((severity >= 1)); then
    echo "${YELLOW}1-3 years${NC} (Light wear)"
  else
    echo "${GREEN}> 3 years${NC} (Healthy)"
  fi
}

#######################################
# Report software RAID (MDRAID) arrays listed in /proc/mdstat.
# For each array that exists as a block device, prints level, state and
# active device count when mdadm is available.
# Outputs:   human-readable report on stdout; nothing when no array exists
#######################################
check_mdraid() {
  local -a md_devices=()
  local line md md_info
  # Array lines look like "md0 : active raid1 ..."; accept any md* name.
  local md_re='^(md[^[:space:]]*)[[:space:]]*:'

  if [[ -r /proc/mdstat ]]; then
    while IFS= read -r line; do
      if [[ $line =~ $md_re ]]; then
        md_devices+=("/dev/${BASH_REMATCH[1]}")
      fi
    done </proc/mdstat
  fi

  for md in "${md_devices[@]}"; do
    [[ -b "$md" ]] || continue
    print_color "$MAGENTA" "Found software RAID: $md"
    command_exists mdadm || continue
    md_info=$(mdadm --detail "$md" 2>/dev/null)
    [[ -n "$md_info" ]] || continue
    echo "RAID Level: $(smart_field "$md_info" "Raid Level")"
    echo "State: $(smart_field "$md_info" "State :")"
    echo "Devices: $(smart_field "$md_info" "Active Devices")"
    echo ""
  done
}

# Author credit shown in the banner, the footer and --version.
CREATOR="Adam T. Lau"

# Print the leading digits of the first line of $1 as a decimal integer
# (0 when there are none). Used to sanitise values before arithmetic.
as_uint() {
  local value=${1-}
  value=${value%%$'\n'*}
  value=${value#"${value%%[![:space:]]*}"}
  value=${value%%[!0-9]*}
  if [[ -n "$value" ]]; then
    printf '%d\n' "$((10#$value))"
  else
    printf '0\n'
  fi
}

# Print the value of the first "Label: value" line of $1 matching $2
# (case-insensitive), without leading whitespace.
info_field() {
  printf '%s\n' "$1" | grep -i -m1 -- "$2" | cut -d: -f2- | sed 's/^[[:space:]]*//'
}

# Print the RAW_VALUE column (10th field) of the first row of an ATA
# attribute table ($1) whose attribute name starts with $2.
ata_raw_value() {
  printf '%s\n' "$1" | awk -v name="$2" 'index($2, name) == 1 { print $10; exit }'
}

# Print the digits of an NVMe health field "Label: 1,234 [..]" from $1.
# Thousands separators, units and the bracketed summary are dropped.
nvme_health_value() {
  printf '%s\n' "$1" | grep -i -m1 -- "^$2:" | cut -d: -f2- \
    | sed 's/\[.*//' | tr -cd '0-9'
}

#######################################
# Determine the capacity of a disk.
# Behind a RAID controller the block device is the logical volume, so the
# physical disk's size is read from smartctl's "User Capacity" field. For
# direct disks lsblk, fdisk and blockdev are tried in that order. A lookup in
# MODEL_CAPACITIES is the last resort.
# Arguments: $1 - device path
#            $2 - optional smartctl -d argument ("" or "direct" = none)
#            $3 - disk type (accepted for interface compatibility; not used)
# Outputs:   "capacity_gb|capacity_human" (decimal GB; "0|Unknown" on failure)
#######################################
get_disk_capacity() {
  local disk=$1
  local controller=${2-}
  local disk_type=${3-}

  [[ "$controller" == "direct" ]] && controller=""
  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  local bytes=""
  local capacity_gb=0
  local capacity_human="Unknown"
  local info=""
  local line model
  local bytes_re='([0-9]+) bytes'

  if [[ -n "$controller" ]]; then
    info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
    # "User Capacity:    1,000,204,886,016 bytes [1.00 TB]"
    bytes=$(printf '%s\n' "$info" | grep -i -m1 "User Capacity:" \
      | cut -d: -f2- | sed 's/bytes.*//' | tr -cd '0-9')
  else
    # Method 1: lsblk. -d limits the output to the disk itself; without it
    # every partition adds a line.
    if command_exists lsblk; then
      bytes=$(lsblk -b -d -n -o SIZE "$disk" 2>/dev/null | head -n1 | tr -cd '0-9')
    fi

    # Method 2: fdisk prints "Disk /dev/sdX: ..., N bytes, ...".
    if [[ -z "$bytes" || "$bytes" == "0" ]] && command_exists fdisk; then
      line=$(fdisk -l "$disk" 2>/dev/null | grep -m1 "^Disk $disk")
      if [[ $line =~ $bytes_re ]]; then
        bytes=${BASH_REMATCH[1]}
      fi
    fi

    # Method 3: blockdev
    if [[ -z "$bytes" || "$bytes" == "0" ]] && command_exists blockdev; then
      bytes=$(blockdev --getsize64 "$disk" 2>/dev/null | tr -cd '0-9')
    fi
  fi

  if [[ "$bytes" =~ ^[0-9]+$ ]]; then
    capacity_gb=$((10#$bytes / 1000000000))
  fi

  # Method 4: model-based lookup
  if ((capacity_gb == 0)); then
    [[ -n "$info" ]] || info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
    model=$(info_field "$info" "Device Model:")
    [[ -z "$model" ]] && model=$(info_field "$info" "Product:")
    if [[ -n "$model" && -n "${MODEL_CAPACITIES[$model]:-}" ]]; then
      capacity_gb=${MODEL_CAPACITIES[$model]}
    fi
  fi

  # Human-readable form; one decimal for TB so 1200 GB is not shown as 1 TB.
  if ((capacity_gb >= 1000)); then
    local whole=$((capacity_gb / 1000))
    local tenth=$((capacity_gb % 1000 / 100))
    if ((tenth > 0)); then
      capacity_human="${whole}.${tenth} TB"
    else
      capacity_human="${whole} TB"
    fi
  elif ((capacity_gb > 0)); then
    capacity_human="${capacity_gb} GB"
  fi

  echo "$capacity_gb|$capacity_human"
}

#######################################
# Print a health report for a single disk.
# Arguments: $1 - device path
#            $2 - optional smartctl -d argument, e.g. "megaraid,5".
#                 "" or "direct" means no -d option ("direct" is not a
#                 smartctl device type and must never be passed on).
# Outputs:   the report on stdout
#######################################
check_disk() {
  local disk=$1
  local controller=${2-}
  [[ "$controller" == "direct" ]] && controller=""

  local -a smart_cmd=(smartctl)
  [[ -n "$controller" ]] && smart_cmd+=(-d "$controller")

  print_color "$CYAN" "Checking disk: $disk (Controller: ${controller:-direct})"
  echo "=================================================="

  # Identity first. Bits 0 and 1 of smartctl's exit status mean "bad command
  # line" and "device open failed"; other bits can be set on a readable disk.
  local info info_status
  info=$("${smart_cmd[@]}" -i "$disk" 2>/dev/null)
  info_status=$?
  if [[ -z "$info" ]] || ((info_status & 3)); then
    print_color "$YELLOW" "Cannot read disk information. It may be offline, unsupported, or need controller specification."
    echo ""
    return 0
  fi

  local health
  health=$("${smart_cmd[@]}" -H "$disk" 2>/dev/null)

  local disk_info disk_type transport is_enterprise
  disk_info=$(get_disk_info "$disk" "$controller")
  IFS='|' read -r disk_type transport is_enterprise <<<"$disk_info"

  local model serial firmware
  model=$(info_field "$info" "Device Model:")
  [[ -z "$model" ]] && model=$(info_field "$info" "Product:")
  [[ -z "$model" ]] && model=$(info_field "$info" "Model Number:")
  serial=$(info_field "$info" "Serial Number:")
  firmware=$(info_field "$info" "Firmware Version:")
  [[ -z "$firmware" ]] && firmware=$(info_field "$info" "Revision:")

  local capacity_info capacity_gb capacity_human
  capacity_info=$(get_disk_capacity "$disk" "$controller" "$disk_type")
  IFS='|' read -r capacity_gb capacity_human <<<"$capacity_info"

  local health_status
  health_status=$(info_field "$health" "result:")
  [[ -z "$health_status" ]] && health_status=$(info_field "$health" "SMART overall-health")
  [[ -z "$health_status" ]] && health_status=$(info_field "$health" "SMART Health Status")

  # SMART attributes, by disk family.
  local power_on_hours=""
  local reallocated_sectors=""
  local pending_sectors=""
  local start_stop_count=""
  local load_cycle_count=""
  local total_written=""
  local host_writes_32mib=""
  local temperature=""
  local media_wearout=""
  local percent_lifetime_used=""
  local has_write_data="false"
  local attributes

  if [[ "$disk_type" == "SAS HDD" || "$disk_type" == "SAS SSD" ]]; then
    local sas_attrs sas_model sas_serial sas_firmware sas_total_written
    sas_attrs=$(get_sas_attributes "$disk" "$controller" "$disk_type")
    IFS='|' read -r power_on_hours reallocated_sectors pending_sectors \
      start_stop_count load_cycle_count temperature sas_model sas_serial \
      sas_firmware sas_total_written media_wearout percent_lifetime_used \
      has_write_data <<<"$sas_attrs"

    # Prefer the SAS-specific identity fields when present.
    [[ -n "$sas_model" ]] && model="$sas_model"
    [[ -n "$sas_serial" ]] && serial="$sas_serial"
    [[ -n "$sas_firmware" ]] && firmware="$sas_firmware"
    [[ -n "$sas_total_written" ]] && total_written="$sas_total_written"
  elif [[ "$disk_type" == "NVMe" ]]; then
    # NVMe health output is "Label: value" text, not an ATA attribute table.
    local data_units
    attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
    power_on_hours=$(nvme_health_value "$attributes" "Power On Hours")
    temperature=$(nvme_health_value "$attributes" "Temperature")
    percent_lifetime_used=$(nvme_health_value "$attributes" "Percentage Used")
    data_units=$(nvme_health_value "$attributes" "Data Units Written")
    if [[ -n "$data_units" ]]; then
      # One data unit is 1000 x 512 bytes; express it as 512-byte sectors,
      # the unit calculate_tbw expects in its third argument.
      total_written=$((10#$data_units * 1000))
      has_write_data="true"
    fi
  else
    attributes=$("${smart_cmd[@]}" -A "$disk" 2>/dev/null)
    if [[ -n "$attributes" ]]; then
      power_on_hours=$(ata_raw_value "$attributes" "Power_On_Hours")
      reallocated_sectors=$(ata_raw_value "$attributes" "Reallocated_Sector_Ct")
      pending_sectors=$(ata_raw_value "$attributes" "Current_Pending_Sector")
      start_stop_count=$(ata_raw_value "$attributes" "Start_Stop_Count")
      load_cycle_count=$(ata_raw_value "$attributes" "Load_Cycle_Count")
      # Kept apart on purpose: one counts sectors, the other 32 MiB units.
      total_written=$(ata_raw_value "$attributes" "Total_LBAs_Written")
      host_writes_32mib=$(ata_raw_value "$attributes" "Host_Writes_32MiB")
      temperature=$(ata_raw_value "$attributes" "Temperature_Celsius")
      [[ -z "$temperature" ]] \
        && temperature=$(ata_raw_value "$attributes" "Airflow_Temperature_Cel")

      if [[ "$disk_type" == "SATA SSD" ]]; then
        if [[ -n "$total_written" || -n "$host_writes_32mib" ]]; then
          has_write_data="true"
        fi
      fi
    fi
  fi

  # Sanitise everything that is used in arithmetic or numeric tests.
  power_on_hours=$(extract_numeric_hours "$power_on_hours")
  reallocated_sectors=$(as_uint "$reallocated_sectors")
  pending_sectors=$(as_uint "$pending_sectors")
  start_stop_count=$(as_uint "$start_stop_count")
  load_cycle_count=$(as_uint "$load_cycle_count")
  total_written=$(as_uint "$total_written")
  host_writes_32mib=$(as_uint "$host_writes_32mib")
  temperature=$(as_uint "$temperature")

  local disk_class="Consumer"
  [[ "$is_enterprise" == "true" ]] && disk_class="Enterprise"

  echo "Model: ${model:-Unknown}"
  echo "Serial: ${serial:-Unknown}"
  echo "Type: $disk_type"
  echo "Interface: $transport"
  echo "Class: $disk_class"
  echo "Capacity: $capacity_human"
  echo "Firmware: ${firmware:-Unknown}"
  echo "Health: ${health_status:-Unknown}"

  if [[ "$temperature" != "0" ]]; then
    echo "Temperature: ${temperature} C"
  fi

  if [[ -n "$power_on_hours" && "$power_on_hours" != "0" ]]; then
    echo "Power On Hours: $power_on_hours"
  else
    echo "Power On Hours: Unknown"
  fi

  # Wear indicators for solid-state disks, when the drive reports them.
  if [[ "$disk_type" == "SAS SSD" || "$disk_type" == "SATA SSD" || "$disk_type" == "NVMe" ]]; then
    if [[ -n "$media_wearout" && "$media_wearout" != "0" ]]; then
      echo "Media Wearout: $media_wearout"
    fi
    if [[ -n "$percent_lifetime_used" && "$percent_lifetime_used" != "0" ]]; then
      echo "Lifetime Used: ${percent_lifetime_used}%"
    fi
  fi

  if [[ "$disk_type" == "SATA HDD" || "$disk_type" == "SAS HDD" ]]; then
    echo "Realloc Sectors: $reallocated_sectors"
    echo "Pending Sectors: $pending_sectors"

    if [[ "$start_stop_count" != "0" ]]; then
      echo "Start/Stop Count: $start_stop_count"
    fi
    if [[ "$load_cycle_count" != "0" ]]; then
      echo "Load Cycle Count: $load_cycle_count"
    fi

    local lifespan
    lifespan=$(estimate_hdd_lifespan "$power_on_hours" "$reallocated_sectors" \
      "$pending_sectors" "$start_stop_count" "$load_cycle_count" "$disk_type" \
      "$temperature")
    echo "Lifespan: $lifespan"

  elif [[ "$disk_type" == "SATA SSD" || "$disk_type" == "SAS SSD" || "$disk_type" == "NVMe" ]]; then
    local tbw_used=0
    if [[ "$total_written" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "" "$total_written")
    elif [[ "$host_writes_32mib" != "0" ]]; then
      tbw_used=$(calculate_tbw "$disk_type" "$host_writes_32mib" "")
    fi

    local estimated_endurance
    estimated_endurance=$(get_estimated_endurance "$capacity_gb" "$is_enterprise" \
      "$disk_type" "$has_write_data")

    if [[ "$disk_type" == "SAS SSD" && "$estimated_endurance" == "UNKNOWN" ]]; then
      echo "TBW Used: Not available"
      echo "TBW Endurance: Not available (SAS SSD does not expose write statistics)"
      # Only claim "Healthy" when the drive's own health verdict says so.
      if [[ "$health_status" =~ (OK|PASSED) ]]; then
        echo "Lifespan: ${GREEN}Healthy${NC} (based on SMART health status)"
      else
        echo "Lifespan: ${YELLOW}Unknown${NC} (SMART health status: ${health_status:-Unknown})"
      fi
    elif [[ "$estimated_endurance" != "N/A" ]]; then
      echo "TBW Used: ${tbw_used} TB"
      echo "TBW Endurance: ${estimated_endurance} TB"

      local lifespan_info lifespan_remaining tbw_remaining lifespan_status
      lifespan_info=$(estimate_ssd_lifespan "$power_on_hours" "$tbw_used" \
        "$estimated_endurance" "$disk_type" "$percent_lifetime_used" \
        "$has_write_data")
      IFS='|' read -r lifespan_remaining tbw_remaining lifespan_status <<<"$lifespan_info"

      echo "TBW Remaining: $tbw_remaining"
      echo "Lifespan: $lifespan_remaining ($lifespan_status)"
    else
      echo "Endurance: N/A"
    fi

    if [[ "$disk_type" == "SAS SSD" ]]; then
      echo "Realloc Sectors: $reallocated_sectors"
      echo "Pending Sectors: $pending_sectors"
    fi
  else
    print_color "$YELLOW" "Unknown disk type - limited information available"
    echo "Realloc Sectors: $reallocated_sectors"
    echo "Pending Sectors: $pending_sectors"
  fi

  echo ""
}

#######################################
# List directly attached disks and probe for disks behind RAID controllers.
# Probing asks smartctl for identity data of member 0..31 of each controller
# type via /dev/sda, /dev/sgN and /dev/srN; the first node that answers wins.
# Arguments: optional smartctl controller types to probe
#            (default: megaraid cciss areca 3ware hpt aacraid)
# Outputs:   one line of space-separated "device:spec" tokens, where spec is
#            "direct" or a smartctl -d argument such as "megaraid,5"
#######################################
detect_raid_disks() {
  local -a controllers=("$@")
  if ((${#controllers[@]} == 0)); then
    controllers=(megaraid cciss areca 3ware hpt aacraid)
  fi
  local -a disks=()
  local disk controller i base_disk spec

  # Direct disks first. Unmatched globs stay literal and fail the -b test.
  for disk in /dev/sd[a-z] /dev/sd[a-z][a-z] \
    /dev/nvme[0-9]n[0-9] /dev/nvme[0-9][0-9]n[0-9]; do
    if [[ -b "$disk" ]]; then
      disks+=("$disk:direct")
    fi
  done

  for controller in "${controllers[@]}"; do
    for i in {0..31}; do
      # Not every driver takes "type,N"; build the form smartctl documents.
      case "$controller" in
        aacraid) spec="aacraid,0,0,$i" ;;  # host 0, lun 0 only - NOTE(review): widen if needed
        hpt) spec="hpt,1/$((i + 1))" ;;    # controller 1 only; channels start at 1
        areca) spec="areca,$((i + 1))" ;;  # disk numbers start at 1
        *) spec="$controller,$i" ;;
      esac
      for base_disk in "/dev/sda" "/dev/sg$i" "/dev/sr$i"; do
        if smartctl -d "$spec" -i "$base_disk" >/dev/null 2>&1; then
          disks+=("$base_disk:$spec")
          break
        fi
      done
    done
  done

  echo "${disks[@]}"
}

#######################################
# Detect and check every disk: software RAID, NVMe, direct SATA/SAS disks,
# then members of hardware RAID controllers whose vendor CLI is installed.
# Outputs:   the full report on stdout
#######################################
check_all_disks() {
  print_color "$GREEN" "Disk Health Check Script v$VERSION"
  print_color "$BLUE" "Creator: $CREATOR"
  echo ""

  # A vendor CLI on PATH is taken as the sign that the controller is
  # present. The names stored are smartctl -d driver names.
  print_color "$MAGENTA" "Checking for RAID controllers..."
  local -a raid_controllers=()

  if command_exists storcli64; then
    raid_controllers+=("megaraid")
    print_color "$GREEN" "Found MegaRAID controller (storcli64)"
  elif command_exists storcli; then
    raid_controllers+=("megaraid")
    print_color "$GREEN" "Found MegaRAID controller (storcli)"
  fi

  if command_exists tw_cli; then
    raid_controllers+=("3ware")
    print_color "$GREEN" "Found 3ware controller (tw_cli)"
  fi

  if command_exists cli64; then
    raid_controllers+=("areca")
    print_color "$GREEN" "Found Areca controller (cli64)"
  fi

  if command_exists hpssacli; then
    raid_controllers+=("cciss")
    print_color "$GREEN" "Found HP Smart Array controller (hpssacli)"
  elif command_exists ssacli; then
    raid_controllers+=("cciss")
    print_color "$GREEN" "Found HP Smart Array controller (ssacli)"
  fi

  if command_exists arcconf; then
    raid_controllers+=("aacraid")
    print_color "$GREEN" "Found Adaptec controller (arcconf)"
  fi

  # Software RAID
  check_mdraid

  local dev

  print_color "$MAGENTA" "Checking NVMe drives..."
  local -a nvme_devices=()
  for dev in /dev/nvme*; do
    if [[ -b "$dev" && "$dev" =~ ^/dev/nvme[0-9]+n[0-9]+$ ]]; then
      nvme_devices+=("$dev")
    fi
  done

  if ((${#nvme_devices[@]} > 0)); then
    for dev in "${nvme_devices[@]}"; do
      check_disk "$dev" ""
    done
  else
    print_color "$YELLOW" "No NVMe drives found"
  fi

  print_color "$MAGENTA" "Checking direct SATA/SAS drives..."
  local -a direct_devices=()
  for dev in /dev/sd*; do
    if [[ -b "$dev" && "$dev" =~ ^/dev/sd[a-z]+$ ]]; then
      direct_devices+=("$dev")
    fi
  done

  if ((${#direct_devices[@]} > 0)); then
    for dev in "${direct_devices[@]}"; do
      check_disk "$dev" ""
    done
  else
    print_color "$YELLOW" "No direct SATA/SAS drives found"
  fi

  # Members behind RAID controllers, found by probing smartctl.
  local controller_type token
  local -a tokens
  for controller_type in "${raid_controllers[@]}"; do
    print_color "$MAGENTA" "Checking drives behind $controller_type controller..."
    tokens=()
    read -ra tokens <<<"$(detect_raid_disks "$controller_type")"
    for token in "${tokens[@]}"; do
      # Direct disks were already reported above.
      [[ "${token#*:}" == "direct" ]] && continue
      check_disk "${token%%:*}" "${token#*:}"
    done
  done

  echo ""
  print_color "$CYAN" "Note: SAS SSD TBW endurance is based on enterprise standards. Other SSD/NVMe TBW may vary depending on the specific model and manufacturer."
  print_color "$CYAN" "Script by $CREATOR"
}

#######################################
# Entry point.
# With arguments, checks exactly the given block devices; an invalid path is
# reported and skipped, and the run fails if none is valid. Without
# arguments, checks every detected disk.
# Arguments: optional device paths
#######################################
main() {
  if (($# == 0)); then
    check_all_disks
    return
  fi

  print_color "$BLUE" "Disk Health Check Script v$VERSION for Harvester OS"
  print_color "$BLUE" "===================================================="
  echo ""

  check_mdraid

  local -a disks=()
  local disk
  for disk in "$@"; do
    if [[ -b "$disk" ]]; then
      disks+=("$disk")
    else
      print_color "$RED" "Error: $disk is not a valid block device"
    fi
  done

  if ((${#disks[@]} == 0)); then
    print_color "$RED" "No disks found or accessible"
    echo "Try running as root or specifying disk paths manually"
    exit 1
  fi

  print_color "$GREEN" "Found ${#disks[@]} disk(s) to check"
  echo ""

  for disk in "${disks[@]}"; do
    check_disk "$disk" ""
  done

  print_color "$BLUE" "Check completed!"
  echo ""
  print_color "$YELLOW" "Note: SAS SSDs often do not expose write statistics through SMART."
  print_color "$YELLOW" "      TBW information may not be available for these drives."
}

# Print the help text (options, disk arguments, supported storage types).
show_help() {
  echo "Usage: $SCRIPT_NAME [OPTIONS] [DISK1 DISK2 ...]"
  echo "Check disk health for various types of storage devices"
  echo ""
  echo "If no disks specified, auto-detects all available disks and RAID arrays"
  echo ""
  echo "Options:"
  echo "  -h, --help     Show this help message"
  echo "  -v, --version  Show version information"
  echo ""
  echo "Examples:"
  echo "  $SCRIPT_NAME                        # Check all auto-detected disks"
  echo "  $SCRIPT_NAME /dev/sda               # Check specific disk"
  echo "  $SCRIPT_NAME /dev/nvme0n1           # Check specific NVMe disk"
  echo "  $SCRIPT_NAME /dev/sda /dev/nvme0n1  # Check multiple disks"
  echo ""
  echo "Supported storage types:"
  echo "  - SATA HDD/SSD"
  echo "  - SAS HDD/SSD"
  echo "  - NVMe"
  echo "  - RAID controllers (MegaRAID, 3ware, Areca, HP Smart Array, Adaptec)"
  echo "  - Software RAID (MDRAID)"
  echo ""
  echo "Requirements: smartmontools package must be installed"
}

# Older name of show_help, kept so existing callers keep working.
usage() {
  show_help
}

# Print the script name, version and author.
show_version() {
  echo "$SCRIPT_NAME version $VERSION"
  echo "Created by $CREATOR"
}

# Main script execution
case "${1:-}" in
  -h | --help)
    show_help
    exit 0
    ;;
  -v | --version)
    show_version
    exit 0
    ;;
  *)
    main "$@"
    ;;
esac