在PVE中安装FNOS后,PVE无法直接获取硬盘温度数据。本文介绍如何在PVE中获取FNOS导出的硬盘温度数据。
FNOS温度数据导出
在FNOS中通过prometheus-community/smartctl_exporter导出硬盘温度数据
# Docker Compose definition that runs smartctl_exporter and publishes its
# metrics endpoint on host port 9633.
services:
  smartctl-exporter:
    image: prometheuscommunity/smartctl-exporter
    # NOTE(review): privileged mode and the root user are presumably needed so
    # the exporter can query SMART data from the raw disk devices - confirm.
    privileged: true
    user: root
    ports:
      # host:container
      - "9633:9633"
PVE中获取、解析、写入数据
新建文件/usr/local/bin/get_max_hdd_temp.sh
用于通过API获取温度数据
此处获取所有硬盘温度数据后,取最大值
#!/bin/bash
#
# get_max_hdd_temp.sh
#
# Fetches Prometheus text-format metrics from a smartctl_exporter endpoint,
# picks the highest "current" disk temperature and prints it on stdout in
# millidegrees Celsius (degrees * 1000).
#
# If the metrics cannot be fetched, or contain no usable reading, the script
# prints DEFAULT_MAX_TEMP (also converted to millidegrees) on stdout, writes
# an explanation to stderr and exits with status 1. A caller that only
# captures stdout therefore always receives a number.
#
# Environment:
#   EXPORTER_URL  optional override for the metrics URL.

# smartctl_exporter metrics endpoint (overridable from the environment).
EXPORTER_URL="${EXPORTER_URL:-http://10.115.15.20:9633/metrics}"
# Fallback temperature in degrees Celsius, used when no reading is available.
readonly DEFAULT_MAX_TEMP=77
# Time limits in seconds. Without them a hung exporter would block this
# script (and whatever invoked it) indefinitely.
readonly CONNECT_TIMEOUT=3
readonly FETCH_TIMEOUT=10

# Prints "Error: <message>" to stderr, prints the fallback temperature in
# millidegrees to stdout, and terminates the script with status 1.
# Arguments: $1 - error message
fail_with_default() {
  printf 'Error: %s\n' "$1" >&2
  printf '%d\n' "$(( DEFAULT_MAX_TEMP * 1000 ))"
  exit 1
}

# Reads metrics on stdin and prints the sample value (second field) of every
# smartctl_device_temperature line labelled temperature_type="current",
# one value per line.
current_temps() {
  awk '/^smartctl_device_temperature[{]/ && /temperature_type="current"/ { print $2 }'
}

# Reads one temperature in degrees Celsius per line on stdin and prints the
# largest positive one in millidegrees. Non-numeric and zero values are
# ignored; nothing is printed when no value qualifies. Doing the comparison
# in awk accepts fractional readings and avoids bash treating values with a
# leading zero as octal.
max_temp_milli() {
  awk '
    /^[0-9]+([.][0-9]+)?$/ && $1 + 0 > max { max = $1 + 0 }
    END { if (max > 0) printf "%d\n", max * 1000 + 0.5 }
  '
}

main() {
  local metrics temps max_milli

  # --fail makes curl return non-zero on HTTP error statuses instead of
  # handing an error page to the parser.
  if ! metrics=$(curl --silent --fail \
      --connect-timeout "$CONNECT_TIMEOUT" --max-time "$FETCH_TIMEOUT" \
      "$EXPORTER_URL" 2>/dev/null) || [[ -z "$metrics" ]]; then
    fail_with_default "Failed to fetch data from $EXPORTER_URL"
  fi

  temps=$(current_temps <<<"$metrics")
  if [[ -z "$temps" ]]; then
    fail_with_default "No valid temperature data found in the response from $EXPORTER_URL"
  fi

  max_milli=$(max_temp_milli <<<"$temps")
  if [[ -z "$max_milli" ]]; then
    fail_with_default "All temperatures are invalid or zero. Using default value."
  fi

  printf '%s\n' "$max_milli"
}

main "$@"
将数据写入文件
创建文件/usr/local/bin/write_hdd_temp.sh
将获取到的温度数据写入/var/lib/hdd_temp/max_hdd_temp.txt
#!/bin/bash
#
# write_hdd_temp.sh
#
# Runs get_max_hdd_temp.sh, stores the value it prints (millidegrees Celsius)
# in OUTPUT_FILE, and records the update in the system log and on stdout.

readonly TEMP_HELPER=/usr/local/bin/get_max_hdd_temp.sh
readonly OUTPUT_FILE=/var/lib/hdd_temp/max_hdd_temp.txt

# The helper prints a fallback value on stdout even when it exits non-zero,
# so its exit status is intentionally not treated as fatal here.
MAX_TEMP=$("$TEMP_HELPER")

# Refuse to publish anything that is not a plain non-negative integer (for
# example empty output when the helper is missing); the previous file content
# stays in place.
if [[ ! "$MAX_TEMP" =~ ^[0-9]+$ ]]; then
  logger "Virtual Temperature Sensor: invalid reading '$MAX_TEMP', $OUTPUT_FILE left unchanged"
  echo "Error: invalid reading '$MAX_TEMP', $OUTPUT_FILE left unchanged" >&2
  exit 1
fi

CURRENT_TIME=$(date +"%Y-%m-%d %H:%M:%S")

# The stored value is in millidegrees; convert to degrees with one decimal
# for the human-readable messages. 10# forces base 10 so a leading zero is
# not parsed as octal.
printf -v TEMP_C '%d.%d' \
  "$(( 10#$MAX_TEMP / 1000 ))" "$(( 10#$MAX_TEMP % 1000 / 100 ))"

# Write to a temporary file in the same directory and rename it into place,
# so a reader never observes a truncated or empty file.
mkdir -p -- "${OUTPUT_FILE%/*}" || exit 1
tmp_file=$(mktemp "${OUTPUT_FILE}.XXXXXX") || exit 1
trap 'rm -f -- "$tmp_file"' EXIT
if ! printf '%s\n' "$MAX_TEMP" > "$tmp_file" \
    || ! chmod 0644 "$tmp_file" \
    || ! mv -f -- "$tmp_file" "$OUTPUT_FILE"; then
  echo "Error: failed to update $OUTPUT_FILE" >&2
  exit 1
fi

# Record the update in the system log.
logger "Virtual Temperature Sensor: Updated HDD temperature to ${TEMP_C}°C at $CURRENT_TIME"
# Debug output on stdout (optional).
echo "Updated HDD temperature to ${TEMP_C}°C at $CURRENT_TIME"
定时执行
使用systemd定时器(timer)定时执行以上脚本,每15秒更新一次温度数据
创建文件/etc/systemd/system/write-hdd-temp.service
# Service unit that runs write_hdd_temp.sh once per activation.
# It has no [Install] section here, so something else has to start it.
[Unit]
Description=Write HDD temperature to file
[Service]
# oneshot: the script is expected to run to completion and exit.
Type=oneshot
ExecStart=/usr/local/bin/write_hdd_temp.sh
创建文件/etc/systemd/system/write-hdd-temp.timer
# Timer unit that triggers write-hdd-temp.service 15 seconds after boot and
# then 15 seconds after each previous activation of the service.
[Unit]
Description=Run write-hdd-temp.service every 15 seconds
[Timer]
OnBootSec=15sec
OnUnitActiveSec=15sec
# Tighten the scheduling accuracy to 1 second.
AccuracySec=1sec
# Explicitly name the unit to activate (this is also what systemd would pick
# by default for a timer called write-hdd-temp.timer).
Unit=write-hdd-temp.service
[Install]
WantedBy=timers.target
启动定时任务
# Make systemd pick up the newly created unit files.
sudo systemctl daemon-reload
# Enable the timer and start it right away in one step.
sudo systemctl enable --now write-hdd-temp.timer
配置fancontrol
硬盘风扇为6(pwm6,温度取自/var/lib/hdd_temp/max_hdd_temp.txt),CPU风扇为2和7(pwm2、pwm7,温度取自coretemp)
# Configuration file generated by pwmconfig, changes will be lost
INTERVAL=10
DEVPATH=hwmon2=devices/platform/coretemp.0 hwmon3=devices/platform/nct6775.2592
DEVNAME=hwmon2=coretemp hwmon3=nct6798
# Temperature source for each PWM output: pwm2 and pwm7 follow the coretemp
# sensor, pwm6 follows the HDD temperature file.
FCTEMPS=hwmon3/pwm2=hwmon2/temp1_input hwmon3/pwm7=hwmon2/temp1_input hwmon3/pwm6=/var/lib/hdd_temp/max_hdd_temp.txt
# Fan speed input associated with each PWM output.
FCFANS=hwmon3/pwm6=hwmon3/fan6_input hwmon3/pwm2=hwmon3/fan2_input hwmon3/pwm7=hwmon3/fan7_input
# Minimum and maximum temperatures.
MINTEMP=hwmon3/pwm2=40 hwmon3/pwm7=40 hwmon3/pwm6=35
MAXTEMP=hwmon3/pwm2=70 hwmon3/pwm7=70 hwmon3/pwm6=45
# Minimum start and stop PWM values.
MINSTART=hwmon3/pwm2=150 hwmon3/pwm7=100 hwmon3/pwm6=80
MINSTOP=hwmon3/pwm2=0 hwmon3/pwm7=24 hwmon3/pwm6=80
# Minimum and maximum PWM values (no MINPWM entry is given for pwm2).
MINPWM=hwmon3/pwm7=0 hwmon3/pwm6=80
MAXPWM=hwmon3/pwm2=255 hwmon3/pwm7=255 hwmon3/pwm6=255
0