use hwmon abstraction for temps

This commit is contained in:
Robin Appelman 2023-05-03 20:57:27 +02:00
commit a81b07bdec
2 changed files with 103 additions and 75 deletions

View file

@ -11,6 +11,7 @@ use crate::disk::*;
use crate::sensors::*; use crate::sensors::*;
use color_eyre::Result; use color_eyre::Result;
use std::fmt::Write; use std::fmt::Write;
use std::io;
pub fn get_metrics() -> Result<String> { pub fn get_metrics() -> Result<String> {
let disk_usage = disk_usage()?; let disk_usage = disk_usage()?;
@ -18,29 +19,15 @@ pub fn get_metrics() -> Result<String> {
let cpu = cpu_time()?; let cpu = cpu_time()?;
let hostname = hostname()?; let hostname = hostname()?;
let memory = memory()?; let memory = memory()?;
let temperatures = temperatures()?; let mut temp_source = TemperatureSource::new()?;
let temperatures = temp_source.read()?;
let pools = pools(); let pools = pools();
let networks = network_stats()?; let networks = network_stats()?;
let mut result = String::with_capacity(256); let mut result = String::with_capacity(256);
writeln!(&mut result, "cpu_time{{host=\"{}\"}} {:.3}", hostname, cpu).ok(); writeln!(&mut result, "cpu_time{{host=\"{}\"}} {:.3}", hostname, cpu).ok();
writeln!(
&mut result, memory.write(&mut result, &hostname);
"memory_total{{host=\"{}\"}} {}",
hostname, memory.total
)
.ok();
writeln!(
&mut result,
"memory_available{{host=\"{}\"}} {}",
hostname, memory.available
)
.ok();
writeln!(
&mut result,
"memory_free{{host=\"{}\"}} {}",
hostname, memory.free
)
.ok();
for pool in pools { for pool in pools {
writeln!( writeln!(
&mut result, &mut result,
@ -116,3 +103,14 @@ pub fn get_metrics() -> Result<String> {
} }
Ok(result) Ok(result)
} }
/// A sensor reading that knows how to serialise itself as metric lines.
pub trait SensorData {
    /// Emit this reading into `w` in Prometheus-compatible text format.
    ///
    /// `hostname` is passed through to the implementation; the
    /// implementations in this file use it as the value of the `host` label.
    fn write<W>(&self, w: W, hostname: &str)
    where
        W: Write;
}
/// Something that can be polled repeatedly for a [`SensorData`] snapshot.
///
/// `read` takes `&mut self`, so an implementation may keep state between
/// polls.
pub trait SensorSource {
    /// The snapshot type returned by [`SensorSource::read`].
    type Data: SensorData;

    /// Take a fresh reading, reporting failure as an [`io::Error`].
    fn read(&mut self) -> Result<Self::Data, io::Error>;
}

View file

@ -1,9 +1,12 @@
use crate::disk::IoStats; use crate::disk::IoStats;
use crate::hwmon::{Device, FileSource};
use crate::{SensorData, SensorSource};
use color_eyre::{Report, Result}; use color_eyre::{Report, Result};
use std::array::IntoIter; use std::array::IntoIter;
use std::fs::{read, read_dir, read_to_string, File}; use std::fmt::Write;
use std::fs::File;
use std::io;
use std::io::{BufRead, BufReader}; use std::io::{BufRead, BufReader};
use std::os::unix::ffi::OsStrExt;
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
pub struct Temperatures { pub struct Temperatures {
@ -20,6 +23,21 @@ impl IntoIterator for Temperatures {
} }
} }
impl SensorData for Temperatures {
    /// Write one `temperature{host=…, sensor=…}` line per sensor into `w`.
    ///
    /// A reading of exactly `0.0` is skipped rather than written. Write
    /// errors are ignored.
    fn write<W: Write>(&self, mut w: W, hostname: &str) {
        // Iterating consumes a `Temperatures`, so work on a copy of `self`.
        for (sensor, reading) in self.clone() {
            if reading == 0.0 {
                continue;
            }
            writeln!(
                &mut w,
                "temperature{{host=\"{}\", sensor=\"{}\"}} {:.1}",
                hostname, sensor, reading
            )
            .ok();
        }
    }
}
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
pub struct Memory { pub struct Memory {
pub total: u64, pub total: u64,
@ -27,72 +45,84 @@ pub struct Memory {
pub available: u64, pub available: u64,
} }
pub fn temperatures() -> Result<Temperatures> { impl SensorData for Memory {
let mut temps = Temperatures::default(); fn write<W: Write>(&self, mut w: W, hostname: &str) {
writeln!(
const DESIRED_HW_MON: &[&[u8]] = &[b"k10temp\n", b"coretemp\n", b"amdgpu\n"]; &mut w,
const DESIRED_SENSORS: &[&[u8]] = &[b"Tdie\n", b"edge\n"]; "memory_total{{host=\"{}\"}} {}",
hostname, self.total
let mut cores_found = 0.0; )
let mut core_total = 0.0; .ok();
writeln!(
for hwmon in read_dir("/sys/class/hwmon")? { &mut w,
let hwmon = hwmon?; "memory_available{{host=\"{}\"}} {}",
let hwmon_name = read(hwmon.path().join("name"))?; hostname, self.available
)
// rpi cpu_thermal doesn't have labels, special case it .ok();
if hwmon_name.as_slice() == b"cpu_thermal\n" { writeln!(&mut w, "memory_free{{host=\"{}\"}} {}", hostname, self.free).ok();
let mut path = hwmon.path();
path.push("temp1_input");
let value = read_to_string(path)?;
let parsed: u32 = value.trim().parse()?;
temps.cpu = parsed as f32 / 1000.0
} }
if !DESIRED_HW_MON.contains(&hwmon_name.as_slice()) {
continue;
} }
for file in read_dir(hwmon.path())? {
let file = file?; pub struct TemperatureSource {
let path = file.path(); cpu_sensors: Vec<FileSource>,
let file_name = file.file_name(); gpu_sensors: Vec<FileSource>,
let bytes = file_name.as_bytes();
let label = if bytes.starts_with(b"temp") && bytes.ends_with(b"_label") {
read(&path)?
} else {
continue;
};
if !DESIRED_SENSORS.contains(&label.as_slice()) && !label.starts_with(b"Core") {
continue;
} }
let mut path = path
.into_os_string() impl TemperatureSource {
.into_string() pub fn new() -> io::Result<TemperatureSource> {
.map_err(|_| Report::msg("Invalid hwmon path"))?; let mut cpu_sensors = Vec::new();
path.truncate(path.len() - "label".len()); let mut gpu_sensors = Vec::new();
path.push_str("input");
let value = read_to_string(path)?; for device in Device::list().flatten() {
let parsed: u32 = value.trim().parse()?; if device.name() == "k10temp" || device.name() == "coretemp" {
match (hwmon_name.as_slice(), label.as_slice()) { for sensor in device.sensors().flatten() {
(b"k10temp\n", b"Tdie\n") => temps.cpu = parsed as f32 / 1000.0, if sensor.name() == "Tdie" || sensor.name().starts_with("Core ") {
(b"amdgpu\n", b"edge\n") => temps.gpu = parsed as f32 / 1000.0, cpu_sensors.push(sensor.reader()?);
(b"coretemp\n", core) if core.starts_with(b"Core") => {
cores_found += 1.0;
core_total += parsed as f32 / 1000.0
}
_ => {}
} }
} }
} }
if temps.cpu == 0.0 && core_total > 0.0 { if device.name() == "amdgpu" {
temps.cpu = core_total / cores_found for sensor in device.sensors().flatten() {
if sensor.name() == "edge" {
gpu_sensors.push(sensor.reader()?);
}
}
}
} }
if let Some(nvidia_temperature) = crate::gpu::nvidia::temperature() { Ok(TemperatureSource {
temps.gpu = nvidia_temperature; cpu_sensors,
gpu_sensors,
})
}
} }
Ok(temps) fn average_sensors(sensors: &mut [FileSource]) -> f32 {
if sensors.is_empty() {
return 0.0;
}
let mut total = 0.0;
let mut count = 0.0;
for sensor in sensors.iter_mut() {
if let Ok(value) = sensor.read::<f32>() {
total += value;
count += 1.0
}
}
total / count
}
impl SensorSource for TemperatureSource {
type Data = Temperatures;
fn read(&mut self) -> io::Result<Self::Data> {
Ok(Temperatures {
cpu: average_sensors(&mut self.cpu_sensors) / 1000.0,
gpu: average_sensors(&mut self.gpu_sensors) / 1000.0,
})
}
} }
pub fn memory() -> Result<Memory> { pub fn memory() -> Result<Memory> {