librehardwaremonitor integration

This commit is contained in:
Robin Appelman 2023-05-18 18:40:10 +02:00
commit bfb8f20439
8 changed files with 358 additions and 62 deletions

174
Cargo.lock generated
View file

@ -37,6 +37,55 @@ dependencies = [
"libc",
]
[[package]]
name = "anstream"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is-terminal",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d"
[[package]]
name = "anstyle-parse"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "anstyle-wincon"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188"
dependencies = [
"anstyle",
"windows-sys 0.48.0",
]
[[package]]
name = "autocfg"
version = "1.1.0"
@ -166,6 +215,48 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "clap"
version = "4.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34d21f9bf1b425d2968943631ec91202fe5e837264063503708b83013f8fc938"
dependencies = [
"clap_builder",
"clap_derive",
"once_cell",
]
[[package]]
name = "clap_builder"
version = "4.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "914c8c79fb560f238ef6429439a30023c862f7a28e688c58f7203f12b29970bd"
dependencies = [
"anstream",
"anstyle",
"bitflags",
"clap_lex",
"strsim 0.10.0",
]
[[package]]
name = "clap_derive"
version = "4.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.15",
]
[[package]]
name = "clap_lex"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1"
[[package]]
name = "color-eyre"
version = "0.6.2"
@ -193,6 +284,12 @@ dependencies = [
"tracing-error",
]
[[package]]
name = "colorchoice"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "core-foundation-sys"
version = "0.8.4"
@ -383,6 +480,17 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "errno"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a"
dependencies = [
"errno-dragonfly",
"libc",
"windows-sys 0.48.0",
]
[[package]]
name = "errno-dragonfly"
version = "0.1.2"
@ -584,6 +692,12 @@ dependencies = [
"http",
]
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "hermit-abi"
version = "0.2.6"
@ -593,6 +707,12 @@ dependencies = [
"libc",
]
[[package]]
name = "hermit-abi"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
[[package]]
name = "hex"
version = "0.4.3"
@ -752,6 +872,29 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "io-lifetimes"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220"
dependencies = [
"hermit-abi 0.3.1",
"libc",
"windows-sys 0.48.0",
]
[[package]]
name = "is-terminal"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f"
dependencies = [
"hermit-abi 0.3.1",
"io-lifetimes",
"rustix",
"windows-sys 0.48.0",
]
[[package]]
name = "itoa"
version = "1.0.6"
@ -819,6 +962,12 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "linux-raw-sys"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f"
[[package]]
name = "log"
version = "0.4.17"
@ -991,7 +1140,7 @@ version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
dependencies = [
"hermit-abi",
"hermit-abi 0.2.6",
"libc",
]
@ -1061,6 +1210,7 @@ version = "0.1.0"
dependencies = [
"ahash",
"bollard",
"clap",
"color-eyre",
"ctrlc",
"dotenvy",
@ -1222,6 +1372,20 @@ version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustix"
version = "0.37.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d"
dependencies = [
"bitflags",
"errno 0.3.1",
"io-lifetimes",
"libc",
"linux-raw-sys",
"windows-sys 0.48.0",
]
[[package]]
name = "rustls-pemfile"
version = "1.0.2"
@ -1411,7 +1575,7 @@ version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59e93f5d45535f49b6a05ef7ac2f0f795d28de494cf53a512751602c9849bea3"
dependencies = [
"errno",
"errno 0.2.8",
"kernel32-sys",
"libc",
"winapi 0.2.8",
@ -1695,6 +1859,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf8parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "valuable"
version = "0.1.0"

View file

@ -24,6 +24,7 @@ nvml-wrapper = "0.8.0"
if-addrs = "0.7.0"
sysconf = "0.3.4"
thiserror = "1.0.40"
clap = { version = "4.2.7", features = ["derive"] }
[target.'cfg(windows)'.dependencies]
serde = { version = "1.0.163", features = ["derive"] }

View file

@ -61,4 +61,8 @@ To enable monitoring of docker containers, add the `palantir` user to the `docke
```bash
sudo usermod -a -G docker palantir
```
```
## Windows support
Palantir has limited Windows support out of the box. Additional sensors can be enabled by running [LibreHardwareMonitor](https://github.com/LibreHardwareMonitor/LibreHardwareMonitor) alongside it.

View file

@ -182,3 +182,41 @@ impl SensorData for DiskUsage {
}
}
}
/// Energy counters for the CPU (total and per package) and the GPU.
///
/// The raw values are divided by 1,000,000 when written out; going by the
/// `_uj` suffix they are microjoules, so the emitted numbers are joules.
#[derive(Debug, Default)]
pub struct PowerUsage {
    pub cpu_uj: u64,
    pub cpu_packages_uj: Vec<u64>,
    pub gpu_uj: u64,
}

impl PowerUsage {
    /// Appends the power metrics for `hostname` to `w`, one line per metric.
    ///
    /// The CPU total is always written, followed by one line per CPU package
    /// (labelled with its index). The GPU total is only written when it is
    /// non-zero. Write errors are deliberately ignored.
    pub fn write<W: Write>(&self, mut w: W, hostname: &str) {
        let cpu_total = self.cpu_uj as f64 / 1_000_000.0;
        let _ = writeln!(
            w,
            r#"total_power{{host="{}", device="cpu"}} {:.3}"#,
            hostname, cpu_total
        );

        for (index, package_uj) in self.cpu_packages_uj.iter().copied().enumerate() {
            let package_total = package_uj as f64 / 1_000_000.0;
            let _ = writeln!(
                w,
                r#"package_power{{host="{}", package="{}", device="cpu"}} {:.3}"#,
                hostname, index, package_total
            );
        }

        // A zero GPU counter is treated as "no GPU reading available".
        if self.gpu_uj == 0 {
            return;
        }
        let gpu_total = self.gpu_uj as f64 / 1_000_000.0;
        let _ = writeln!(
            w,
            r#"total_power{{host="{}", device="gpu"}} {:.3}"#,
            hostname, gpu_total
        );
    }
}

View file

@ -1,3 +1,4 @@
use crate::data::PowerUsage;
use crate::linux::gpu::gpu_power;
use crate::{Error, Result};
use std::fmt::Write;
@ -7,44 +8,6 @@ use tracing::warn;
static CAN_READ: AtomicBool = AtomicBool::new(true);
/// Energy counters for the CPU (total and per package) and the GPU.
///
/// Values are divided by 1,000,000 on output (the `_uj` suffix suggests
/// microjoules in, joules out).
#[derive(Debug, Default)]
pub struct PowerUsage {
    cpu_uj: u64,
    cpu_packages_uj: Vec<u64>,
    gpu_uj: u64,
}

impl PowerUsage {
    /// Writes the power metrics for `hostname` into `w`.
    ///
    /// Emits the CPU total, one line per CPU package, and the GPU total when
    /// it is greater than zero. Failures to write are silently dropped.
    pub fn write<W: Write>(&self, mut w: W, hostname: &str) {
        // Shared conversion applied to every counter before formatting.
        let scaled = |raw: u64| raw as f64 / 1_000_000.0;

        writeln!(
            &mut w,
            r#"total_power{{host="{}", device="cpu"}} {:.3}"#,
            hostname,
            scaled(self.cpu_uj)
        )
        .ok();

        self.cpu_packages_uj
            .iter()
            .enumerate()
            .for_each(|(package_index, raw)| {
                writeln!(
                    &mut w,
                    r#"package_power{{host="{}", package="{}", device="cpu"}} {:.3}"#,
                    hostname,
                    package_index,
                    scaled(*raw)
                )
                .ok();
            });

        if self.gpu_uj != 0 {
            writeln!(
                &mut w,
                r#"total_power{{host="{}", device="gpu"}} {:.3}"#,
                hostname,
                scaled(self.gpu_uj)
            )
            .ok();
        }
    }
}
pub fn power_usage() -> Result<Option<PowerUsage>> {
if !CAN_READ.load(Ordering::Relaxed) {
return Ok(None);

View file

@ -1,4 +1,5 @@
use bollard::Docker;
use clap::Parser;
use color_eyre::{Report, Result};
use futures_util::pin_mut;
use futures_util::StreamExt;
@ -26,6 +27,14 @@ impl From<Report> for ReportRejection {
impl Reject for ReportRejection {}
// Command-line arguments, parsed with clap's derive `Parser`.
// NOTE(review): `///` comments inside this struct are picked up by clap as
// help text, so explanatory notes here use plain `//` comments instead.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
// Optional listening port (`-p` / `--port`). When absent, `main` falls back
// to the `PORT` variable read through dotenvy and finally to port 80.
/// Port to listen to
#[arg(short, long)]
port: Option<u16>,
}
async fn serve_inner(docker: Option<Docker>, sensors: &Sensors) -> Result<String> {
let mut metrics = get_metrics(sensors)?;
if let Some(docker) = docker {
@ -50,12 +59,16 @@ async fn serve_metrics(docker: Option<Docker>, sensors: Arc<Sensors>) -> Result<
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt::init();
let args = Args::parse();
let host_port: u16 = dotenvy::var("PORT")
.ok()
.map(|port| port.parse())
.transpose()?
.unwrap_or(80);
let host_port = match args.port {
Some(port) => port,
None => dotenvy::var("PORT")
.ok()
.map(|port| port.parse())
.transpose()?
.unwrap_or(80),
};
let mdns = dotenvy::var("DISABLE_MDNS").is_ok();

View file

@ -12,6 +12,7 @@ use once_cell::sync::Lazy;
use os_thread_local::ThreadLocal;
use std::borrow::Cow;
use std::sync::Mutex;
use std::thread::spawn;
use sysinfo::{ComponentExt, DiskExt, NetworkExt, System, SystemExt};
pub struct Sensors {
@ -26,6 +27,7 @@ static WMI: Lazy<ThreadLocal<WmiSensor>> =
impl Sensors {
pub fn new() -> Result<Sensors> {
spawn(wmi::update_power);
let mut system = System::new_all();
system.refresh_all();
println!("{:?}", system);
@ -96,6 +98,9 @@ pub fn get_metrics(sensors: &Sensors) -> Result<String> {
if let Some(disk_usage) = WMI.with(|wmi| wmi.disk_usage())? {
disk_usage.write(&mut result, &hostname);
}
let hwmon_data = WMI.with(|wmi| wmi.hwmon())?;
hwmon_data.temperature.write(&mut result, &hostname);
hwmon_data.power.write(&mut result, &hostname);
Ok(result)
}

View file

@ -1,46 +1,54 @@
use crate::data::DiskStats;
use crate::data::{DiskStats, PowerUsage, Temperatures};
use crate::Result;
use serde::Deserialize;
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Mutex;
use std::thread::sleep;
use std::time::{Duration, Instant};
use wmi::{COMLibrary, WMIConnection};
pub struct WmiSensor {
wmi_con: WMIConnection,
wmi_hwmon_con: Option<WMIConnection>,
}
impl WmiSensor {
pub fn new() -> Result<Self> {
let com_con = COMLibrary::new()?;
let wmi_con = WMIConnection::new(com_con.into())?;
let wmi_con = WMIConnection::new(com_con)?;
let wmi_hwmon_con =
WMIConnection::with_namespace_path("ROOT\\LibreHardwareMonitor", com_con).ok();
Ok(WmiSensor { wmi_con })
Ok(WmiSensor {
wmi_con,
wmi_hwmon_con,
})
}
pub fn gpu_mem(&self) -> Result<u64> {
#[derive(Deserialize, Debug)]
#[allow(non_camel_case_types)]
struct Win32_PerfFormattedData_GPUPerformanceCounters_GPUAdapterMemory {
#[serde(rename = "Win32_PerfFormattedData_GPUPerformanceCounters_GPUAdapterMemory")]
struct GPUAdapterMemory {
#[serde(rename = "DedicatedUsage")]
dedicated_usage: u64,
}
let results: Vec<Win32_PerfFormattedData_GPUPerformanceCounters_GPUAdapterMemory> =
self.wmi_con.query()?;
let results: Vec<GPUAdapterMemory> = self.wmi_con.query()?;
Ok(results.iter().map(|result| result.dedicated_usage).sum())
}
pub fn gpu_usage(&self) -> Result<HashMap<String, u32>> {
#[derive(Deserialize, Debug)]
#[allow(non_camel_case_types)]
struct Win32_PerfFormattedData_GPUPerformanceCounters_GPUEngine {
#[serde(rename = "Win32_PerfFormattedData_GPUPerformanceCounters_GPUEngine")]
struct GPUEngine {
#[serde(rename = "Name")]
name: String,
#[serde(rename = "UtilizationPercentage")]
usage: u32,
}
let results: Vec<Win32_PerfFormattedData_GPUPerformanceCounters_GPUEngine> =
self.wmi_con.query()?;
let results: Vec<GPUEngine> = self.wmi_con.query()?;
let mut data = HashMap::default();
@ -56,8 +64,8 @@ impl WmiSensor {
pub fn disk_usage(&self) -> Result<Option<DiskStats>> {
#[derive(Deserialize, Debug)]
#[allow(non_camel_case_types)]
struct Win32_PerfRawData_Counters_FileSystemDiskActivity {
#[serde(rename = "Win32_PerfRawData_Counters_FileSystemDiskActivity")]
struct FileSystemDiskActivity {
#[serde(rename = "Name")]
name: String,
#[serde(rename = "FileSystemBytesRead")]
@ -66,8 +74,7 @@ impl WmiSensor {
written: u64,
}
let results: Vec<Win32_PerfRawData_Counters_FileSystemDiskActivity> =
self.wmi_con.query()?;
let results: Vec<FileSystemDiskActivity> = self.wmi_con.query()?;
for result in results {
if result.name == "_Total" {
return Ok(Some(DiskStats {
@ -79,4 +86,99 @@ impl WmiSensor {
}
Ok(None)
}
/// Collects temperature and power data from the hardware-monitor namespace.
///
/// When no hardware-monitor connection is available the sensor list is
/// empty; a failing query on an existing connection is returned as an error.
/// The power reading is taken from the counters exposed by `power()`.
pub fn hwmon(&self) -> Result<HwMonData> {
    let sensors: Vec<Sensor> = if let Some(con) = &self.wmi_hwmon_con {
        con.query()?
    } else {
        Vec::new()
    };

    let is_temperature = |sensor: &Sensor| sensor.sensor_type == "Temperature";
    // Per-core CPU temperatures; names containing "Distance" are excluded
    // (presumably "distance to TjMax" readings — TODO confirm).
    let is_cpu_core = |sensor: &Sensor| {
        is_temperature(sensor)
            && sensor.name.starts_with("CPU Core")
            && !sensor.name.contains("Distance")
    };
    let is_gpu_core = |sensor: &Sensor| is_temperature(sensor) && sensor.name == "GPU Core";

    let cpu = avg_sensors(&sensors, is_cpu_core);
    let gpu = avg_sensors(&sensors, is_gpu_core);

    Ok(HwMonData {
        temperature: Temperatures { cpu, gpu },
        power: power(),
    })
}
}
/// One row returned by the sensor query against the hardware-monitor
/// namespace.
///
/// Field names are mapped to PascalCase column names when deserializing
/// (`Identifier`, `Name`, `SensorType`, `Value`).
#[derive(Deserialize, Debug)]
#[serde(rename_all = "PascalCase")]
// `identifier` is deserialized but not read anywhere in the visible code.
#[allow(dead_code)]
struct Sensor {
identifier: String,
/// Display name, matched against e.g. "CPU Core…", "GPU Core", "CPU Package".
name: String,
/// Sensor category, matched against "Temperature" and "Power".
sensor_type: String,
/// Current reading; the unit depends on `sensor_type`.
value: f32,
}
/// Averages the `value` of every sensor accepted by `filter`.
///
/// Returns `0.0` when no sensor matches (including an empty `sensors`
/// slice). Without this guard the result would be `0.0 / 0.0`, i.e. NaN,
/// which is what callers got whenever the hardware monitor was unavailable.
/// NOTE(review): `0.0` is assumed to be an acceptable "no reading" value for
/// the temperature output — confirm against the `Temperatures` writer.
fn avg_sensors(sensors: &[Sensor], filter: impl Fn(&Sensor) -> bool) -> f32 {
    // Single pass: accumulate the sum and the number of matches together.
    let (total, count) = sensors
        .iter()
        .filter(|&sensor| filter(sensor))
        .fold((0.0_f32, 0_u32), |(total, count), sensor| {
            (total + sensor.value, count + 1)
        });

    if count == 0 {
        0.0
    } else {
        total / count as f32
    }
}
/// Result of `WmiSensor::hwmon`: the averaged temperatures plus the current
/// power counters.
pub struct HwMonData {
/// Averaged CPU-core and GPU-core temperature readings.
pub temperature: Temperatures,
/// Snapshot of the accumulated CPU/GPU energy counters.
pub power: PowerUsage,
}
// Running energy totals: incremented by `update_power` and read by `power`.
// Each increment is `power reading * elapsed seconds * 1_000_000`
// (microjoules, assuming the power sensors report watts — TODO confirm).
static CPU_POWER_UJ: AtomicU64 = AtomicU64::new(0);
static GPU_POWER_UJ: AtomicU64 = AtomicU64::new(0);
// Instant of the previous `get_power_elapsed` call; `None` until the first call.
static POWER_LAST_READ: Mutex<Option<Instant>> = Mutex::new(None);
/// Returns the time since the previous call and records "now" as the new
/// reference point.
///
/// The very first call has no reference point yet and returns `None`.
/// Panics if the `POWER_LAST_READ` mutex is poisoned.
fn get_power_elapsed() -> Option<Duration> {
    let mut guard = POWER_LAST_READ.lock().unwrap();
    // Take the timestamp while holding the lock so stored instants never go backwards.
    let now = Instant::now();
    guard.replace(now).map(|previous| now - previous)
}
/// Looks up the first sensor whose type equals `ty` and whose name equals
/// `name`, returning its value, or `None` when there is no such sensor.
fn get_sensor(sensors: &[Sensor], ty: &str, name: &str) -> Option<f32> {
    sensors
        .iter()
        .find(|candidate| candidate.sensor_type == ty && candidate.name == name)
        .map(|found| found.value)
}
pub fn update_power() {
let Ok(com_con) = COMLibrary::new() else {return;};
if let Ok(wmi_con) = WMIConnection::with_namespace_path("ROOT\\LibreHardwareMonitor", com_con) {
loop {
if let Some(elapsed) = get_power_elapsed() {
let Ok(sensors) = wmi_con.query::<Sensor>() else {return;};
let sensors: Vec<Sensor> = sensors;
let Some(cpu_current_power) = get_sensor(&sensors, "Power", "CPU Package") else {return;};
let Some(gpu_current_power) = get_sensor(&sensors, "Power", "GPU Package") else {return;};
let elapsed_sec = elapsed.as_secs_f32();
let cpu_power = cpu_current_power * elapsed_sec * 1_000_000.0;
let gpu_power = gpu_current_power * elapsed_sec * 1_000_000.0;
CPU_POWER_UJ.fetch_add(cpu_power as u64, Ordering::SeqCst);
GPU_POWER_UJ.fetch_add(gpu_power as u64, Ordering::SeqCst);
}
sleep(Duration::from_millis(500));
}
}
}
/// Builds a `PowerUsage` from the current values of the global CPU and GPU
/// energy counters. Per-package CPU values are not tracked here, so that
/// list is always empty.
pub fn power() -> PowerUsage {
    let cpu_uj = CPU_POWER_UJ.load(Ordering::SeqCst);
    let gpu_uj = GPU_POWER_UJ.load(Ordering::SeqCst);
    PowerUsage {
        cpu_uj,
        cpu_packages_uj: Vec::new(),
        gpu_uj,
    }
}