mirror of
https://codeberg.org/icewind/palantir.git
synced 2026-06-03 10:14:09 +02:00
multi gpu
This commit is contained in:
parent
22c82c59af
commit
217933c1a1
15 changed files with 234 additions and 129 deletions
9
Cargo.lock
generated
9
Cargo.lock
generated
|
|
@ -442,6 +442,12 @@ version = "0.15.7"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
|
checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.15.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "equivalent"
|
name = "equivalent"
|
||||||
version = "1.0.2"
|
version = "1.0.2"
|
||||||
|
|
@ -1553,7 +1559,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sidewindow"
|
name = "sidewindow"
|
||||||
version = "1.3.1"
|
version = "1.4.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash",
|
"ahash",
|
||||||
"bollard",
|
"bollard",
|
||||||
|
|
@ -1561,6 +1567,7 @@ dependencies = [
|
||||||
"color-eyre",
|
"color-eyre",
|
||||||
"ctrlc",
|
"ctrlc",
|
||||||
"dotenvy",
|
"dotenvy",
|
||||||
|
"either",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"hostname",
|
"hostname",
|
||||||
"if-addrs 0.15.0",
|
"if-addrs 0.15.0",
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "sidewindow"
|
name = "sidewindow"
|
||||||
version = "1.3.1"
|
version = "1.4.0"
|
||||||
authors = ["Robin Appelman <robin@icewind.nl>"]
|
authors = ["Robin Appelman <robin@icewind.nl>"]
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
rust-version = "1.88.0"
|
rust-version = "1.88.0"
|
||||||
|
|
@ -26,6 +26,7 @@ if-addrs = "0.15.0"
|
||||||
sysconf = "0.3.4"
|
sysconf = "0.3.4"
|
||||||
thiserror = "2.0.18"
|
thiserror = "2.0.18"
|
||||||
clap = { version = "4.6.0", features = ["derive"] }
|
clap = { version = "4.6.0", features = ["derive"] }
|
||||||
|
either = "1.15.0"
|
||||||
|
|
||||||
[target.'cfg(not(windows))'.dependencies]
|
[target.'cfg(not(windows))'.dependencies]
|
||||||
procfs = "0.18.0"
|
procfs = "0.18.0"
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,8 @@ Opinionated system metrics exporter for prometheus
|
||||||
- Download the binary for your architecture from the
|
- Download the binary for your architecture from the
|
||||||
[releases](https://codeberg.org/icewind/sidewindow/releases/) and place it at
|
[releases](https://codeberg.org/icewind/sidewindow/releases/) and place it at
|
||||||
`/usr/local/bin/sidewindow`
|
`/usr/local/bin/sidewindow`
|
||||||
- Place the [sidewindow.service](sidewindow.service) file in `/etc/systemd/system/`
|
- Place the [sidewindow.service](sidewindow.service) file in
|
||||||
|
`/etc/systemd/system/`
|
||||||
- Create the `sidewindow` user: `sudo useradd -m sidewindow`
|
- Create the `sidewindow` user: `sudo useradd -m sidewindow`
|
||||||
- Start and enable the server: `sudo systemctl enable --now sidewindow`
|
- Start and enable the server: `sudo systemctl enable --now sidewindow`
|
||||||
- Metrics will be available at `localhost:5665/metrics`
|
- Metrics will be available at `localhost:5665/metrics`
|
||||||
|
|
|
||||||
18
src/data.rs
18
src/data.rs
|
|
@ -60,6 +60,7 @@ impl SensorData for Memory {
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone, Default)]
|
||||||
pub struct GpuMemory {
|
pub struct GpuMemory {
|
||||||
|
pub card: u32,
|
||||||
pub total: u64,
|
pub total: u64,
|
||||||
pub free: u64,
|
pub free: u64,
|
||||||
}
|
}
|
||||||
|
|
@ -68,14 +69,14 @@ impl SensorData for GpuMemory {
|
||||||
fn write<W: Write>(&self, mut w: W, hostname: &str) {
|
fn write<W: Write>(&self, mut w: W, hostname: &str) {
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut w,
|
&mut w,
|
||||||
"gpu_memory_total{{host=\"{}\"}} {}",
|
"gpu_memory_total{{host=\"{}\", gpu=\"{}\"}} {}",
|
||||||
hostname, self.total
|
hostname, self.card, self.total
|
||||||
)
|
)
|
||||||
.ok();
|
.ok();
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut w,
|
&mut w,
|
||||||
"gpu_memory_free{{host=\"{}\"}} {}",
|
"gpu_memory_free{{host=\"{}\", gpu=\"{}\"}} {}",
|
||||||
hostname, self.free
|
hostname, self.card, self.free
|
||||||
)
|
)
|
||||||
.ok();
|
.ok();
|
||||||
}
|
}
|
||||||
|
|
@ -116,6 +117,7 @@ impl SensorData for NetStats {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct GpuUsage {
|
pub struct GpuUsage {
|
||||||
|
pub card: u32,
|
||||||
pub system: Cow<'static, str>,
|
pub system: Cow<'static, str>,
|
||||||
pub usage: u32,
|
pub usage: u32,
|
||||||
}
|
}
|
||||||
|
|
@ -124,8 +126,8 @@ impl GpuUsage {
|
||||||
pub fn write<W: Write>(&self, mut w: W, hostname: &str) {
|
pub fn write<W: Write>(&self, mut w: W, hostname: &str) {
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut w,
|
&mut w,
|
||||||
r#"gpu_usage{{host="{}", system="{}"}} {:.3}"#,
|
r#"gpu_usage{{host="{}", system="{}", gpu="{}"}} {:.3}"#,
|
||||||
hostname, self.system, self.usage,
|
hostname, self.system, self.card, self.usage,
|
||||||
)
|
)
|
||||||
.ok();
|
.ok();
|
||||||
}
|
}
|
||||||
|
|
@ -213,6 +215,7 @@ impl SensorData for CpuPowerUsage {
|
||||||
|
|
||||||
#[derive(Debug, Default)]
|
#[derive(Debug, Default)]
|
||||||
pub struct GpuPowerUsage {
|
pub struct GpuPowerUsage {
|
||||||
|
pub card: u32,
|
||||||
pub gpu_uj: u64,
|
pub gpu_uj: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -221,8 +224,9 @@ impl SensorData for GpuPowerUsage {
|
||||||
if self.gpu_uj > 0 {
|
if self.gpu_uj > 0 {
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut w,
|
&mut w,
|
||||||
r#"total_power{{host="{}", device="gpu"}} {:.3}"#,
|
r#"total_power{{host="{}", device="gpu", gpu="{}"}} {:.3}"#,
|
||||||
hostname,
|
hostname,
|
||||||
|
self.card,
|
||||||
self.gpu_uj as f64 / 1_000_000.0
|
self.gpu_uj as f64 / 1_000_000.0
|
||||||
)
|
)
|
||||||
.ok();
|
.ok();
|
||||||
|
|
|
||||||
|
|
@ -15,9 +15,9 @@ pub mod linux;
|
||||||
pub mod win;
|
pub mod win;
|
||||||
|
|
||||||
#[cfg(not(target_os = "windows"))]
|
#[cfg(not(target_os = "windows"))]
|
||||||
pub use linux::{get_metrics, Sensors};
|
pub use linux::{Sensors, get_metrics};
|
||||||
#[cfg(target_os = "windows")]
|
#[cfg(target_os = "windows")]
|
||||||
pub use win::{get_metrics, Sensors};
|
pub use win::{Sensors, get_metrics};
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
|
|
@ -82,11 +82,8 @@ pub trait SensorSource {
|
||||||
|
|
||||||
pub trait MultiSensorSource {
|
pub trait MultiSensorSource {
|
||||||
type Data: SensorData;
|
type Data: SensorData;
|
||||||
type Iter<'a>: Iterator<Item = Result<Self::Data>>
|
|
||||||
where
|
|
||||||
Self: 'a;
|
|
||||||
|
|
||||||
fn read(&mut self) -> Result<Self::Iter<'_>>;
|
fn read(&mut self) -> Result<impl Iterator<Item = Result<Self::Data>>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn hostname() -> Result<String> {
|
pub fn hostname() -> Result<String> {
|
||||||
|
|
|
||||||
|
|
@ -29,9 +29,8 @@ impl DiskStatSource {
|
||||||
|
|
||||||
impl MultiSensorSource for DiskStatSource {
|
impl MultiSensorSource for DiskStatSource {
|
||||||
type Data = DiskStats;
|
type Data = DiskStats;
|
||||||
type Iter<'a> = DiskStatParser<'a>;
|
|
||||||
|
|
||||||
fn read(&mut self) -> Result<Self::Iter<'_>> {
|
fn read(&mut self) -> Result<impl Iterator<Item = Result<Self::Data>>> {
|
||||||
self.buff.clear();
|
self.buff.clear();
|
||||||
self.source.rewind().context("error rewinding disk stats")?;
|
self.source.rewind().context("error rewinding disk stats")?;
|
||||||
self.source
|
self.source
|
||||||
|
|
@ -93,9 +92,8 @@ impl DiskUsageSource {
|
||||||
|
|
||||||
impl MultiSensorSource for DiskUsageSource {
|
impl MultiSensorSource for DiskUsageSource {
|
||||||
type Data = DiskUsage;
|
type Data = DiskUsage;
|
||||||
type Iter<'a> = DiskUsageParser<'a>;
|
|
||||||
|
|
||||||
fn read(&mut self) -> Result<Self::Iter<'_>> {
|
fn read(&mut self) -> Result<impl Iterator<Item = Result<Self::Data>>> {
|
||||||
self.buff.clear();
|
self.buff.clear();
|
||||||
self.source.rewind().context("error rewinding mounts")?;
|
self.source.rewind().context("error rewinding mounts")?;
|
||||||
self.source
|
self.source
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,9 @@
|
||||||
use crate::data::{GpuMemory, GpuUsage};
|
use crate::data::{GpuMemory, GpuPowerUsage, GpuUsage};
|
||||||
use crate::linux::hwmon::FileSource;
|
use crate::linux::hwmon::FileSource;
|
||||||
|
use either::Either;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::fs::{read_dir, read_to_string};
|
use std::fs::{read_dir, read_to_string};
|
||||||
|
use std::iter::empty;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::sync::atomic::{AtomicU64, Ordering};
|
use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
|
|
@ -12,46 +14,91 @@ use tracing::{info, warn};
|
||||||
|
|
||||||
pub mod nvidia;
|
pub mod nvidia;
|
||||||
|
|
||||||
fn read_num<T: FromStr>(path: &str) -> Option<T> {
|
struct Card {
|
||||||
read_to_string(path).ok()?.trim().parse().ok()
|
id: u32,
|
||||||
|
path: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn memory() -> Option<GpuMemory> {
|
impl Card {
|
||||||
if let Some(nv_mem) = nvidia::memory() {
|
fn read_num<T: FromStr>(&self, name: &str) -> Option<T> {
|
||||||
return Some(nv_mem);
|
read_to_string(self.path.join(name))
|
||||||
|
.ok()?
|
||||||
|
.trim()
|
||||||
|
.parse()
|
||||||
|
.ok()
|
||||||
}
|
}
|
||||||
// 1 gpu should be enough for everyone
|
}
|
||||||
let used = read_num::<u64>("/sys/class/drm/card0/device/mem_info_vram_used")?;
|
|
||||||
let total = read_num("/sys/class/drm/card0/device/mem_info_vram_total")?;
|
fn cards() -> impl Iterator<Item = Card> {
|
||||||
Some(GpuMemory {
|
let Ok(dir) = read_dir("/sys/class/drm") else {
|
||||||
total,
|
return Either::Left(empty());
|
||||||
free: total - used,
|
};
|
||||||
})
|
Either::Right(dir.flatten().flat_map(|entry| {
|
||||||
|
let mut path = entry.path();
|
||||||
|
let id: u32 = path
|
||||||
|
.file_name()?
|
||||||
|
.to_str()?
|
||||||
|
.strip_prefix("card")?
|
||||||
|
.parse()
|
||||||
|
.ok()?;
|
||||||
|
path.push("device");
|
||||||
|
Some(Card { id, path })
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn memory() -> impl Iterator<Item = GpuMemory> {
|
||||||
|
if let Some(nv_mem) = nvidia::memory() {
|
||||||
|
return Either::Left(nv_mem);
|
||||||
|
}
|
||||||
|
|
||||||
|
Either::Right(cards().flat_map(|card| {
|
||||||
|
let used = card.read_num::<u64>("mem_info_vram_used")?;
|
||||||
|
let total = card.read_num("mem_info_vram_total")?;
|
||||||
|
Some(GpuMemory {
|
||||||
|
card: card.id,
|
||||||
|
total,
|
||||||
|
free: total - used,
|
||||||
|
})
|
||||||
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn utilization() -> impl Iterator<Item = GpuUsage> {
|
pub fn utilization() -> impl Iterator<Item = GpuUsage> {
|
||||||
let nv_usage = nvidia::utilization();
|
cards().flat_map(|card| {
|
||||||
|
let nv_usage = nvidia::utilization();
|
||||||
|
|
||||||
let sources = [
|
let sources = [
|
||||||
(
|
("memory", card.read_num("mem_busy_percent")),
|
||||||
"memory",
|
("compute", card.read_num("gpu_busy_percent")),
|
||||||
read_num("/sys/class/drm/card0/device/mem_busy_percent"),
|
];
|
||||||
),
|
let drm = sources.into_iter().flat_map(move |(system, usage)| {
|
||||||
(
|
Some(GpuUsage {
|
||||||
"compute",
|
card: card.id,
|
||||||
read_num("/sys/class/drm/card0/device/gpu_busy_percent"),
|
system: Cow::Borrowed(system),
|
||||||
),
|
usage: usage?,
|
||||||
];
|
})
|
||||||
let drm = sources.into_iter().flat_map(|(system, usage)| {
|
});
|
||||||
Some(GpuUsage {
|
drm.chain(nv_usage.into_iter().flatten())
|
||||||
system: Cow::Borrowed(system),
|
})
|
||||||
usage: usage?,
|
|
||||||
})
|
|
||||||
});
|
|
||||||
drm.chain(nv_usage)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static GPU_POWER_UJ: AtomicU64 = AtomicU64::new(0);
|
static GPU_POWER_UJ: [AtomicU64; 16] = [
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
AtomicU64::new(0),
|
||||||
|
];
|
||||||
static GPU_POWER_LAST_READ: Mutex<Option<Instant>> = Mutex::new(None);
|
static GPU_POWER_LAST_READ: Mutex<Option<Instant>> = Mutex::new(None);
|
||||||
|
|
||||||
fn get_gpu_power_elapsed() -> Option<Duration> {
|
fn get_gpu_power_elapsed() -> Option<Duration> {
|
||||||
|
|
@ -62,33 +109,40 @@ fn get_gpu_power_elapsed() -> Option<Duration> {
|
||||||
elapsed
|
elapsed
|
||||||
}
|
}
|
||||||
|
|
||||||
fn find_gpu_sensor() -> Option<PathBuf> {
|
fn find_gpu_sensor() -> impl Iterator<Item = PathBuf> {
|
||||||
read_dir("/sys/class/drm/card0/device/hwmon")
|
cards().flat_map(|card| {
|
||||||
.ok()?
|
read_dir(card.path.join("hwmon"))
|
||||||
.flatten()
|
.ok()?
|
||||||
.find_map(|hwmon| {
|
.flatten()
|
||||||
let path = hwmon.path().join("power1_average");
|
.find_map(|hwmon| {
|
||||||
path.exists().then_some(path)
|
let path = hwmon.path().join("power1_average");
|
||||||
})
|
path.exists().then_some(path)
|
||||||
|
})
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn update_gpu_power() {
|
pub fn update_gpu_power() {
|
||||||
if let Some(Ok(mut file)) = find_gpu_sensor().map(FileSource::open) {
|
let mut sensors = find_gpu_sensor()
|
||||||
|
.flat_map(FileSource::open)
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
if !sensors.is_empty() {
|
||||||
loop {
|
loop {
|
||||||
if let Some(elapsed) = get_gpu_power_elapsed() {
|
if let Some(elapsed) = get_gpu_power_elapsed() {
|
||||||
let current_power: u64 = match file.read() {
|
for (card, sensor) in sensors.iter_mut().enumerate().take(16) {
|
||||||
Ok(current_power) => current_power,
|
let current_power: u64 = match sensor.read() {
|
||||||
Err(_) => {
|
Ok(current_power) => current_power,
|
||||||
warn!("failed to read gpu power sensor");
|
Err(_) => {
|
||||||
return;
|
warn!("failed to read gpu power sensor");
|
||||||
}
|
return;
|
||||||
};
|
}
|
||||||
|
};
|
||||||
|
|
||||||
let elapsed_milli = elapsed.as_millis() as u64;
|
let elapsed_milli = elapsed.as_millis() as u64;
|
||||||
|
|
||||||
let power = current_power * elapsed_milli / 1000;
|
let power = current_power * elapsed_milli / 1000;
|
||||||
|
|
||||||
GPU_POWER_UJ.fetch_add(power, Ordering::SeqCst);
|
GPU_POWER_UJ[card].fetch_add(power, Ordering::SeqCst);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
sleep(Duration::from_millis(500));
|
sleep(Duration::from_millis(500));
|
||||||
}
|
}
|
||||||
|
|
@ -96,6 +150,14 @@ pub fn update_gpu_power() {
|
||||||
info!("no gpu sensor");
|
info!("no gpu sensor");
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn gpu_power() -> u64 {
|
pub fn gpu_power() -> impl Iterator<Item = GpuPowerUsage> {
|
||||||
GPU_POWER_UJ.load(Ordering::SeqCst)
|
GPU_POWER_UJ
|
||||||
|
.iter()
|
||||||
|
.map(|gpu| gpu.load(Ordering::SeqCst))
|
||||||
|
.enumerate()
|
||||||
|
.filter(|(_, power)| *power > 0)
|
||||||
|
.map(|(card, power)| GpuPowerUsage {
|
||||||
|
card: card as u32,
|
||||||
|
gpu_uj: power,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
use crate::data::{GpuMemory, GpuUsage};
|
use crate::data::{GpuMemory, GpuPowerUsage, GpuUsage};
|
||||||
use nvml_wrapper::enum_wrappers::device::TemperatureSensor;
|
use nvml_wrapper::enum_wrappers::device::TemperatureSensor;
|
||||||
use nvml_wrapper::{Device, Nvml};
|
use nvml_wrapper::{Device, Nvml};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
|
|
@ -6,32 +6,50 @@ use std::borrow::Cow;
|
||||||
|
|
||||||
static NVIDIA: Lazy<Option<Nvml>> = Lazy::new(|| Nvml::init().ok());
|
static NVIDIA: Lazy<Option<Nvml>> = Lazy::new(|| Nvml::init().ok());
|
||||||
|
|
||||||
fn device() -> Option<Device<'static>> {
|
fn devices() -> Option<impl Iterator<Item = Device<'static>>> {
|
||||||
NVIDIA.as_ref()?.device_by_index(0).ok()
|
let count = NVIDIA.as_ref()?.device_count().unwrap_or_default();
|
||||||
|
Some((0..count).flat_map(device))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn temperature() -> Option<f32> {
|
fn device(index: u32) -> Option<Device<'static>> {
|
||||||
let temp = device()?.temperature(TemperatureSensor::Gpu).ok()?;
|
NVIDIA.as_ref()?.device_by_index(index).ok()
|
||||||
Some(temp as f32)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn power() -> Option<u64> {
|
pub fn temperature() -> Option<impl Iterator<Item = f32>> {
|
||||||
device()?
|
Some(devices()?.flat_map(|device| {
|
||||||
.total_energy_consumption()
|
device
|
||||||
.ok()
|
.temperature(TemperatureSensor::Gpu)
|
||||||
.map(|mj| mj * 1_000)
|
.ok()
|
||||||
|
.map(|t| t as f32)
|
||||||
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn memory() -> Option<GpuMemory> {
|
pub fn power() -> Option<impl Iterator<Item = GpuPowerUsage>> {
|
||||||
let mem = device()?.memory_info().ok()?;
|
Some(devices()?.flat_map(|device| {
|
||||||
Some(GpuMemory {
|
let power = device
|
||||||
total: mem.total,
|
.total_energy_consumption()
|
||||||
free: mem.free,
|
.ok()
|
||||||
})
|
.map(|mj| mj * 1_000)?;
|
||||||
|
Some(GpuPowerUsage {
|
||||||
|
card: device.index().unwrap_or_default(),
|
||||||
|
gpu_uj: power,
|
||||||
|
})
|
||||||
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn utilization() -> impl Iterator<Item = GpuUsage> {
|
pub fn memory() -> Option<impl Iterator<Item = GpuMemory>> {
|
||||||
let sources = if let Some(device) = device() {
|
Some(devices()?.flat_map(|device| {
|
||||||
|
let mem = device.memory_info().ok()?;
|
||||||
|
Some(GpuMemory {
|
||||||
|
card: device.index().unwrap_or_default(),
|
||||||
|
total: mem.total,
|
||||||
|
free: mem.free,
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn utilization() -> Option<impl Iterator<Item = GpuUsage>> {
|
||||||
|
let sources = devices()?.flat_map(|device| {
|
||||||
let utilization = device.utilization_rates().ok();
|
let utilization = device.utilization_rates().ok();
|
||||||
[
|
[
|
||||||
("compute", utilization.as_ref().map(|u| u.gpu)),
|
("compute", utilization.as_ref().map(|u| u.gpu)),
|
||||||
|
|
@ -45,13 +63,12 @@ pub fn utilization() -> impl Iterator<Item = GpuUsage> {
|
||||||
device.decoder_utilization().ok().map(|u| u.utilization),
|
device.decoder_utilization().ok().map(|u| u.utilization),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
} else {
|
});
|
||||||
[("", None); 4]
|
Some(sources.into_iter().flat_map(|(system, usage)| {
|
||||||
};
|
|
||||||
sources.into_iter().flat_map(|(system, usage)| {
|
|
||||||
Some(GpuUsage {
|
Some(GpuUsage {
|
||||||
|
card: 0,
|
||||||
system: Cow::Borrowed(system),
|
system: Cow::Borrowed(system),
|
||||||
usage: usage?,
|
usage: usage?,
|
||||||
})
|
})
|
||||||
})
|
}))
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
use std::fs::{File, read_dir, read_to_string};
|
use std::fmt::Debug;
|
||||||
|
use std::fs::{read_dir, read_to_string, File};
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::{ErrorKind, Read, Seek};
|
use std::io::{ErrorKind, Read, Seek};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
@ -11,13 +12,20 @@ fn read_to_string_trimmed(path: &Path) -> io::Result<String> {
|
||||||
s.truncate(len);
|
s.truncate(len);
|
||||||
Ok(s)
|
Ok(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct FileSource {
|
pub struct FileSource {
|
||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
buff: String,
|
buff: String,
|
||||||
file: File,
|
file: File,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Debug for FileSource {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("FileSource")
|
||||||
|
.field("path", &self.path)
|
||||||
|
.finish_non_exhaustive()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl FileSource {
|
impl FileSource {
|
||||||
#[instrument(skip_all, fields(path = ?path.as_ref()))]
|
#[instrument(skip_all, fields(path = ?path.as_ref()))]
|
||||||
pub fn open<P: AsRef<Path>>(path: P) -> io::Result<FileSource> {
|
pub fn open<P: AsRef<Path>>(path: P) -> io::Result<FileSource> {
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ use crate::linux::disk::zfs::arcstats;
|
||||||
use crate::linux::gpu::{update_gpu_power, utilization};
|
use crate::linux::gpu::{update_gpu_power, utilization};
|
||||||
use crate::linux::power::{CpuPowerSource, GpuPowerSource};
|
use crate::linux::power::{CpuPowerSource, GpuPowerSource};
|
||||||
use crate::linux::proc::ProcSource;
|
use crate::linux::proc::ProcSource;
|
||||||
use crate::{hostname, Error, MultiSensorSource, Result, SensorData, SensorSource};
|
use crate::{Error, MultiSensorSource, Result, SensorData, SensorSource, hostname};
|
||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
use std::sync::Mutex;
|
use std::sync::Mutex;
|
||||||
use sysconf::SysconfError;
|
use sysconf::SysconfError;
|
||||||
|
|
@ -65,7 +65,7 @@ pub fn get_metrics(sensors: &Sensors) -> Result<String> {
|
||||||
let memory = sensors.mem.lock().unwrap().read()?;
|
let memory = sensors.mem.lock().unwrap().read()?;
|
||||||
let temperatures = sensors.temp.lock().unwrap().read()?;
|
let temperatures = sensors.temp.lock().unwrap().read()?;
|
||||||
let cpu_power = sensors.cpu_power.lock().unwrap().read()?;
|
let cpu_power = sensors.cpu_power.lock().unwrap().read()?;
|
||||||
let gpu_power = sensors.gpu_power.lock().unwrap().read()?;
|
let mut gpu_power = sensors.gpu_power.lock().unwrap();
|
||||||
let mut net = sensors.net.lock().unwrap();
|
let mut net = sensors.net.lock().unwrap();
|
||||||
let mut proc = sensors.proc.lock().unwrap();
|
let mut proc = sensors.proc.lock().unwrap();
|
||||||
let networks = net.read()?;
|
let networks = net.read()?;
|
||||||
|
|
@ -111,11 +111,14 @@ pub fn get_metrics(sensors: &Sensors) -> Result<String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
cpu_power.write(&mut result, &sensors.hostname);
|
cpu_power.write(&mut result, &sensors.hostname);
|
||||||
gpu_power.write(&mut result, &sensors.hostname);
|
|
||||||
|
for gpu_power in gpu_power.read()? {
|
||||||
|
gpu_power?.write(&mut result, &sensors.hostname);
|
||||||
|
}
|
||||||
if let Some(arc) = arcstats() {
|
if let Some(arc) = arcstats() {
|
||||||
arc.write(&mut result, &sensors.hostname);
|
arc.write(&mut result, &sensors.hostname);
|
||||||
}
|
}
|
||||||
if let Some(memory) = gpu::memory() {
|
for memory in gpu::memory() {
|
||||||
memory.write(&mut result, &sensors.hostname)
|
memory.write(&mut result, &sensors.hostname)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,9 @@
|
||||||
|
use either::Either;
|
||||||
|
|
||||||
use crate::data::{CpuPowerUsage, GpuPowerUsage};
|
use crate::data::{CpuPowerUsage, GpuPowerUsage};
|
||||||
use crate::linux::gpu::gpu_power;
|
use crate::linux::gpu::gpu_power;
|
||||||
use crate::linux::hwmon::FileSource;
|
use crate::linux::hwmon::FileSource;
|
||||||
use crate::{IoResultExt, Result, SensorSource};
|
use crate::{IoResultExt, MultiSensorSource, Result, SensorSource};
|
||||||
use std::fs::read_dir;
|
use std::fs::read_dir;
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
|
@ -49,11 +51,13 @@ impl SensorSource for CpuPowerSource {
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct GpuPowerSource;
|
pub struct GpuPowerSource;
|
||||||
|
|
||||||
impl SensorSource for GpuPowerSource {
|
impl MultiSensorSource for GpuPowerSource {
|
||||||
type Data = GpuPowerUsage;
|
type Data = GpuPowerUsage;
|
||||||
|
|
||||||
fn read(&mut self) -> Result<Self::Data> {
|
fn read(&mut self) -> Result<impl Iterator<Item = Result<Self::Data>>> {
|
||||||
let gpu_uj = crate::linux::gpu::nvidia::power().unwrap_or_else(gpu_power);
|
Ok(crate::linux::gpu::nvidia::power()
|
||||||
Ok(GpuPowerUsage { gpu_uj })
|
.map(Either::Left)
|
||||||
|
.unwrap_or_else(|| Either::Right(gpu_power()))
|
||||||
|
.map(Ok))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@ use crate::linux::sensors::MemorySource;
|
||||||
use crate::{MultiSensorSource, Result, SensorSource};
|
use crate::{MultiSensorSource, Result, SensorSource};
|
||||||
use procfs::page_size;
|
use procfs::page_size;
|
||||||
use procfs::process::all_processes;
|
use procfs::process::all_processes;
|
||||||
use std::vec::IntoIter;
|
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct ProcSource {
|
pub struct ProcSource {
|
||||||
|
|
@ -26,9 +25,8 @@ impl ProcSource {
|
||||||
|
|
||||||
impl MultiSensorSource for ProcSource {
|
impl MultiSensorSource for ProcSource {
|
||||||
type Data = ProcData;
|
type Data = ProcData;
|
||||||
type Iter<'a> = IntoIter<Result<ProcData>>;
|
|
||||||
|
|
||||||
fn read(&mut self) -> Result<Self::Iter<'_>> {
|
fn read(&mut self) -> Result<impl Iterator<Item = Result<Self::Data>>> {
|
||||||
Ok(all_processes()?
|
Ok(all_processes()?
|
||||||
.flatten()
|
.flatten()
|
||||||
.flat_map(|proc| proc.stat())
|
.flat_map(|proc| proc.stat())
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ use crate::{Error, IoResultExt, MultiSensorSource, Result, SensorSource};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::{BufRead, BufReader, ErrorKind, Read, Seek};
|
use std::io::{BufRead, BufReader, ErrorKind, Read, Seek};
|
||||||
use sysconf::{sysconf, SysconfVariable};
|
use sysconf::{SysconfVariable, sysconf};
|
||||||
|
|
||||||
pub struct TemperatureSource {
|
pub struct TemperatureSource {
|
||||||
cpu_sensors: Vec<FileSource>,
|
cpu_sensors: Vec<FileSource>,
|
||||||
|
|
@ -70,11 +70,19 @@ impl SensorSource for TemperatureSource {
|
||||||
fn read(&mut self) -> Result<Self::Data> {
|
fn read(&mut self) -> Result<Self::Data> {
|
||||||
let mut result = Temperatures {
|
let mut result = Temperatures {
|
||||||
cpu: average_sensors(&mut self.cpu_sensors) / 1000.0,
|
cpu: average_sensors(&mut self.cpu_sensors) / 1000.0,
|
||||||
gpu: average_sensors(&mut self.gpu_sensors) / 1000.0,
|
gpu: self
|
||||||
|
.gpu_sensors
|
||||||
|
.iter_mut()
|
||||||
|
.flat_map(|sensor| sensor.read::<f32>())
|
||||||
|
.max_by(f32::total_cmp)
|
||||||
|
.unwrap_or_default()
|
||||||
|
/ 1000.0,
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(gpu) = super::gpu::nvidia::temperature() {
|
if let Some(gpu) = super::gpu::nvidia::temperature()
|
||||||
result.gpu = gpu;
|
&& let Some(temp) = gpu.max_by(f32::total_cmp)
|
||||||
|
{
|
||||||
|
result.gpu = temp
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
|
|
@ -224,9 +232,8 @@ impl NetworkSource {
|
||||||
|
|
||||||
impl MultiSensorSource for NetworkSource {
|
impl MultiSensorSource for NetworkSource {
|
||||||
type Data = NetStats;
|
type Data = NetStats;
|
||||||
type Iter<'a> = NetworkStatParser<'a>;
|
|
||||||
|
|
||||||
fn read(&mut self) -> Result<Self::Iter<'_>> {
|
fn read(&mut self) -> Result<impl Iterator<Item = Result<Self::Data>>> {
|
||||||
self.buff.clear();
|
self.buff.clear();
|
||||||
let mut source = File::open("/proc/net/dev").context("error opening netdev")?;
|
let mut source = File::open("/proc/net/dev").context("error opening netdev")?;
|
||||||
source
|
source
|
||||||
|
|
|
||||||
|
|
@ -52,9 +52,9 @@ pub fn get_metrics(sensors: &Sensors) -> Result<String> {
|
||||||
let mut disks = sensors.disks.lock().unwrap();
|
let mut disks = sensors.disks.lock().unwrap();
|
||||||
|
|
||||||
system.refresh_all();
|
system.refresh_all();
|
||||||
networks.refresh();
|
networks.refresh(true);
|
||||||
components.refresh();
|
components.refresh(true);
|
||||||
disks.refresh();
|
disks.refresh(true);
|
||||||
|
|
||||||
let hostname = &sensors.hostname;
|
let hostname = &sensors.hostname;
|
||||||
let mut result = String::with_capacity(256);
|
let mut result = String::with_capacity(256);
|
||||||
|
|
@ -86,6 +86,7 @@ pub fn get_metrics(sensors: &Sensors) -> Result<String> {
|
||||||
|
|
||||||
let gpu_mem_used = WMI.with(|wmi| wmi.gpu_mem())?;
|
let gpu_mem_used = WMI.with(|wmi| wmi.gpu_mem())?;
|
||||||
let gpu_mem = GpuMemory {
|
let gpu_mem = GpuMemory {
|
||||||
|
card: 0,
|
||||||
total: sensors.gpu_mem_total,
|
total: sensors.gpu_mem_total,
|
||||||
free: sensors.gpu_mem_total - gpu_mem_used,
|
free: sensors.gpu_mem_total - gpu_mem_used,
|
||||||
};
|
};
|
||||||
|
|
@ -94,6 +95,7 @@ pub fn get_metrics(sensors: &Sensors) -> Result<String> {
|
||||||
let gpu_engines = WMI.with(|wmi| wmi.gpu_usage())?;
|
let gpu_engines = WMI.with(|wmi| wmi.gpu_usage())?;
|
||||||
for (name, usage) in gpu_engines.into_iter() {
|
for (name, usage) in gpu_engines.into_iter() {
|
||||||
let gpu_usage = GpuUsage {
|
let gpu_usage = GpuUsage {
|
||||||
|
card: 0,
|
||||||
system: Cow::Owned(name),
|
system: Cow::Owned(name),
|
||||||
usage,
|
usage,
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
use std::sync::Mutex;
|
use std::sync::Mutex;
|
||||||
use std::thread::sleep;
|
use std::thread::sleep;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
use wmi::{COMLibrary, WMIConnection};
|
use wmi::WMIConnection;
|
||||||
|
|
||||||
pub struct WmiSensor {
|
pub struct WmiSensor {
|
||||||
wmi_con: WMIConnection,
|
wmi_con: WMIConnection,
|
||||||
|
|
@ -15,10 +15,8 @@ pub struct WmiSensor {
|
||||||
|
|
||||||
impl WmiSensor {
|
impl WmiSensor {
|
||||||
pub fn new() -> Result<Self> {
|
pub fn new() -> Result<Self> {
|
||||||
let com_con = COMLibrary::new()?;
|
let wmi_con = WMIConnection::new()?;
|
||||||
let wmi_con = WMIConnection::new(com_con)?;
|
let wmi_hwmon_con = WMIConnection::with_namespace_path("ROOT\\LibreHardwareMonitor").ok();
|
||||||
let wmi_hwmon_con =
|
|
||||||
WMIConnection::with_namespace_path("ROOT\\LibreHardwareMonitor", com_con).ok();
|
|
||||||
|
|
||||||
Ok(WmiSensor {
|
Ok(WmiSensor {
|
||||||
wmi_con,
|
wmi_con,
|
||||||
|
|
@ -155,10 +153,7 @@ fn get_sensor(sensors: &[Sensor], ty: &str, name: &str) -> Option<f32> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn update_power() {
|
pub fn update_power() {
|
||||||
let Ok(com_con) = COMLibrary::new() else {
|
if let Ok(wmi_con) = WMIConnection::with_namespace_path("ROOT\\LibreHardwareMonitor") {
|
||||||
return;
|
|
||||||
};
|
|
||||||
if let Ok(wmi_con) = WMIConnection::with_namespace_path("ROOT\\LibreHardwareMonitor", com_con) {
|
|
||||||
loop {
|
loop {
|
||||||
if let Some(elapsed) = get_power_elapsed() {
|
if let Some(elapsed) = get_power_elapsed() {
|
||||||
let Ok(sensors) = wmi_con.query::<Sensor>() else {
|
let Ok(sensors) = wmi_con.query::<Sensor>() else {
|
||||||
|
|
@ -194,6 +189,7 @@ pub fn cpu_power() -> CpuPowerUsage {
|
||||||
|
|
||||||
pub fn gpu_power() -> GpuPowerUsage {
|
pub fn gpu_power() -> GpuPowerUsage {
|
||||||
GpuPowerUsage {
|
GpuPowerUsage {
|
||||||
|
card: 0,
|
||||||
gpu_uj: GPU_POWER_UJ.load(Ordering::SeqCst),
|
gpu_uj: GPU_POWER_UJ.load(Ordering::SeqCst),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue