mirror of
https://codeberg.org/icewind/palantir.git
synced 2026-06-03 10:14:09 +02:00
improved power handling
This commit is contained in:
parent
1df1369165
commit
224bf80588
6 changed files with 79 additions and 54 deletions
17
src/data.rs
17
src/data.rs
|
|
@ -184,14 +184,13 @@ impl SensorData for DiskUsage {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Default)]
|
#[derive(Debug, Default)]
|
||||||
pub struct PowerUsage {
|
pub struct CpuPowerUsage {
|
||||||
pub cpu_uj: u64,
|
pub cpu_uj: u64,
|
||||||
pub cpu_packages_uj: Vec<u64>,
|
pub cpu_packages_uj: Vec<u64>,
|
||||||
pub gpu_uj: u64,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PowerUsage {
|
impl SensorData for CpuPowerUsage {
|
||||||
pub fn write<W: Write>(&self, mut w: W, hostname: &str) {
|
fn write<W: Write>(&self, mut w: W, hostname: &str) {
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut w,
|
&mut w,
|
||||||
r#"total_power{{host="{}", device="cpu"}} {:.3}"#,
|
r#"total_power{{host="{}", device="cpu"}} {:.3}"#,
|
||||||
|
|
@ -209,6 +208,16 @@ impl PowerUsage {
|
||||||
)
|
)
|
||||||
.ok();
|
.ok();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub struct GpuPowerUsage {
|
||||||
|
pub gpu_uj: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SensorData for GpuPowerUsage {
|
||||||
|
fn write<W: Write>(&self, mut w: W, hostname: &str) {
|
||||||
if self.gpu_uj > 0 {
|
if self.gpu_uj > 0 {
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut w,
|
&mut w,
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
use std::sync::Mutex;
|
use std::sync::Mutex;
|
||||||
use std::thread::sleep;
|
use std::thread::sleep;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
pub mod nvidia;
|
pub mod nvidia;
|
||||||
|
|
||||||
|
|
@ -78,6 +79,7 @@ pub fn update_gpu_power() {
|
||||||
let current_power: u64 = match file.read() {
|
let current_power: u64 = match file.read() {
|
||||||
Ok(current_power) => current_power,
|
Ok(current_power) => current_power,
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
|
warn!("failed to read gpu power sensor");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
@ -91,6 +93,7 @@ pub fn update_gpu_power() {
|
||||||
sleep(Duration::from_millis(500));
|
sleep(Duration::from_millis(500));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
info!("no gpu sensor");
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn gpu_power() -> u64 {
|
pub fn gpu_power() -> u64 {
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ use std::io;
|
||||||
use std::io::{ErrorKind, Read, Seek};
|
use std::io::{ErrorKind, Read, Seek};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
use tracing::{debug, instrument, warn};
|
||||||
|
|
||||||
fn read_to_string_trimmed(path: &Path) -> io::Result<String> {
|
fn read_to_string_trimmed(path: &Path) -> io::Result<String> {
|
||||||
let mut s = read_to_string(path)?;
|
let mut s = read_to_string(path)?;
|
||||||
|
|
@ -17,10 +18,15 @@ pub struct FileSource {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FileSource {
|
impl FileSource {
|
||||||
|
#[instrument(skip_all, fields(path = ?path.as_ref()))]
|
||||||
pub fn open<P: AsRef<Path>>(path: P) -> io::Result<FileSource> {
|
pub fn open<P: AsRef<Path>>(path: P) -> io::Result<FileSource> {
|
||||||
|
debug!("opening sensor");
|
||||||
Ok(FileSource {
|
Ok(FileSource {
|
||||||
buff: String::with_capacity(32),
|
buff: String::with_capacity(32),
|
||||||
file: File::open(path)?,
|
file: File::open(path).map_err(|e| {
|
||||||
|
warn!("failed to open sensor");
|
||||||
|
e
|
||||||
|
})?,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ use self::disk::*;
|
||||||
use self::sensors::*;
|
use self::sensors::*;
|
||||||
use crate::linux::disk::zfs::arcstats;
|
use crate::linux::disk::zfs::arcstats;
|
||||||
use crate::linux::gpu::{update_gpu_power, utilization};
|
use crate::linux::gpu::{update_gpu_power, utilization};
|
||||||
use crate::linux::power::power_usage;
|
use crate::linux::power::{CpuPowerSource, GpuPowerSource};
|
||||||
use crate::{hostname, Error, MultiSensorSource, Result, SensorData, SensorSource};
|
use crate::{hostname, Error, MultiSensorSource, Result, SensorData, SensorSource};
|
||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
use std::sync::Mutex;
|
use std::sync::Mutex;
|
||||||
|
|
@ -29,6 +29,8 @@ pub struct Sensors {
|
||||||
mem: Mutex<MemorySource>,
|
mem: Mutex<MemorySource>,
|
||||||
disk_stats: Mutex<DiskStatSource>,
|
disk_stats: Mutex<DiskStatSource>,
|
||||||
disk_usage: Mutex<DiskUsageSource>,
|
disk_usage: Mutex<DiskUsageSource>,
|
||||||
|
cpu_power: Mutex<CpuPowerSource>,
|
||||||
|
gpu_power: Mutex<GpuPowerSource>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Sensors {
|
impl Sensors {
|
||||||
|
|
@ -43,6 +45,8 @@ impl Sensors {
|
||||||
mem: Mutex::new(MemorySource::new()?),
|
mem: Mutex::new(MemorySource::new()?),
|
||||||
disk_stats: Mutex::new(DiskStatSource::new()?),
|
disk_stats: Mutex::new(DiskStatSource::new()?),
|
||||||
disk_usage: Mutex::new(DiskUsageSource::new()?),
|
disk_usage: Mutex::new(DiskUsageSource::new()?),
|
||||||
|
cpu_power: Mutex::new(CpuPowerSource::new().unwrap_or_default()),
|
||||||
|
gpu_power: Mutex::new(GpuPowerSource::default()),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -56,6 +60,8 @@ pub fn get_metrics(sensors: &Sensors) -> Result<String> {
|
||||||
let cpu = sensors.cpu.lock().unwrap().read()?;
|
let cpu = sensors.cpu.lock().unwrap().read()?;
|
||||||
let memory = sensors.mem.lock().unwrap().read()?;
|
let memory = sensors.mem.lock().unwrap().read()?;
|
||||||
let temperatures = sensors.temp.lock().unwrap().read()?;
|
let temperatures = sensors.temp.lock().unwrap().read()?;
|
||||||
|
let cpu_power = sensors.cpu_power.lock().unwrap().read()?;
|
||||||
|
let gpu_power = sensors.gpu_power.lock().unwrap().read()?;
|
||||||
let mut net = sensors.net.lock().unwrap();
|
let mut net = sensors.net.lock().unwrap();
|
||||||
let networks = net.read()?;
|
let networks = net.read()?;
|
||||||
let pools = pools();
|
let pools = pools();
|
||||||
|
|
@ -99,9 +105,8 @@ pub fn get_metrics(sensors: &Sensors) -> Result<String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(power) = power_usage()? {
|
cpu_power.write(&mut result, &sensors.hostname);
|
||||||
power.write(&mut result, &sensors.hostname);
|
gpu_power.write(&mut result, &sensors.hostname);
|
||||||
}
|
|
||||||
if let Some(arc) = arcstats()? {
|
if let Some(arc) = arcstats()? {
|
||||||
arc.write(&mut result, &sensors.hostname);
|
arc.write(&mut result, &sensors.hostname);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,56 +1,58 @@
|
||||||
use crate::data::PowerUsage;
|
use crate::data::{CpuPowerUsage, GpuPowerUsage};
|
||||||
use crate::linux::gpu::gpu_power;
|
use crate::linux::gpu::gpu_power;
|
||||||
use crate::{Error, Result};
|
use crate::linux::hwmon::FileSource;
|
||||||
use std::fs::{read_dir, read_to_string};
|
use crate::{Result, SensorSource};
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
use std::fs::read_dir;
|
||||||
use tracing::warn;
|
|
||||||
|
|
||||||
static CAN_READ: AtomicBool = AtomicBool::new(true);
|
#[derive(Default)]
|
||||||
|
pub struct CpuPowerSource {
|
||||||
pub fn power_usage() -> Result<Option<PowerUsage>> {
|
sources: Vec<FileSource>,
|
||||||
if !CAN_READ.load(Ordering::Relaxed) {
|
|
||||||
return Ok(None);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let dir = match read_dir("/sys/devices/virtual/powercap/intel-rapl") {
|
impl CpuPowerSource {
|
||||||
Ok(dir) => dir,
|
pub fn new() -> Result<CpuPowerSource> {
|
||||||
Err(_) => {
|
let sources: Vec<_> = read_dir("/sys/devices/virtual/powercap/intel-rapl")?
|
||||||
CAN_READ.store(false, Ordering::Relaxed);
|
.flatten()
|
||||||
return Ok(None);
|
.filter(|path| {
|
||||||
}
|
path.file_name()
|
||||||
};
|
|
||||||
let mut usage = PowerUsage::default();
|
|
||||||
for package in dir {
|
|
||||||
let package = package?;
|
|
||||||
if package
|
|
||||||
.file_name()
|
|
||||||
.to_str()
|
.to_str()
|
||||||
.ok_or_else(|| Error::Other("Invalid name".into()))?
|
.unwrap_or_default()
|
||||||
.starts_with("intel-rapl")
|
.starts_with("intel-rapl")
|
||||||
{
|
})
|
||||||
let mut package_path = package.path();
|
.map(|entry| {
|
||||||
package_path.push("energy_uj");
|
let mut path = entry.path();
|
||||||
let package_usage = match read_to_string(&package_path) {
|
path.push("energy_uj");
|
||||||
Err(e) if e.raw_os_error() == Some(13) => {
|
path
|
||||||
CAN_READ.store(false, Ordering::Relaxed);
|
})
|
||||||
warn!(
|
.flat_map(FileSource::open)
|
||||||
package_path = display(package_path.display()),
|
.collect();
|
||||||
"can\'t read power usage"
|
|
||||||
);
|
Ok(CpuPowerSource { sources })
|
||||||
return Ok(None);
|
|
||||||
}
|
}
|
||||||
result => result,
|
}
|
||||||
}?;
|
|
||||||
let package_usage = package_usage.trim().parse::<u64>()?;
|
impl SensorSource for CpuPowerSource {
|
||||||
|
type Data = CpuPowerUsage;
|
||||||
|
|
||||||
|
fn read(&mut self) -> Result<Self::Data> {
|
||||||
|
let mut usage = CpuPowerUsage::default();
|
||||||
|
for source in self.sources.iter_mut() {
|
||||||
|
let package_usage = source.read()?;
|
||||||
usage.cpu_uj += package_usage;
|
usage.cpu_uj += package_usage;
|
||||||
usage.cpu_packages_uj.push(package_usage);
|
usage.cpu_packages_uj.push(package_usage);
|
||||||
}
|
}
|
||||||
|
Ok(usage)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
usage.gpu_uj = gpu_power();
|
#[derive(Default)]
|
||||||
if let Some(nvidia_power) = crate::linux::gpu::nvidia::power() {
|
pub struct GpuPowerSource;
|
||||||
usage.gpu_uj = nvidia_power;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Some(usage))
|
impl SensorSource for GpuPowerSource {
|
||||||
|
type Data = GpuPowerUsage;
|
||||||
|
|
||||||
|
fn read(&mut self) -> Result<Self::Data> {
|
||||||
|
let gpu_uj = crate::linux::gpu::nvidia::power().unwrap_or_else(gpu_power);
|
||||||
|
Ok(GpuPowerUsage { gpu_uj })
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ impl TemperatureSource {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn average_sensors(sensors: &mut [FileSource]) -> f32 {
|
pub fn average_sensors(sensors: &mut [FileSource]) -> f32 {
|
||||||
if sensors.is_empty() {
|
if sensors.is_empty() {
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue