Skip to content

Commit e24b0e2

Browse files
feat: nvidia per-pstate clock offset configuration (#461)
* feat: nvidia per-pstate clock offset * feat: update ui * chore: update tests and doc
1 parent 79e827e commit e24b0e2

File tree

8 files changed

+285
-141
lines changed

8 files changed

+285
-141
lines changed

docs/CONFIG.md

+12-5
Original file line numberDiff line numberDiff line change
@@ -150,20 +150,27 @@ gpus:
150150
memory_clock:
151151
- 0
152152
- 1
153-
# Minimum GPU clockspeed in MHz.
153+
154+
# Minimum GPU clockspeed in MHz. Applicable to AMD and Intel.
154155
min_core_clock: 300
155-
# Minimum VRAM clockspeed in MHz.
156+
# Minimum VRAM clockspeed in MHz. Applicable to AMD only.
156157
min_memory_clock: 500
157-
# Minimum GPU voltage in mV.
158+
# Minimum GPU voltage in mV. Applicable to AMD only.
158159
min_voltage: 900
159-
# Maximum GPU clockspeed in MHz.
160+
# Maximum GPU clockspeed in MHz. Applicable to AMD and Intel.
160161
max_core_clock: 1630
161-
# Maximum VRAM clockspeed in MHz.
162+
# Maximum VRAM clockspeed in MHz. Applicable to AMD only.
162163
max_memory_clock: 800
163164
# Maximum GPU voltage in mV. Applicable to AMD only.
164165
max_voltage: 1200
165166
# Voltage offset value in mV for RDNA and newer AMD GPUs.
166167
voltage_offset: 0
168+
169+
# GPU and VRAM clockspeed offsets in MHz, keyed by pstate index. Only applicable on Nvidia.
170+
gpu_clock_offsets:
171+
0: -100
172+
mem_clock_offsets:
173+
0: 200
167174

168175
# Settings profiles
169176
profiles:

lact-daemon/src/config.rs

+39-2
Original file line numberDiff line numberDiff line change
@@ -108,14 +108,18 @@ pub struct Gpu {
108108
}
109109

110110
#[skip_serializing_none]
111-
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
111+
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
112112
pub struct ClocksConfiguration {
113113
pub min_core_clock: Option<i32>,
114114
pub min_memory_clock: Option<i32>,
115115
pub min_voltage: Option<i32>,
116116
pub max_core_clock: Option<i32>,
117117
pub max_memory_clock: Option<i32>,
118118
pub max_voltage: Option<i32>,
119+
#[serde(default, skip_serializing_if = "IndexMap::is_empty")]
120+
pub gpu_clock_offsets: IndexMap<u32, i32>,
121+
#[serde(default, skip_serializing_if = "IndexMap::is_empty")]
122+
pub mem_clock_offsets: IndexMap<u32, i32>,
119123
pub voltage_offset: Option<i32>,
120124
}
121125

@@ -135,6 +139,22 @@ impl Gpu {
135139
ClockspeedType::MinMemoryClock => clocks.min_memory_clock = value,
136140
ClockspeedType::MinVoltage => clocks.min_voltage = value,
137141
ClockspeedType::VoltageOffset => clocks.voltage_offset = value,
142+
ClockspeedType::GpuClockOffset(pstate) => match value {
143+
Some(value) => {
144+
clocks.gpu_clock_offsets.insert(pstate, value);
145+
}
146+
None => {
147+
clocks.gpu_clock_offsets.shift_remove(&pstate);
148+
}
149+
},
150+
ClockspeedType::MemClockOffset(pstate) => match value {
151+
Some(value) => {
152+
clocks.mem_clock_offsets.insert(pstate, value);
153+
}
154+
None => {
155+
clocks.mem_clock_offsets.shift_remove(&pstate);
156+
}
157+
},
138158
ClockspeedType::Reset => {
139159
*clocks = ClocksConfiguration::default();
140160
assert!(!self.is_core_clocks_used());
@@ -234,6 +254,23 @@ impl Config {
234254
}
235255
}
236256
}
257+
2 => {
258+
for (id, gpu) in &mut self.gpus {
259+
if id.starts_with(VENDOR_NVIDIA) {
260+
gpu.clocks_configuration.max_core_clock = None;
261+
gpu.clocks_configuration.max_memory_clock = None;
262+
}
263+
}
264+
265+
for profile in &mut self.profiles.values_mut() {
266+
for (id, gpu) in &mut profile.gpus {
267+
if id.starts_with(VENDOR_NVIDIA) {
268+
gpu.clocks_configuration.max_core_clock = None;
269+
gpu.clocks_configuration.max_memory_clock = None;
270+
}
271+
}
272+
}
273+
}
237274
_ => break,
238275
}
239276
info!("migrated config version {} to {next_version}", self.version);
@@ -511,7 +548,7 @@ mod tests {
511548
.unwrap()
512549
.clocks_configuration
513550
.max_core_clock,
514-
Some(3000)
551+
None,
515552
);
516553
assert_eq!(
517554
config

lact-daemon/src/server/gpu_controller/nvidia.rs

+97-99
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,19 @@ use super::{fan_control::FanCurve, CommonControllerInfo, FanControlHandle, GpuCo
77
use amdgpu_sysfs::{gpu_handle::power_profile_mode::PowerProfileModesTable, hw_mon::Temperature};
88
use anyhow::{anyhow, Context};
99
use futures::future::LocalBoxFuture;
10+
use indexmap::IndexMap;
1011
use lact_schema::{
1112
ClocksInfo, ClocksTable, ClockspeedStats, DeviceInfo, DeviceStats, DrmInfo, DrmMemoryInfo,
12-
FanControlMode, FanStats, IntelDrmInfo, LinkInfo, NvidiaClockInfo, NvidiaClocksTable, PmfwInfo,
13-
PowerState, PowerStates, PowerStats, VoltageStats, VramStats,
13+
FanControlMode, FanStats, IntelDrmInfo, LinkInfo, NvidiaClockOffset, NvidiaClocksTable,
14+
PmfwInfo, PowerState, PowerStates, PowerStats, VoltageStats, VramStats,
1415
};
1516
use nvml_wrapper::{
1617
bitmasks::device::ThrottleReasons,
17-
enum_wrappers::device::{Brand, Clock, TemperatureSensor, TemperatureThreshold},
18-
enums::device::DeviceArchitecture,
18+
enum_wrappers::device::{Clock, PerformanceState, TemperatureSensor, TemperatureThreshold},
1919
Device, Nvml,
2020
};
2121
use std::{
22-
cell::{Cell, RefCell},
22+
cell::RefCell,
2323
collections::HashMap,
2424
fmt::Write,
2525
rc::Rc,
@@ -33,8 +33,8 @@ pub struct NvidiaGpuController {
3333
common: CommonControllerInfo,
3434
fan_control_handle: RefCell<Option<FanControlHandle>>,
3535

36-
last_applied_gpc_offset: Cell<Option<i32>>,
37-
last_applied_mem_offset: Cell<Option<i32>>,
36+
// Store last applied offsets as a workaround when the driver doesn't tell us the current offset
37+
last_applied_offsets: RefCell<HashMap<Clock, HashMap<PerformanceState, i32>>>,
3838
}
3939

4040
impl NvidiaGpuController {
@@ -50,8 +50,7 @@ impl NvidiaGpuController {
5050
nvml,
5151
common,
5252
fan_control_handle: RefCell::new(None),
53-
last_applied_gpc_offset: Cell::new(None),
54-
last_applied_mem_offset: Cell::new(None),
53+
last_applied_offsets: RefCell::new(HashMap::new()),
5554
})
5655
}
5756

@@ -243,20 +242,6 @@ impl NvidiaGpuController {
243242

244243
Ok(power_states)
245244
}
246-
247-
// See https://github.com/ilya-zlobintsev/LACT/issues/418
248-
fn vram_offset_ratio(&self) -> i32 {
249-
let device = self.device();
250-
if let (Ok(brand), Ok(architecture)) = (device.brand(), device.architecture()) {
251-
let ratio = match (brand, architecture) {
252-
(Brand::GeForce, DeviceArchitecture::Ada) => 2,
253-
// TODO: check others
254-
_ => 1,
255-
};
256-
return ratio;
257-
}
258-
1
259-
}
260245
}
261246

262247
impl GpuController for NvidiaGpuController {
@@ -461,47 +446,46 @@ impl GpuController for NvidiaGpuController {
461446
fn get_clocks_info(&self) -> anyhow::Result<ClocksInfo> {
462447
let device = self.device();
463448

464-
let mut gpc = None;
465-
let mut mem = None;
466-
467-
// Negative offset values are not correctly reported by NVML, so we have to use the last known applied value
468-
// instead of the actual read when an unreasonable value appears.
469-
470-
if let Ok(max) = device.max_clock_info(Clock::Graphics) {
471-
if let Ok(offset_range) = device.gpc_clk_min_max_vf_offset() {
472-
if let Some(offset) = self
473-
.last_applied_gpc_offset
474-
.get()
475-
.or_else(|| device.gpc_clk_vf_offset().ok())
476-
{
477-
gpc = Some(NvidiaClockInfo {
478-
max: max as i32,
479-
offset,
480-
offset_ratio: 1,
481-
offset_range,
482-
});
483-
}
484-
}
485-
}
449+
let mut gpu_offsets = IndexMap::new();
450+
let mut mem_offsets = IndexMap::new();
451+
452+
let supported_pstates = device.supported_performance_states()?;
453+
454+
let clock_types = [
455+
(Clock::Graphics, &mut gpu_offsets),
456+
(Clock::Memory, &mut mem_offsets),
457+
];
458+
459+
for (clock_type, offsets) in clock_types {
460+
for pstate in supported_pstates.iter().rev() {
461+
if let Ok(offset) = device.clock_offset(clock_type, *pstate) {
462+
let mut offset = NvidiaClockOffset {
463+
current: offset.clock_offset_mhz,
464+
min: offset.min_clock_offset_mhz,
465+
max: offset.max_clock_offset_mhz,
466+
};
467+
468+
// On some driver versions, the applied offset values are not reported.
469+
// In these scenarios we fall back to the offset we last applied and stored ourselves.
470+
if offset.current == 0 {
471+
if let Some(applied_offsets) =
472+
self.last_applied_offsets.borrow().get(&clock_type)
473+
{
474+
if let Some(applied_offset) = applied_offsets.get(pstate) {
475+
offset.current = *applied_offset;
476+
}
477+
}
478+
}
486479

487-
if let Ok(max) = device.max_clock_info(Clock::Memory) {
488-
if let Ok(offset_range) = device.mem_clk_min_max_vf_offset() {
489-
if let Some(offset) = self
490-
.last_applied_mem_offset
491-
.get()
492-
.or_else(|| device.mem_clk_vf_offset().ok())
493-
{
494-
mem = Some(NvidiaClockInfo {
495-
max: max as i32,
496-
offset,
497-
offset_ratio: self.vram_offset_ratio(),
498-
offset_range,
499-
});
480+
offsets.insert(pstate.as_c(), offset);
500481
}
501482
}
502483
}
503484

504-
let table = NvidiaClocksTable { gpc, mem };
485+
let table = NvidiaClocksTable {
486+
gpu_offsets,
487+
mem_offsets,
488+
};
505489

506490
Ok(ClocksInfo {
507491
table: Some(ClocksTable::Nvidia(table)),
@@ -564,34 +548,38 @@ impl GpuController for NvidiaGpuController {
564548

565549
self.cleanup_clocks()?;
566550

567-
if let Some(max_gpu_clock) = config.clocks_configuration.max_core_clock {
568-
let default_max_clock = device
569-
.max_clock_info(Clock::Graphics)
570-
.context("Could not read max graphics clock")?;
571-
let offset = max_gpu_clock - default_max_clock as i32;
572-
debug!(
573-
"Using graphics clock offset {offset} (default max clock: {default_max_clock})"
574-
);
575-
551+
for (pstate, offset) in &config.clocks_configuration.gpu_clock_offsets {
552+
let pstate = PerformanceState::try_from(*pstate)
553+
.map_err(|_| anyhow!("Invalid pstate '{pstate}'"))?;
554+
debug!("applying offset {offset} for GPU pstate {pstate:?}");
576555
device
577-
.set_gpc_clk_vf_offset(offset)
578-
.context("Could not set graphics clock offset")?;
579-
580-
self.last_applied_gpc_offset.set(Some(offset));
556+
.set_clock_offset(Clock::Graphics, pstate, *offset)
557+
.with_context(|| {
558+
format!("Could not set clock offset {offset} for GPU pstate {pstate:?}")
559+
})?;
560+
561+
self.last_applied_offsets
562+
.borrow_mut()
563+
.entry(Clock::Graphics)
564+
.or_default()
565+
.insert(pstate, *offset);
581566
}
582567

583-
if let Some(max_mem_clock) = config.clocks_configuration.max_memory_clock {
584-
let default_max_clock = device
585-
.max_clock_info(Clock::Memory)
586-
.context("Could not read max memory clock")?;
587-
let offset = (max_mem_clock - default_max_clock as i32) * self.vram_offset_ratio();
588-
debug!("Using mem clock offset {offset} (default max clock: {default_max_clock})");
589-
568+
for (pstate, offset) in &config.clocks_configuration.mem_clock_offsets {
569+
let pstate = PerformanceState::try_from(*pstate)
570+
.map_err(|_| anyhow!("Invalid pstate '{pstate}'"))?;
571+
debug!("applying offset {offset} for VRAM pstate {pstate:?}");
590572
device
591-
.set_mem_clk_vf_offset(offset)
592-
.context("Could not set memory clock offset")?;
593-
594-
self.last_applied_mem_offset.set(Some(offset));
573+
.set_clock_offset(Clock::Memory, pstate, *offset)
574+
.with_context(|| {
575+
format!("Could not set clock offset {offset} for VRAM pstate {pstate:?}")
576+
})?;
577+
578+
self.last_applied_offsets
579+
.borrow_mut()
580+
.entry(Clock::Memory)
581+
.or_default()
582+
.insert(pstate, *offset);
595583
}
596584

597585
if config.fan_control_enabled {
@@ -633,23 +621,33 @@ impl GpuController for NvidiaGpuController {
633621
fn cleanup_clocks(&self) -> anyhow::Result<()> {
634622
let device = self.device();
635623

636-
if let Ok(current_offset) = device.gpc_clk_vf_offset() {
637-
if current_offset != 0 {
638-
device
639-
.set_gpc_clk_vf_offset(0)
640-
.context("Could not reset graphics clock offset")?;
641-
642-
self.last_applied_gpc_offset.set(None);
643-
}
644-
}
645-
646-
if let Ok(current_offset) = device.mem_clk_vf_offset() {
647-
if current_offset != 0 {
648-
device
649-
.set_mem_clk_vf_offset(0)
650-
.context("Could not reset memory clock offset")?;
624+
if let Ok(supported_pstates) = device.supported_performance_states() {
625+
for pstate in supported_pstates {
626+
for clock_type in [Clock::Graphics, Clock::Memory] {
627+
if let Ok(current_offset) = device.clock_offset(clock_type, pstate) {
628+
if current_offset.clock_offset_mhz != 0
629+
|| self
630+
.last_applied_offsets
631+
.borrow()
632+
.get(&clock_type)
633+
.and_then(|applied_offsets| applied_offsets.get(&pstate))
634+
.is_some_and(|offset| *offset != 0)
635+
{
636+
debug!("resetting clock offset for {clock_type:?} pstate {pstate:?}");
637+
device
638+
.set_clock_offset(clock_type, pstate, 0)
639+
.with_context(|| {
640+
format!("Could not reset {clock_type:?} pstate {pstate:?}")
641+
})?;
642+
}
643+
}
651644

652-
self.last_applied_mem_offset.set(None);
645+
if let Some(applied_offsets) =
646+
self.last_applied_offsets.borrow_mut().get_mut(&clock_type)
647+
{
648+
applied_offsets.remove(&pstate);
649+
}
650+
}
653651
}
654652
}
655653

lact-daemon/src/snapshots/lact_daemon__config__tests__parse_doc.snap

+4
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ gpus:
4242
max_core_clock: 1630
4343
max_memory_clock: 800
4444
max_voltage: 1200
45+
gpu_clock_offsets:
46+
0: -100
47+
mem_clock_offsets:
48+
0: 200
4549
voltage_offset: 0
4650
power_profile_mode_index: 0
4751
custom_power_profile_mode_hueristics:

0 commit comments

Comments
 (0)