@@ -7,19 +7,19 @@ use super::{fan_control::FanCurve, CommonControllerInfo, FanControlHandle, GpuCo
7
7
use amdgpu_sysfs:: { gpu_handle:: power_profile_mode:: PowerProfileModesTable , hw_mon:: Temperature } ;
8
8
use anyhow:: { anyhow, Context } ;
9
9
use futures:: future:: LocalBoxFuture ;
10
+ use indexmap:: IndexMap ;
10
11
use lact_schema:: {
11
12
ClocksInfo , ClocksTable , ClockspeedStats , DeviceInfo , DeviceStats , DrmInfo , DrmMemoryInfo ,
12
- FanControlMode , FanStats , IntelDrmInfo , LinkInfo , NvidiaClockInfo , NvidiaClocksTable , PmfwInfo ,
13
- PowerState , PowerStates , PowerStats , VoltageStats , VramStats ,
13
+ FanControlMode , FanStats , IntelDrmInfo , LinkInfo , NvidiaClockOffset , NvidiaClocksTable ,
14
+ PmfwInfo , PowerState , PowerStates , PowerStats , VoltageStats , VramStats ,
14
15
} ;
15
16
use nvml_wrapper:: {
16
17
bitmasks:: device:: ThrottleReasons ,
17
- enum_wrappers:: device:: { Brand , Clock , TemperatureSensor , TemperatureThreshold } ,
18
- enums:: device:: DeviceArchitecture ,
18
+ enum_wrappers:: device:: { Clock , PerformanceState , TemperatureSensor , TemperatureThreshold } ,
19
19
Device , Nvml ,
20
20
} ;
21
21
use std:: {
22
- cell:: { Cell , RefCell } ,
22
+ cell:: RefCell ,
23
23
collections:: HashMap ,
24
24
fmt:: Write ,
25
25
rc:: Rc ,
@@ -33,8 +33,8 @@ pub struct NvidiaGpuController {
33
33
common : CommonControllerInfo ,
34
34
fan_control_handle : RefCell < Option < FanControlHandle > > ,
35
35
36
- last_applied_gpc_offset : Cell < Option < i32 > > ,
37
- last_applied_mem_offset : Cell < Option < i32 > > ,
36
+ // Store last applied offsets as a workaround when the driver doesn't tell us the current offset
37
+ last_applied_offsets : RefCell < HashMap < Clock , HashMap < PerformanceState , i32 > > > ,
38
38
}
39
39
40
40
impl NvidiaGpuController {
@@ -50,8 +50,7 @@ impl NvidiaGpuController {
50
50
nvml,
51
51
common,
52
52
fan_control_handle : RefCell :: new ( None ) ,
53
- last_applied_gpc_offset : Cell :: new ( None ) ,
54
- last_applied_mem_offset : Cell :: new ( None ) ,
53
+ last_applied_offsets : RefCell :: new ( HashMap :: new ( ) ) ,
55
54
} )
56
55
}
57
56
@@ -243,20 +242,6 @@ impl NvidiaGpuController {
243
242
244
243
Ok ( power_states)
245
244
}
246
-
247
- // See https://github.com/ilya-zlobintsev/LACT/issues/418
248
- fn vram_offset_ratio ( & self ) -> i32 {
249
- let device = self . device ( ) ;
250
- if let ( Ok ( brand) , Ok ( architecture) ) = ( device. brand ( ) , device. architecture ( ) ) {
251
- let ratio = match ( brand, architecture) {
252
- ( Brand :: GeForce , DeviceArchitecture :: Ada ) => 2 ,
253
- // TODO: check others
254
- _ => 1 ,
255
- } ;
256
- return ratio;
257
- }
258
- 1
259
- }
260
245
}
261
246
262
247
impl GpuController for NvidiaGpuController {
@@ -461,47 +446,46 @@ impl GpuController for NvidiaGpuController {
461
446
fn get_clocks_info ( & self ) -> anyhow:: Result < ClocksInfo > {
462
447
let device = self . device ( ) ;
463
448
464
- let mut gpc = None ;
465
- let mut mem = None ;
466
-
467
- // Negative offset values are not correctly reported by NVML, so we have to use the last known applied value
468
- // instead of the actual read when an unreasonable value appears.
469
-
470
- if let Ok ( max) = device. max_clock_info ( Clock :: Graphics ) {
471
- if let Ok ( offset_range) = device. gpc_clk_min_max_vf_offset ( ) {
472
- if let Some ( offset) = self
473
- . last_applied_gpc_offset
474
- . get ( )
475
- . or_else ( || device. gpc_clk_vf_offset ( ) . ok ( ) )
476
- {
477
- gpc = Some ( NvidiaClockInfo {
478
- max : max as i32 ,
479
- offset,
480
- offset_ratio : 1 ,
481
- offset_range,
482
- } ) ;
483
- }
484
- }
485
- }
449
+ let mut gpu_offsets = IndexMap :: new ( ) ;
450
+ let mut mem_offsets = IndexMap :: new ( ) ;
451
+
452
+ let supported_pstates = device. supported_performance_states ( ) ?;
453
+
454
+ let clock_types = [
455
+ ( Clock :: Graphics , & mut gpu_offsets) ,
456
+ ( Clock :: Memory , & mut mem_offsets) ,
457
+ ] ;
458
+
459
+ for ( clock_type, offsets) in clock_types {
460
+ for pstate in supported_pstates. iter ( ) . rev ( ) {
461
+ if let Ok ( offset) = device. clock_offset ( clock_type, * pstate) {
462
+ let mut offset = NvidiaClockOffset {
463
+ current : offset. clock_offset_mhz ,
464
+ min : offset. min_clock_offset_mhz ,
465
+ max : offset. max_clock_offset_mhz ,
466
+ } ;
467
+
468
+ // On some driver versions, the applied offset values are not reported.
469
+ // In these scenarios we must store them manually for reporting.
470
+ if offset. current == 0 {
471
+ if let Some ( applied_offsets) =
472
+ self . last_applied_offsets . borrow ( ) . get ( & clock_type)
473
+ {
474
+ if let Some ( applied_offset) = applied_offsets. get ( pstate) {
475
+ offset. current = * applied_offset;
476
+ }
477
+ }
478
+ }
486
479
487
- if let Ok ( max) = device. max_clock_info ( Clock :: Memory ) {
488
- if let Ok ( offset_range) = device. mem_clk_min_max_vf_offset ( ) {
489
- if let Some ( offset) = self
490
- . last_applied_mem_offset
491
- . get ( )
492
- . or_else ( || device. mem_clk_vf_offset ( ) . ok ( ) )
493
- {
494
- mem = Some ( NvidiaClockInfo {
495
- max : max as i32 ,
496
- offset,
497
- offset_ratio : self . vram_offset_ratio ( ) ,
498
- offset_range,
499
- } ) ;
480
+ offsets. insert ( pstate. as_c ( ) , offset) ;
500
481
}
501
482
}
502
483
}
503
484
504
- let table = NvidiaClocksTable { gpc, mem } ;
485
+ let table = NvidiaClocksTable {
486
+ gpu_offsets,
487
+ mem_offsets,
488
+ } ;
505
489
506
490
Ok ( ClocksInfo {
507
491
table : Some ( ClocksTable :: Nvidia ( table) ) ,
@@ -564,34 +548,38 @@ impl GpuController for NvidiaGpuController {
564
548
565
549
self . cleanup_clocks ( ) ?;
566
550
567
- if let Some ( max_gpu_clock) = config. clocks_configuration . max_core_clock {
568
- let default_max_clock = device
569
- . max_clock_info ( Clock :: Graphics )
570
- . context ( "Could not read max graphics clock" ) ?;
571
- let offset = max_gpu_clock - default_max_clock as i32 ;
572
- debug ! (
573
- "Using graphics clock offset {offset} (default max clock: {default_max_clock})"
574
- ) ;
575
-
551
+ for ( pstate, offset) in & config. clocks_configuration . gpu_clock_offsets {
552
+ let pstate = PerformanceState :: try_from ( * pstate)
553
+ . map_err ( |_| anyhow ! ( "Invalid pstate '{pstate}'" ) ) ?;
554
+ debug ! ( "applying offset {offset} for GPU pstate {pstate:?}" ) ;
576
555
device
577
- . set_gpc_clk_vf_offset ( offset)
578
- . context ( "Could not set graphics clock offset" ) ?;
579
-
580
- self . last_applied_gpc_offset . set ( Some ( offset) ) ;
556
+ . set_clock_offset ( Clock :: Graphics , pstate, * offset)
557
+ . with_context ( || {
558
+ format ! ( "Could not set clock offset {offset} for GPU pstate {pstate:?}" )
559
+ } ) ?;
560
+
561
+ self . last_applied_offsets
562
+ . borrow_mut ( )
563
+ . entry ( Clock :: Graphics )
564
+ . or_default ( )
565
+ . insert ( pstate, * offset) ;
581
566
}
582
567
583
- if let Some ( max_mem_clock) = config. clocks_configuration . max_memory_clock {
584
- let default_max_clock = device
585
- . max_clock_info ( Clock :: Memory )
586
- . context ( "Could not read max memory clock" ) ?;
587
- let offset = ( max_mem_clock - default_max_clock as i32 ) * self . vram_offset_ratio ( ) ;
588
- debug ! ( "Using mem clock offset {offset} (default max clock: {default_max_clock})" ) ;
589
-
568
+ for ( pstate, offset) in & config. clocks_configuration . mem_clock_offsets {
569
+ let pstate = PerformanceState :: try_from ( * pstate)
570
+ . map_err ( |_| anyhow ! ( "Invalid pstate '{pstate}'" ) ) ?;
571
+ debug ! ( "applying offset {offset} for VRAM pstate {pstate:?}" ) ;
590
572
device
591
- . set_mem_clk_vf_offset ( offset)
592
- . context ( "Could not set memory clock offset" ) ?;
593
-
594
- self . last_applied_mem_offset . set ( Some ( offset) ) ;
573
+ . set_clock_offset ( Clock :: Memory , pstate, * offset)
574
+ . with_context ( || {
575
+ format ! ( "Could not set clock offset {offset} for VRAM pstate {pstate:?}" )
576
+ } ) ?;
577
+
578
+ self . last_applied_offsets
579
+ . borrow_mut ( )
580
+ . entry ( Clock :: Memory )
581
+ . or_default ( )
582
+ . insert ( pstate, * offset) ;
595
583
}
596
584
597
585
if config. fan_control_enabled {
@@ -633,23 +621,33 @@ impl GpuController for NvidiaGpuController {
633
621
fn cleanup_clocks ( & self ) -> anyhow:: Result < ( ) > {
634
622
let device = self . device ( ) ;
635
623
636
- if let Ok ( current_offset) = device. gpc_clk_vf_offset ( ) {
637
- if current_offset != 0 {
638
- device
639
- . set_gpc_clk_vf_offset ( 0 )
640
- . context ( "Could not reset graphics clock offset" ) ?;
641
-
642
- self . last_applied_gpc_offset . set ( None ) ;
643
- }
644
- }
645
-
646
- if let Ok ( current_offset) = device. mem_clk_vf_offset ( ) {
647
- if current_offset != 0 {
648
- device
649
- . set_mem_clk_vf_offset ( 0 )
650
- . context ( "Could not reset memory clock offset" ) ?;
624
+ if let Ok ( supported_pstates) = device. supported_performance_states ( ) {
625
+ for pstate in supported_pstates {
626
+ for clock_type in [ Clock :: Graphics , Clock :: Memory ] {
627
+ if let Ok ( current_offset) = device. clock_offset ( clock_type, pstate) {
628
+ if current_offset. clock_offset_mhz != 0
629
+ || self
630
+ . last_applied_offsets
631
+ . borrow ( )
632
+ . get ( & clock_type)
633
+ . and_then ( |applied_offsets| applied_offsets. get ( & pstate) )
634
+ . is_some_and ( |offset| * offset != 0 )
635
+ {
636
+ debug ! ( "resetting clock offset for {clock_type:?} pstate {pstate:?}" ) ;
637
+ device
638
+ . set_clock_offset ( clock_type, pstate, 0 )
639
+ . with_context ( || {
640
+ format ! ( "Could not reset {clock_type:?} pstate {pstate:?}" )
641
+ } ) ?;
642
+ }
643
+ }
651
644
652
- self . last_applied_mem_offset . set ( None ) ;
645
+ if let Some ( applied_offsets) =
646
+ self . last_applied_offsets . borrow_mut ( ) . get_mut ( & clock_type)
647
+ {
648
+ applied_offsets. remove ( & pstate) ;
649
+ }
650
+ }
653
651
}
654
652
}
655
653
0 commit comments