#include <asm/sbi.h>
#include <asm/hwcap.h>

+#define SYSCTL_NO_USER_ACCESS	0
+#define SYSCTL_USER_ACCESS	1
+#define SYSCTL_LEGACY	2
+
+#define PERF_EVENT_FLAG_NO_USER_ACCESS	BIT(SYSCTL_NO_USER_ACCESS)
+#define PERF_EVENT_FLAG_USER_ACCESS	BIT(SYSCTL_USER_ACCESS)
+#define PERF_EVENT_FLAG_LEGACY	BIT(SYSCTL_LEGACY)
+
PMU_FORMAT_ATTR(event, "config:0-47");
PMU_FORMAT_ATTR(firmware, "config:63");

@@ -43,6 +51,9 @@ static const struct attribute_group *riscv_pmu_attr_groups[] = {
        NULL,
};

+/* Allow user mode access by default */
+static int sysctl_perf_user_access __read_mostly = SYSCTL_USER_ACCESS;
+
/*
 * RISC-V doesn't have heterogeneous harts yet. This need to be part of
 * per_cpu in case of harts with different pmu counters
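The three sysctl values give userspace a runtime switch between no direct counter access (0), per-event access granted at mmap time (1, the default above), and the legacy behaviour of unconditionally exposing cycle and instret (2). Since the table added later in this patch is registered under "kernel", the knob should appear as /proc/sys/kernel/perf_user_access; below is a minimal userspace sketch for flipping it, with the helper name purely illustrative and not part of the patch.

/* Illustration only, not part of the patch. */
#include <fcntl.h>
#include <unistd.h>

static int set_perf_user_access(int mode)       /* 0, 1 or 2, as defined above */
{
        char buf[2] = { '0' + mode, '\n' };
        int fd = open("/proc/sys/kernel/perf_user_access", O_WRONLY);

        if (fd < 0)
                return -1;
        if (write(fd, buf, sizeof(buf)) != sizeof(buf)) {
                close(fd);
                return -1;
        }
        return close(fd);
}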
@@ -301,6 +312,11 @@ int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr)
}
EXPORT_SYMBOL_GPL(riscv_pmu_get_hpm_info);

+static uint8_t pmu_sbi_csr_index(struct perf_event *event)
+{
+        return pmu_ctr_list[event->hw.idx].csr - CSR_CYCLE;
+}
+
static unsigned long pmu_sbi_get_filter_flags(struct perf_event *event)
{
        unsigned long cflags = 0;
@@ -329,18 +345,34 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event)
329
345
struct cpu_hw_events * cpuc = this_cpu_ptr (rvpmu -> hw_events );
330
346
struct sbiret ret ;
331
347
int idx ;
332
- uint64_t cbase = 0 ;
348
+ uint64_t cbase = 0 , cmask = rvpmu -> cmask ;
333
349
unsigned long cflags = 0 ;
334
350
335
351
cflags = pmu_sbi_get_filter_flags (event );
352
+
353
+ /*
354
+ * In legacy mode, we have to force the fixed counters for those events
355
+ * but not in the user access mode as we want to use the other counters
356
+ * that support sampling/filtering.
357
+ */
358
+ if (hwc -> flags & PERF_EVENT_FLAG_LEGACY ) {
359
+ if (event -> attr .config == PERF_COUNT_HW_CPU_CYCLES ) {
360
+ cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH ;
361
+ cmask = 1 ;
362
+ } else if (event -> attr .config == PERF_COUNT_HW_INSTRUCTIONS ) {
363
+ cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH ;
364
+ cmask = 1UL << (CSR_INSTRET - CSR_CYCLE );
365
+ }
366
+ }
367
+
336
368
/* retrieve the available counter index */
337
369
#if defined(CONFIG_32BIT )
338
370
ret = sbi_ecall (SBI_EXT_PMU , SBI_EXT_PMU_COUNTER_CFG_MATCH , cbase ,
339
- rvpmu -> cmask , cflags , hwc -> event_base , hwc -> config ,
371
+ cmask , cflags , hwc -> event_base , hwc -> config ,
340
372
hwc -> config >> 32 );
341
373
#else
342
374
ret = sbi_ecall (SBI_EXT_PMU , SBI_EXT_PMU_COUNTER_CFG_MATCH , cbase ,
343
- rvpmu -> cmask , cflags , hwc -> event_base , hwc -> config , 0 );
375
+ cmask , cflags , hwc -> event_base , hwc -> config , 0 );
344
376
#endif
345
377
if (ret .error ) {
346
378
pr_debug ("Not able to find a counter for event %lx config %llx\n" ,
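A quick worked example of the legacy masks: bit i of cmask nominates logical counter cbase + i for the SBI_EXT_PMU_COUNTER_CFG_MATCH call, and with the standard contiguous CSR numbering (assumed here from asm/csr.h) CSR_INSTRET - CSR_CYCLE is 2. Cycles are therefore pinned to fixed counter 0 (cmask = 0x1) and instructions to fixed counter 2 (cmask = 0x4), while SBI_PMU_CFG_FLAG_SKIP_MATCH asks the SBI implementation to skip event matching and hand back that fixed counter. A compile-time sketch of the arithmetic:

/* Illustration only, not part of the patch; CSR numbers assumed. */
#define CSR_CYCLE   0xc00
#define CSR_INSTRET 0xc02

_Static_assert((CSR_INSTRET - CSR_CYCLE) == 2,
               "PERF_COUNT_HW_INSTRUCTIONS maps to fixed counter 2");
_Static_assert((1UL << (CSR_INSTRET - CSR_CYCLE)) == 0x4,
               "so its cmask is 0x4, while cycles use cmask 0x1 (counter 0)");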
@@ -474,6 +506,22 @@ static u64 pmu_sbi_ctr_read(struct perf_event *event)
        return val;
}

+static void pmu_sbi_set_scounteren(void *arg)
+{
+       struct perf_event *event = (struct perf_event *)arg;
+
+       csr_write(CSR_SCOUNTEREN,
+                 csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event)));
+}
+
+static void pmu_sbi_reset_scounteren(void *arg)
+{
+       struct perf_event *event = (struct perf_event *)arg;
+
+       csr_write(CSR_SCOUNTEREN,
+                 csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event)));
+}
+
static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
{
        struct sbiret ret;
@@ -490,13 +538,21 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
        if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
                pr_err("Starting counter idx %d failed with error %d\n",
                       hwc->idx, sbi_err_map_linux_errno(ret.error));
+
+       if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
+           (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
+               pmu_sbi_set_scounteren((void *)event);
}

static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
{
        struct sbiret ret;
        struct hw_perf_event *hwc = &event->hw;

+       if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
+           (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
+               pmu_sbi_reset_scounteren((void *)event);
+
        ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
        if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
            flag != SBI_PMU_STOP_FLAG_RESET)
@@ -704,10 +760,13 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
        struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);

        /*
-        * Enable the access for CYCLE, TIME, and INSTRET CSRs from userspace,
-        * as is necessary to maintain uABI compatibility.
+        * We keep enabling userspace access to CYCLE, TIME and INSTRET via the
+        * legacy option but that will be removed in the future.
         */
-       csr_write(CSR_SCOUNTEREN, 0x7);
+       if (sysctl_perf_user_access == SYSCTL_LEGACY)
+               csr_write(CSR_SCOUNTEREN, 0x7);
+       else
+               csr_write(CSR_SCOUNTEREN, 0x2);

        /* Stop all the counters so that they can be enabled from perf */
        pmu_sbi_stop_all(pmu);
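For reference, SCOUNTEREN gates user-mode access per counter. With the bit layout assumed here from the privileged spec, bit 0 is CY (cycle), bit 1 is TM (time), bit 2 is IR (instret) and bits 3..31 cover hpmcounter3..31. Legacy mode therefore keeps the old blanket 0x7, while the other modes expose only the time CSR (0x2) and let pmu_sbi_set_scounteren()/pmu_sbi_reset_scounteren() toggle individual counter bits on demand. A compile-time sketch of those two constants:

/* Illustration only, not part of the patch; bit layout assumed from the spec. */
#define SCOUNTEREN_CY (1U << 0)   /* cycle   */
#define SCOUNTEREN_TM (1U << 1)   /* time    */
#define SCOUNTEREN_IR (1U << 2)   /* instret */

_Static_assert((SCOUNTEREN_CY | SCOUNTEREN_TM | SCOUNTEREN_IR) == 0x7,
               "legacy mode exposes cycle, time and instret unconditionally");
_Static_assert(SCOUNTEREN_TM == 0x2,
               "otherwise only the time CSR stays user-readable by default");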
@@ -838,6 +897,121 @@ static void riscv_pmu_destroy(struct riscv_pmu *pmu)
        cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
}

+static void pmu_sbi_event_init(struct perf_event *event)
+{
+       /*
+        * The permissions are set at event_init so that we do not depend
+        * on the sysctl value that can change.
+        */
+       if (sysctl_perf_user_access == SYSCTL_NO_USER_ACCESS)
+               event->hw.flags |= PERF_EVENT_FLAG_NO_USER_ACCESS;
+       else if (sysctl_perf_user_access == SYSCTL_USER_ACCESS)
+               event->hw.flags |= PERF_EVENT_FLAG_USER_ACCESS;
+       else
+               event->hw.flags |= PERF_EVENT_FLAG_LEGACY;
+}
+
+static void pmu_sbi_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+       if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
+               return;
+
+       if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
+               if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+                   event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
+                       return;
+               }
+       }
+
+       /*
+        * The user mmapped the event to directly access it: this is where
+        * we determine based on sysctl_perf_user_access if we grant userspace
+        * the direct access to this event. That means that within the same
+        * task, some events may be directly accessible and some other may not,
+        * if the user changes the value of sysctl_perf_user_access in the
+        * meantime.
+        */
+
+       event->hw.flags |= PERF_EVENT_FLAG_USER_READ_CNT;
+
+       /*
+        * We must enable userspace access *before* advertising in the user page
+        * that it is possible to do so to avoid any race.
+        * And we must notify all cpus here because threads that currently run
+        * on other cpus will try to directly access the counter too without
+        * calling pmu_sbi_ctr_start.
+        */
+       if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
+               on_each_cpu_mask(mm_cpumask(mm),
+                                pmu_sbi_set_scounteren, (void *)event, 1);
+}
+
+static void pmu_sbi_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+       if (event->hw.flags & PERF_EVENT_FLAG_NO_USER_ACCESS)
+               return;
+
+       if (event->hw.flags & PERF_EVENT_FLAG_LEGACY) {
+               if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+                   event->attr.config != PERF_COUNT_HW_INSTRUCTIONS) {
+                       return;
+               }
+       }
+
+       /*
+        * Here we can directly remove user access since the user does not have
+        * access to the user page anymore so we avoid the racy window where the
+        * user could have read cap_user_rdpmc to true right before we disable
+        * it.
+        */
+       event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;
+
+       if (event->hw.flags & PERF_EVENT_FLAG_USER_ACCESS)
+               on_each_cpu_mask(mm_cpumask(mm),
+                                pmu_sbi_reset_scounteren, (void *)event, 1);
+}
+
+static void riscv_pmu_update_counter_access(void *info)
+{
+       if (sysctl_perf_user_access == SYSCTL_LEGACY)
+               csr_write(CSR_SCOUNTEREN, 0x7);
+       else
+               csr_write(CSR_SCOUNTEREN, 0x2);
+}
+
+static int riscv_pmu_proc_user_access_handler(struct ctl_table *table,
+                                              int write, void *buffer,
+                                              size_t *lenp, loff_t *ppos)
+{
+       int prev = sysctl_perf_user_access;
+       int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+       /*
+        * Test against the previous value since we clear SCOUNTEREN when
+        * sysctl_perf_user_access is set to SYSCTL_USER_ACCESS, but we should
+        * not do that if that was already the case.
+        */
+       if (ret || !write || prev == sysctl_perf_user_access)
+               return ret;
+
+       on_each_cpu(riscv_pmu_update_counter_access, NULL, 1);
+
+       return 0;
+}
+
+static struct ctl_table sbi_pmu_sysctl_table[] = {
+       {
+               .procname       = "perf_user_access",
+               .data           = &sysctl_perf_user_access,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = riscv_pmu_proc_user_access_handler,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_TWO,
+       },
+       { }
+};
+
static int pmu_sbi_device_probe(struct platform_device *pdev)
{
        struct riscv_pmu *pmu = NULL;
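To show how these hooks look from the userspace side, here is a hedged self-monitoring sketch: it opens a cycles event, mmaps the perf user page and, when cap_user_rdpmc and index report a directly readable counter, reads it without a syscall. It relies on the SCOUNTEREN bit set by pmu_sbi_event_mapped()/pmu_sbi_ctr_start() above and on the user page being filled in elsewhere with the counter index; it also assumes the event landed on the fixed cycle counter so that rdcycle is the right instruction, and it omits the pmc_width truncation step of the full documented protocol.

/* Illustration only, not part of the patch. Build for riscv64. */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

static uint64_t read_cycle_csr(void)
{
        uint64_t val;

        asm volatile("rdcycle %0" : "=r"(val));
        return val;
}

int main(void)
{
        struct perf_event_attr attr = {
                .type = PERF_TYPE_HARDWARE,
                .config = PERF_COUNT_HW_CPU_CYCLES,
                .size = sizeof(attr),
                .exclude_kernel = 1,
        };
        struct perf_event_mmap_page *pc;
        uint64_t count;
        uint32_t seq;
        int fd;

        fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                return 1;
        pc = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd, 0);
        if (pc == MAP_FAILED)
                return 1;

        /* Lockless read loop documented in include/uapi/linux/perf_event.h. */
        do {
                seq = pc->lock;
                __sync_synchronize();
                count = pc->offset;
                if (pc->cap_user_rdpmc && pc->index)
                        count += read_cycle_csr();      /* no syscall needed */
                __sync_synchronize();
        } while (pc->lock != seq);

        printf("cycles so far: %llu\n", (unsigned long long)count);
        return 0;
}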
@@ -881,6 +1055,10 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
        pmu->ctr_get_width = pmu_sbi_ctr_get_width;
        pmu->ctr_clear_idx = pmu_sbi_ctr_clear_idx;
        pmu->ctr_read = pmu_sbi_ctr_read;
+       pmu->event_init = pmu_sbi_event_init;
+       pmu->event_mapped = pmu_sbi_event_mapped;
+       pmu->event_unmapped = pmu_sbi_event_unmapped;
+       pmu->csr_index = pmu_sbi_csr_index;

        ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
        if (ret)
@@ -894,6 +1072,8 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
        if (ret)
                goto out_unregister;

+       register_sysctl("kernel", sbi_pmu_sysctl_table);
+
        return 0;

out_unregister: