Skip to content

Commit 9d7305e

Browse files
authored
Add perf event support (#315)
* Add perfmon to Stats
* Configure perf events with MMTk options
* Include perf_counter feature in CI
1 parent a288b40 commit 9d7305e

File tree

13 files changed

+225
-24
lines changed

13 files changed

+225
-24
lines changed

.github/scripts/ci-build.sh

+1
Original file line number | Diff line number | Diff line change
@@ -16,4 +16,5 @@ if [[ $arch == "x86_64" && $os == "linux" ]]; then
1616
cargo build --target i686-unknown-linux-gnu
1717
for_all_features "cargo build --target i686-unknown-linux-gnu"
1818
for_all_features "cargo build --release --target i686-unknown-linux-gnu"
19+
cargo build --features perf_counter
1920
fi

.github/scripts/ci-common.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ init_non_exclusive_features() {
4040

4141
while IFS= read -r line; do
4242
# Only parse non mutually exclusive features
43-
if [[ $line == *"-- Non mutally exclusive features --"* ]]; then
43+
if [[ $line == *"-- Non mutually exclusive features --"* ]]; then
4444
parse_features=true
4545
continue
4646
fi

.github/scripts/ci-style.sh

+3
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,9 @@ cargo clippy --manifest-path=vmbindings/dummyvm/Cargo.toml
1717
if [[ $arch == "x86_64" && $os == "linux" ]]; then
1818
for_all_features "cargo clippy --target i686-unknown-linux-gnu"
1919
for_all_features "cargo clippy --release --target i686-unknown-linux-gnu"
20+
cargo clippy --features perf_counter
21+
cargo clippy --release --features perf_counter
22+
cargo clippy --tests --features perf_counter
2023
fi
2124

2225
# check format

.github/scripts/ci-test.sh

+1
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ for_all_features "cargo test"
55
# For x86_64-linux, also check for i686
66
if [[ $arch == "x86_64" && $os == "linux" ]]; then
77
for_all_features "cargo test --target i686-unknown-linux-gnu"
8+
cargo test --features perf_counter
89
fi
910

1011
python examples/build.py

Cargo.toml

+6-1
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ atomic-traits = "0.2.0"
3333
atomic = "0.4.6"
3434
spin = "0.5.2"
3535
env_logger = "0.8.2"
36+
pfm = {version = "0.0.7", optional = true}
3637

3738
[dev-dependencies]
3839
crossbeam = "0.7.3"
@@ -41,11 +42,15 @@ rand = "0.7.3"
4142
[features]
4243
default = []
4344

45+
# This feature is only supported on x86-64 for now
46+
# It's manually added to CI scripts
47+
perf_counter = ["pfm"]
48+
4449
# .github/scripts/ci-common.sh extracts features from the following part (including from comments).
4550
# So be careful when editing or adding stuff to the section below.
4651

4752
# Do not modify the following line - ci-common.sh matches it
48-
# -- Non mutally exclusive features --
53+
# -- Non mutually exclusive features --
4954

5055
# spaces
5156
vm_space = []

src/scheduler/mod.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ pub(crate) use scheduler::MMTkScheduler;
1515
pub(self) use scheduler::Scheduler;
1616

1717
mod stat;
18-
mod work_counter;
18+
pub(self) mod work_counter;
1919

2020
mod work;
2121
pub use work::CoordinatorWork;

src/scheduler/scheduler.rs

+1
Original file line number | Diff line number | Diff line change
@@ -410,6 +410,7 @@ mod tests {
410410
// println!("Original: {:?}", data);
411411

412412
SCHEDULER.initialize(NUM_WORKERS, &(), VMThread::UNINITIALIZED);
413+
SCHEDULER.enable_stat();
413414
SCHEDULER.work_buckets[WorkBucketStage::Unconstrained]
414415
.add(Sort(unsafe { &mut *(data as *mut _) }));
415416
SCHEDULER.wait_for_completion();

src/scheduler/stat.rs

+52-9
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,13 @@
11
//! Statistics for work packets
22
use super::work_counter::{WorkCounter, WorkCounterBase, WorkDuration};
3+
#[cfg(feature = "perf_counter")]
4+
use crate::scheduler::work_counter::WorkPerfEvent;
5+
use crate::scheduler::Context;
6+
use crate::vm::VMBinding;
7+
use crate::MMTK;
38
use std::any::TypeId;
49
use std::collections::HashMap;
10+
use std::marker::PhantomData;
511
use std::sync::atomic::{AtomicBool, Ordering};
612

713
/// Merge and print the work-packet level statistics from all worker threads
@@ -99,7 +105,7 @@ impl SchedulerStat {
99105
stat
100106
}
101107
/// Merge work counters from different worker threads
102-
pub fn merge(&mut self, stat: &WorkerLocalStat) {
108+
pub fn merge<C>(&mut self, stat: &WorkerLocalStat<C>) {
103109
// Merge work packet type ID to work packet name mapping
104110
for (id, name) in &stat.work_id_name_map {
105111
self.work_id_name_map.insert(*id, *name);
@@ -144,7 +150,7 @@ impl WorkStat {
144150
/// Stop all work counters for the work packet type of the just executed
145151
/// work packet
146152
#[inline(always)]
147-
pub fn end_of_work(&self, worker_stat: &mut WorkerLocalStat) {
153+
pub fn end_of_work<C: Context>(&self, worker_stat: &mut WorkerLocalStat<C>) {
148154
if !worker_stat.is_enabled() {
149155
return;
150156
};
@@ -165,15 +171,27 @@ impl WorkStat {
165171
}
166172

167173
/// Worker thread local counterpart of [`SchedulerStat`]
168-
#[derive(Default)]
169-
pub struct WorkerLocalStat {
174+
pub struct WorkerLocalStat<C> {
170175
work_id_name_map: HashMap<TypeId, &'static str>,
171176
work_counts: HashMap<TypeId, usize>,
172177
work_counters: HashMap<TypeId, Vec<Box<dyn WorkCounter>>>,
173178
enabled: AtomicBool,
179+
_phantom: PhantomData<C>,
180+
}
181+
182+
impl<C> Default for WorkerLocalStat<C> {
183+
fn default() -> Self {
184+
WorkerLocalStat {
185+
work_id_name_map: Default::default(),
186+
work_counts: Default::default(),
187+
work_counters: Default::default(),
188+
enabled: AtomicBool::new(false),
189+
_phantom: Default::default(),
190+
}
191+
}
174192
}
175193

176-
impl WorkerLocalStat {
194+
impl<C: Context> WorkerLocalStat<C> {
177195
#[inline]
178196
pub fn is_enabled(&self) -> bool {
179197
self.enabled.load(Ordering::SeqCst)
@@ -185,23 +203,48 @@ impl WorkerLocalStat {
185203
/// Measure the execution of a work packet by starting all counters for that
186204
/// type
187205
#[inline]
188-
pub fn measure_work(&mut self, work_id: TypeId, work_name: &'static str) -> WorkStat {
206+
pub fn measure_work(
207+
&mut self,
208+
work_id: TypeId,
209+
work_name: &'static str,
210+
context: &'static C,
211+
) -> WorkStat {
189212
let stat = WorkStat {
190213
type_id: work_id,
191214
type_name: work_name,
192215
};
193216
if self.is_enabled() {
194217
self.work_counters
195218
.entry(work_id)
196-
.or_insert_with(WorkerLocalStat::counter_set)
219+
.or_insert_with(|| C::counter_set(context))
197220
.iter_mut()
198221
.for_each(|c| c.start());
199222
}
200223
stat
201224
}
225+
}
202226

203-
// The set of work counters for all work packet types
204-
fn counter_set() -> Vec<Box<dyn WorkCounter>> {
227+
/// Private trait to let different contexts supply different sets of default
228+
/// counters
229+
trait HasCounterSet {
230+
fn counter_set(context: &'static Self) -> Vec<Box<dyn WorkCounter>>;
231+
}
232+
233+
impl<C> HasCounterSet for C {
234+
default fn counter_set(_context: &'static Self) -> Vec<Box<dyn WorkCounter>> {
205235
vec![Box::new(WorkDuration::new())]
206236
}
207237
}
238+
239+
/// Specialization for MMTk to read the options
240+
#[allow(unused_variables, unused_mut)]
241+
impl<VM: VMBinding> HasCounterSet for MMTK<VM> {
242+
fn counter_set(mmtk: &'static Self) -> Vec<Box<dyn WorkCounter>> {
243+
let mut counters: Vec<Box<dyn WorkCounter>> = vec![Box::new(WorkDuration::new())];
244+
#[cfg(feature = "perf_counter")]
245+
for e in &mmtk.options.perf_events.events {
246+
counters.push(box WorkPerfEvent::new(&e.0, e.1, e.2));
247+
}
248+
counters
249+
}
250+
}

src/scheduler/work.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ pub trait Work<C: Context>: 'static + Send {
1010
fn do_work_with_stat(&mut self, worker: &mut Worker<C>, context: &'static C) {
1111
let stat = worker
1212
.stat
13-
.measure_work(TypeId::of::<Self>(), type_name::<Self>());
13+
.measure_work(TypeId::of::<Self>(), type_name::<Self>(), context);
1414
self.do_work(worker, context);
1515
stat.end_of_work(&mut worker.stat);
1616
}

src/scheduler/work_counter.rs

+81
Original file line number | Diff line number | Diff line change
@@ -133,3 +133,84 @@ impl WorkCounter for WorkDuration {
133133
&mut self.base
134134
}
135135
}
136+
137+
#[cfg(feature = "perf_counter")]
138+
mod perf_event {
139+
//! Measure the perf events of work packets
140+
//!
141+
//! This is built on top of libpfm4.
142+
//! The events to measure are parsed from MMTk option `perf_events`
143+
use super::*;
144+
use libc::{c_int, pid_t};
145+
use pfm::PerfEvent;
146+
use std::fmt;
147+
148+
/// Work counter for perf events
149+
#[derive(Clone)]
150+
pub struct WorkPerfEvent {
151+
base: WorkCounterBase,
152+
running: bool,
153+
event_name: String,
154+
pe: PerfEvent,
155+
}
156+
157+
impl WorkPerfEvent {
158+
/// Create a work counter
159+
///
160+
/// See `perf_event_open` for more details on `pid` and `cpu`
161+
/// Examples:
162+
/// 0, -1 measures the calling thread on all CPUs
163+
/// -1, 0 measures all threads on CPU 0
164+
/// -1, -1 is invalid
165+
pub fn new(name: &str, pid: pid_t, cpu: c_int) -> WorkPerfEvent {
166+
let mut pe = PerfEvent::new(name)
167+
.unwrap_or_else(|_| panic!("Failed to create perf event {}", name));
168+
pe.open(pid, cpu)
169+
.unwrap_or_else(|_| panic!("Failed to open perf event {}", name));
170+
WorkPerfEvent {
171+
base: Default::default(),
172+
running: false,
173+
event_name: name.to_string(),
174+
pe,
175+
}
176+
}
177+
}
178+
179+
impl fmt::Debug for WorkPerfEvent {
180+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
181+
f.debug_struct("WorkPerfEvent")
182+
.field("base", &self.base)
183+
.field("running", &self.running)
184+
.field("event_name", &self.event_name)
185+
.finish()
186+
}
187+
}
188+
189+
impl WorkCounter for WorkPerfEvent {
190+
fn start(&mut self) {
191+
self.running = true;
192+
self.pe.reset();
193+
self.pe.enable();
194+
}
195+
fn stop(&mut self) {
196+
self.running = true;
197+
let perf_event_value = self.pe.read().unwrap();
198+
self.base.merge_val(perf_event_value.value as f64);
199+
// assert not multiplexing
200+
assert_eq!(perf_event_value.time_enabled, perf_event_value.time_running);
201+
self.pe.disable();
202+
}
203+
fn name(&self) -> String {
204+
self.event_name.to_owned()
205+
}
206+
fn get_base(&self) -> &WorkCounterBase {
207+
&self.base
208+
}
209+
fn get_base_mut(&mut self) -> &mut WorkCounterBase {
210+
&mut self.base
211+
}
212+
}
213+
}
214+
215+
#[cfg(feature = "perf_counter")]
216+
pub(super) use perf_event::WorkPerfEvent;

src/scheduler/worker.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ pub struct Worker<C: Context> {
5252
local: WorkerLocalPtr,
5353
pub local_work_bucket: WorkBucket<C>,
5454
pub sender: Sender<CoordinatorMessage<C>>,
55-
pub stat: WorkerLocalStat,
55+
pub stat: WorkerLocalStat<C>,
5656
context: Option<&'static C>,
5757
is_coordinator: bool,
5858
local_work_buffer: Vec<(WorkBucketStage, Box<dyn Work<C>>)>,

0 commit comments

Comments
 (0)