Skip to content

Commit 20fafba

Browse files
authored
Merge pull request #33 from QuState/feature/reusable-planner
Make planner reusable
2 parents 2df2f00 + b3568d3 commit 20fafba

File tree

11 files changed

+439
-65
lines changed

11 files changed

+439
-65
lines changed

Cargo.toml

+7-1
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,14 @@ num-traits = "0.2.18"
1515
multiversion = "0.7"
1616

1717
[dev-dependencies]
18-
utilities = { path = "utilities" }
18+
criterion = "0.5.1"
1919
fftw = "0.8.0"
20+
rand = "0.8.5"
21+
utilities = { path = "utilities" }
22+
23+
[[bench]]
24+
name = "bench"
25+
harness = false
2026

2127
[profile.release]
2228
codegen-units = 1

benches/bench.rs

+159
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,159 @@
1+
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
2+
use num_traits::Float;
3+
use phastft::{
4+
fft_32_with_opts_and_plan, fft_64_with_opts_and_plan,
5+
options::Options,
6+
planner::{Direction, Planner32, Planner64},
7+
};
8+
use rand::{
9+
distributions::{Distribution, Standard},
10+
thread_rng, Rng,
11+
};
12+
use utilities::rustfft::num_complex::Complex;
13+
use utilities::rustfft::FftPlanner;
14+
15+
/// Base-2 exponents of the benchmarked transform sizes: every benchmark in
/// this file iterates over this list and transforms `1 << n` elements.
const LENGTHS: &[usize] = &[
16+
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
17+
];
18+
19+
fn generate_numbers<T: Float>(n: usize) -> (Vec<T>, Vec<T>)
20+
where
21+
Standard: Distribution<T>,
22+
{
23+
let mut rng = thread_rng();
24+
25+
let samples: Vec<T> = (&mut rng).sample_iter(Standard).take(2 * n).collect();
26+
27+
let mut reals = vec![T::zero(); n];
28+
let mut imags = vec![T::zero(); n];
29+
30+
for ((z_re, z_im), rand_chunk) in reals
31+
.iter_mut()
32+
.zip(imags.iter_mut())
33+
.zip(samples.chunks_exact(2))
34+
{
35+
*z_re = rand_chunk[0];
36+
*z_im = rand_chunk[1];
37+
}
38+
39+
(reals, imags)
40+
}
41+
42+
fn generate_complex_numbers<T: Float + Default>(n: usize) -> Vec<Complex<T>>
43+
where
44+
Standard: Distribution<T>,
45+
{
46+
let mut rng = thread_rng();
47+
48+
let samples: Vec<T> = (&mut rng).sample_iter(Standard).take(2 * n).collect();
49+
50+
let mut signal = vec![Complex::default(); n];
51+
52+
for (z, rand_chunk) in signal.iter_mut().zip(samples.chunks_exact(2)) {
53+
z.re = rand_chunk[0];
54+
z.im = rand_chunk[1];
55+
}
56+
57+
signal
58+
}
59+
60+
fn benchmark_forward_f32(c: &mut Criterion) {
61+
let mut group = c.benchmark_group("Forward f32");
62+
63+
for n in LENGTHS.iter() {
64+
let len = 1 << n;
65+
group.throughput(Throughput::Elements(len as u64));
66+
67+
let id = "PhastFT FFT Forward";
68+
let options = Options::guess_options(len);
69+
let planner = Planner32::new(len, Direction::Forward);
70+
let (mut reals, mut imags) = generate_numbers(len);
71+
72+
group.bench_with_input(BenchmarkId::new(id, len), &len, |b, &len| {
73+
b.iter(|| {
74+
fft_32_with_opts_and_plan(
75+
black_box(&mut reals),
76+
black_box(&mut imags),
77+
black_box(&options),
78+
black_box(&planner),
79+
);
80+
});
81+
});
82+
83+
let id = "RustFFT FFT Forward";
84+
let mut planner = FftPlanner::<f32>::new();
85+
let fft = planner.plan_fft_forward(len);
86+
let mut signal = generate_complex_numbers(len);
87+
88+
group.bench_with_input(BenchmarkId::new(id, len), &len, |b, &len| {
89+
b.iter(|| fft.process(black_box(&mut signal)));
90+
});
91+
}
92+
group.finish();
93+
}
94+
95+
fn benchmark_inverse_f32(c: &mut Criterion) {
96+
let options = Options::default();
97+
98+
for n in LENGTHS.iter() {
99+
let len = 1 << n;
100+
let id = format!("FFT Inverse f32 {} elements", len);
101+
let planner = Planner32::new(len, Direction::Reverse);
102+
103+
c.bench_function(&id, |b| {
104+
let (mut reals, mut imags) = generate_numbers(len);
105+
b.iter(|| {
106+
black_box(fft_32_with_opts_and_plan(
107+
&mut reals, &mut imags, &options, &planner,
108+
));
109+
});
110+
});
111+
}
112+
}
113+
114+
fn benchmark_forward_f64(c: &mut Criterion) {
115+
let options = Options::default();
116+
117+
for n in LENGTHS.iter() {
118+
let len = 1 << n;
119+
let id = format!("FFT Forward f64 {} elements", len);
120+
let planner = Planner64::new(len, Direction::Forward);
121+
122+
c.bench_function(&id, |b| {
123+
let (mut reals, mut imags) = generate_numbers(len);
124+
b.iter(|| {
125+
black_box(fft_64_with_opts_and_plan(
126+
&mut reals, &mut imags, &options, &planner,
127+
));
128+
});
129+
});
130+
}
131+
}
132+
133+
fn benchmark_inverse_f64(c: &mut Criterion) {
134+
let options = Options::default();
135+
136+
for n in LENGTHS.iter() {
137+
let len = 1 << n;
138+
let id = format!("FFT Inverse f64 {} elements", len);
139+
let planner = Planner64::new(len, Direction::Reverse);
140+
141+
c.bench_function(&id, |b| {
142+
let (mut reals, mut imags) = generate_numbers(len);
143+
b.iter(|| {
144+
black_box(fft_64_with_opts_and_plan(
145+
&mut reals, &mut imags, &options, &planner,
146+
));
147+
});
148+
});
149+
}
150+
}
151+
152+
// Collect the four benchmark functions into one Criterion group named `benches`.
criterion_group!(
153+
benches,
154+
benchmark_forward_f32,
155+
benchmark_inverse_f32,
156+
benchmark_forward_f64,
157+
benchmark_inverse_f64
158+
);
159+
// Entry point for the bench target; Cargo.toml sets `harness = false` for it.
criterion_main!(benches);

examples/benchmark.rs

+7-3
Original file line number | Diff line number | Diff line change
@@ -3,17 +3,21 @@ use std::str::FromStr;
33

44
use utilities::gen_random_signal;
55

6-
use phastft::fft_64;
7-
use phastft::planner::Direction;
6+
use phastft::fft_64_with_opts_and_plan;
7+
use phastft::options::Options;
8+
use phastft::planner::{Direction, Planner64};
89

910
fn benchmark_fft_64(n: usize) {
1011
let big_n = 1 << n;
1112
let mut reals = vec![0.0; big_n];
1213
let mut imags = vec![0.0; big_n];
1314
gen_random_signal(&mut reals, &mut imags);
1415

16+
let planner = Planner64::new(reals.len(), Direction::Forward);
17+
let opts = Options::guess_options(reals.len());
18+
1519
let now = std::time::Instant::now();
16-
fft_64(&mut reals, &mut imags, Direction::Forward);
20+
fft_64_with_opts_and_plan(&mut reals, &mut imags, &opts, &planner);
1721
let elapsed = now.elapsed().as_micros();
1822
println!("{elapsed}");
1923
}

examples/profile.rs

+18-9
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,29 @@
11
use std::env;
22
use std::str::FromStr;
33

4-
use phastft::fft_64;
5-
use phastft::planner::Direction;
6-
7-
fn benchmark_fft(num_qubits: usize) {
8-
let n = 1 << num_qubits;
9-
let mut reals: Vec<f64> = (1..=n).map(|i| i as f64).collect();
10-
let mut imags: Vec<f64> = (1..=n).map(|i| i as f64).collect();
11-
fft_64(&mut reals, &mut imags, Direction::Forward);
4+
use utilities::gen_random_signal;
5+
6+
use phastft::fft_64_with_opts_and_plan;
7+
use phastft::options::Options;
8+
use phastft::planner::{Direction, Planner64};
9+
10+
fn benchmark_fft_64(n: usize) {
11+
let big_n = 1 << n;
12+
let mut reals = vec![0.0; big_n];
13+
let mut imags = vec![0.0; big_n];
14+
gen_random_signal(&mut reals, &mut imags);
15+
16+
let planner = Planner64::new(reals.len(), Direction::Forward);
17+
let opts = Options::guess_options(reals.len());
18+
19+
fft_64_with_opts_and_plan(&mut reals, &mut imags, &opts, &planner);
1220
}
1321

1422
fn main() {
1523
let args: Vec<String> = env::args().collect();
1624
assert_eq!(args.len(), 2, "Usage {} <n>", args[0]);
1725

1826
let n = usize::from_str(&args[1]).unwrap();
19-
benchmark_fft(n);
27+
28+
benchmark_fft_64(n);
2029
}

examples/rustfft.rs

+4-3
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,8 @@ use utilities::{
99
fn benchmark_rustfft(n: usize) {
1010
let big_n = 1 << n;
1111

12-
let mut reals = vec![0.0; big_n];
13-
let mut imags = vec![0.0; big_n];
12+
let mut reals = vec![0.0f64; big_n];
13+
let mut imags = vec![0.0f64; big_n];
1414

1515
gen_random_signal(&mut reals, &mut imags);
1616
let mut signal = vec![Complex64::default(); big_n];
@@ -23,9 +23,10 @@ fn benchmark_rustfft(n: usize) {
2323
z.im = im;
2424
});
2525

26-
let now = std::time::Instant::now();
2726
let mut planner = FftPlanner::new();
2827
let fft = planner.plan_fft_forward(signal.len());
28+
29+
let now = std::time::Instant::now();
2930
fft.process(&mut signal);
3031
let elapsed = now.elapsed().as_micros();
3132
println!("{elapsed}");

src/kernels.rs

+32
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,14 @@ use num_traits::Float;
44

55
macro_rules! fft_butterfly_n_simd {
66
($func_name:ident, $precision:ty, $lanes:literal, $simd_vector:ty) => {
7+
#[multiversion::multiversion(targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
8+
"x86_64+avx2+fma", // x86_64-v3
9+
"x86_64+sse4.2", // x86_64-v2
10+
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
11+
"x86+avx2+fma",
12+
"x86+sse4.2",
13+
"x86+sse2",
14+
))]
715
#[inline]
816
pub fn $func_name(
917
reals: &mut [$precision],
@@ -52,6 +60,14 @@ macro_rules! fft_butterfly_n_simd {
5260
fft_butterfly_n_simd!(fft_64_chunk_n_simd, f64, 8, f64x8);
5361
fft_butterfly_n_simd!(fft_32_chunk_n_simd, f32, 16, f32x16);
5462

63+
#[multiversion::multiversion(targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
64+
"x86_64+avx2+fma", // x86_64-v3
65+
"x86_64+sse4.2", // x86_64-v2
66+
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
67+
"x86+avx2+fma",
68+
"x86+sse4.2",
69+
"x86+sse2",
70+
))]
5571
#[inline]
5672
pub(crate) fn fft_chunk_n<T: Float>(
5773
reals: &mut [T],
@@ -93,6 +109,14 @@ pub(crate) fn fft_chunk_n<T: Float>(
93109
}
94110

95111
/// `chunk_size == 4`, so hard code twiddle factors
112+
#[multiversion::multiversion(targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
113+
"x86_64+avx2+fma", // x86_64-v3
114+
"x86_64+sse4.2", // x86_64-v2
115+
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
116+
"x86+avx2+fma",
117+
"x86+sse4.2",
118+
"x86+sse2",
119+
))]
96120
#[inline]
97121
pub(crate) fn fft_chunk_4<T: Float>(reals: &mut [T], imags: &mut [T]) {
98122
let dist = 2;
@@ -128,6 +152,14 @@ pub(crate) fn fft_chunk_4<T: Float>(reals: &mut [T], imags: &mut [T]) {
128152
}
129153

130154
/// `chunk_size == 2`, so skip phase
155+
#[multiversion::multiversion(targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
156+
"x86_64+avx2+fma", // x86_64-v3
157+
"x86_64+sse4.2", // x86_64-v2
158+
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
159+
"x86+avx2+fma",
160+
"x86+sse4.2",
161+
"x86+sse2",
162+
))]
131163
#[inline]
132164
pub(crate) fn fft_chunk_2<T: Float>(reals: &mut [T], imags: &mut [T]) {
133165
reals

0 commit comments

Comments
 (0)