Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Waves simulation optimisation - part 5 #89

Merged
merged 7 commits into from
Nov 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 79 additions & 31 deletions gz-waves/src/EigenFFTW_TEST.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,33 +27,44 @@
using Eigen::MatrixXcd;
using Eigen::VectorXcd;

namespace Eigen
{
typedef Eigen::Matrix<
double,
Eigen::Dynamic,
Eigen::Dynamic,
Eigen::RowMajor
> MatrixXdRowMajor;
}

//////////////////////////////////////////////////
TEST(EigenFFWT, EigenFFT1D)
TEST(EigenFFWT, DFT_C2C_1D)
{
// Python code to generate test data
//
// x = [0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0]
// xhat = fft.ifft(x, 8, norm="forward")
//
// xhat = fft.fft(x, 8, norm="forward")
// xx = fft.ifft(xhat, 8, norm="forward")
//
// with np.printoptions(precision=16, suppress=True):
// print(f"x: {x}")
// print(f"xhat: {xhat.real}")
// print(f"xhat: {xhat.imag}")

// print(f"x:\n{x}")
// print(f"xhat:\n{xhat.real}\n{xhat.imag}")
// print(f"xx:\n{xx.real}\n{xx.imag}")
//
int n = 8;

// expected inputs and outputs
Eigen::VectorXcd x = Eigen::VectorXcd::Zero(n);
x.real() << 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0;

Eigen::VectorXcd xhat = Eigen::VectorXcd::Zero(n);
xhat.real() << 2., -1.7071067811865475, 1., -0.2928932188134524,
0., -0.2928932188134524, 1., -1.7071067811865475;
xhat.imag() << 0., 0.7071067811865475, -1., 0.7071067811865475,
0., -0.7071067811865475, 1., -0.7071067811865475;
xhat.real() << 0.25, -0.2133883476483184, 0.125, -0.0366116523516816,
0., -0.0366116523516816, 0.125, -0.2133883476483184;
xhat.imag() << 0., -0.0883883476483184, 0.125, -0.0883883476483184,
0., 0.0883883476483184, -0.125, 0.0883883476483184;

{ // using fftw_complex
fftw_complex* in = (fftw_complex*)fftw_malloc(n * sizeof(fftw_complex));
fftw_complex* in = (fftw_complex*)fftw_malloc(n * sizeof(fftw_complex));
fftw_complex* out = (fftw_complex*)fftw_malloc(n * sizeof(fftw_complex));

// create plan
Expand All @@ -62,8 +73,8 @@ TEST(EigenFFWT, EigenFFT1D)
// populate input
for (int i=0; i<n; ++i)
{
in[i][0] = x(i).real();
in[i][1] = x(i).imag();
in[i][0] = xhat(i).real();
in[i][1] = xhat(i).imag();
}

// run fft
Expand All @@ -74,9 +85,9 @@ TEST(EigenFFWT, EigenFFT1D)
{
// std::cerr << "[" << i << "] "
// << out[i][0] << " + " << out[i][1]
// << "\n";
EXPECT_DOUBLE_EQ(out[i][0], xhat(i).real());
EXPECT_DOUBLE_EQ(out[i][1], xhat(i).imag());
// << "\n";
EXPECT_NEAR(out[i][0], x(i).real(), 1.0E-15);
EXPECT_NEAR(out[i][1], 0.0, 1.0E-15);
}

// cleanup
Expand All @@ -100,7 +111,7 @@ TEST(EigenFFWT, EigenFFT1D)
// populate input
for (int i=0; i<n; ++i)
{
in[i] = x(i);
in[i] = xhat(i);
}

// run fft
Expand All @@ -109,8 +120,8 @@ TEST(EigenFFWT, EigenFFT1D)
// check output
for (int i=0; i<n; ++i)
{
EXPECT_DOUBLE_EQ(out[i].real(), xhat(i).real());
EXPECT_DOUBLE_EQ(out[i].imag(), xhat(i).imag());
EXPECT_NEAR(out[i].real(), x(i).real(), 1.0E-15);
EXPECT_NEAR(out[i].imag(), 0.0, 1.0E-15);
}
}

Expand All @@ -128,7 +139,7 @@ TEST(EigenFFWT, EigenFFT1D)
// populate input
for (int i=0; i<n; ++i)
{
in(i) = x(i);
in(i) = xhat(i);
}

// run fft
Expand All @@ -137,20 +148,57 @@ TEST(EigenFFWT, EigenFFT1D)
// check output
for (int i=0; i<n; ++i)
{
EXPECT_DOUBLE_EQ(out(i).real(), xhat(i).real());
EXPECT_DOUBLE_EQ(out(i).imag(), xhat(i).imag());
EXPECT_NEAR(out(i).real(), x(i).real(), 1.0E-15);
EXPECT_NEAR(out(i).imag(), 0.0, 1.0E-15);
}
}
}

namespace Eigen
{
typedef Eigen::Matrix<
double,
Eigen::Dynamic,
Eigen::Dynamic,
Eigen::RowMajor
> MatrixXdRowMajor;
//////////////////////////////////////////////////
TEST(EigenFFWT, DFT_C2R_1D)
{
// Python code to generate test data
int n = 8;

// expected inputs and outputs
Eigen::VectorXcd x = Eigen::VectorXcd::Zero(n);
x.real() << 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0;

Eigen::VectorXcd xhat = Eigen::VectorXcd::Zero(n/2+1);
xhat.real() << 0.25, -0.2133883476483184, 0.125, -0.0366116523516816,
0.;
xhat.imag() << 0., -0.0883883476483184, 0.125, -0.0883883476483184,
0.;

{
std::vector<std::complex<double>> in(n/2+1, 0.0);
std::vector<double> out(n, 0.0);

// create plan
fftw_plan plan = fftw_plan_dft_c2r_1d(
n,
reinterpret_cast<fftw_complex*>(in.data()),
reinterpret_cast<double*>(out.data()),
FFTW_ESTIMATE);

// populate input
for (int i=0; i<n/2+1; ++i)
{
in[i] = xhat(i);
}

// run fft
fftw_execute(plan);

// check output
for (int i=0; i<n; ++i)
{
EXPECT_NEAR(out[i], x(i).real(), 1.0E-15);
}

// cleanup
fftw_destroy_plan(plan);
}
}

//////////////////////////////////////////////////
Expand Down
119 changes: 60 additions & 59 deletions gz-waves/src/WaveSimulationFFT.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ namespace waves
ly_(ly),
lambda_(0.6)
{
ComputeBaseAmplitudes();
CreateFFTWPlans();
ComputeBaseAmplitudes();
}

//////////////////////////////////////////////////
Expand Down Expand Up @@ -97,7 +97,7 @@ namespace waves

// change from row to column major storage
size_t n2 = nx_ * ny_;
h = fft_out0_.reshaped<Eigen::ColMajor>(n2, 1).real();
h = fft_out0_.reshaped<Eigen::ColMajor>(n2, 1);
}

//////////////////////////////////////////////////
Expand All @@ -111,8 +111,8 @@ namespace waves

// change from row to column major storage
size_t n2 = nx_ * ny_;
dhdy = fft_out1_.reshaped<Eigen::ColMajor>(n2, 1).real();
dhdx = fft_out2_.reshaped<Eigen::ColMajor>(n2, 1).real();
dhdy = fft_out1_.reshaped<Eigen::ColMajor>(n2, 1);
dhdx = fft_out2_.reshaped<Eigen::ColMajor>(n2, 1);
}

//////////////////////////////////////////////////
Expand All @@ -126,8 +126,8 @@ namespace waves

// change from row to column major storage
size_t n2 = nx_ * ny_;
sy = fft_out3_.reshaped<Eigen::ColMajor>(n2, 1).real() * lambda_ * -1.0;
sx = fft_out4_.reshaped<Eigen::ColMajor>(n2, 1).real() * lambda_ * -1.0;
sy = fft_out3_.reshaped<Eigen::ColMajor>(n2, 1) * lambda_ * -1.0;
sx = fft_out4_.reshaped<Eigen::ColMajor>(n2, 1) * lambda_ * -1.0;
}

//////////////////////////////////////////////////
Expand All @@ -143,15 +143,14 @@ namespace waves

// change from row to column major storage
size_t n2 = nx_ * ny_;
dsydy = fft_out5_.reshaped<Eigen::ColMajor>(n2, 1).real() * lambda_ * -1.0;
dsxdx = fft_out6_.reshaped<Eigen::ColMajor>(n2, 1).real() * lambda_ * -1.0;
dsxdy = fft_out7_.reshaped<Eigen::ColMajor>(n2, 1).real() * lambda_ * 1.0;
dsydy = fft_out5_.reshaped<Eigen::ColMajor>(n2, 1) * lambda_ * -1.0;
dsxdx = fft_out6_.reshaped<Eigen::ColMajor>(n2, 1) * lambda_ * -1.0;
dsxdy = fft_out7_.reshaped<Eigen::ColMajor>(n2, 1) * lambda_ * 1.0;
}

//////////////////////////////////////////////////
void WaveSimulationFFTImpl::ComputeBaseAmplitudes()
{
InitFFTCoeffStorage();
InitWaveNumbers();

// initialise arrays - always update as algo switch may change shape.
Expand Down Expand Up @@ -310,15 +309,17 @@ namespace waves

zhat_(0, 0) = complex(0.0, 0.0);

// write into fft_h_, fft_h_ikx_, fft_h_iky_, etc.
/// write into fft_h_, fft_h_ikx_, fft_h_iky_, etc.
/// \note the inner loop has iky < ny_ / 2 + 1
/// as we exploit the Hermitian symmetry in the FFT
const complex iunit(0.0, 1.0);
const complex czero(0.0, 0.0);

for (int ikx = 0, idx = 0; ikx < nx_; ++ikx)
for (int ikx = 0; ikx < nx_; ++ikx)
{
double kx = kx_fft_[ikx];
double kx2 = kx*kx;
for (int iky = 0; iky < ny_; ++iky, ++idx)
for (int iky = 0; iky < ny_/2+1; ++iky)
{
double ky = ky_fft_[iky];
double ky2 = ky*ky;
Expand Down Expand Up @@ -367,20 +368,6 @@ namespace waves
}
}

//////////////////////////////////////////////////
void WaveSimulationFFTImpl::InitFFTCoeffStorage()
{
// initialise storage for Fourier coefficients
fft_h_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_h_ikx_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_h_iky_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_sx_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_sy_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_h_kxkx_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_h_kyky_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_h_kxky_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
}

//////////////////////////////////////////////////
void WaveSimulationFFTImpl::InitWaveNumbers()
{
Expand All @@ -404,53 +391,67 @@ namespace waves
//////////////////////////////////////////////////
void WaveSimulationFFTImpl::CreateFFTWPlans()
{
/// \note the input and output arrays may be overridden during
/// planning, so allocate here before initialising.
/// https://www.fftw.org/fftw3_doc/Complex-DFTs.html

// allocate storage for Fourier coefficients
fft_h_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_/2+1);
fft_h_ikx_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_/2+1);
fft_h_iky_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_/2+1);
fft_sx_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_/2+1);
fft_sy_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_/2+1);
fft_h_kxkx_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_/2+1);
fft_h_kyky_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_/2+1);
fft_h_kxky_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_/2+1);

// elevation
fft_out0_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_out1_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_out2_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_out0_ = Eigen::MatrixXdRowMajor::Zero(nx_, ny_);
fft_out1_ = Eigen::MatrixXdRowMajor::Zero(nx_, ny_);
fft_out2_ = Eigen::MatrixXdRowMajor::Zero(nx_, ny_);

// xy-displacements
fft_out3_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_out4_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_out5_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_out6_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_out7_ = Eigen::MatrixXcdRowMajor::Zero(nx_, ny_);
fft_out3_ = Eigen::MatrixXdRowMajor::Zero(nx_, ny_);
fft_out4_ = Eigen::MatrixXdRowMajor::Zero(nx_, ny_);
fft_out5_ = Eigen::MatrixXdRowMajor::Zero(nx_, ny_);
fft_out6_ = Eigen::MatrixXdRowMajor::Zero(nx_, ny_);
fft_out7_ = Eigen::MatrixXdRowMajor::Zero(nx_, ny_);

// elevation
fft_plan0_ = fftw_plan_dft_2d(nx_, ny_,
fft_plan0_ = fftw_plan_dft_c2r_2d(nx_, ny_,
reinterpret_cast<fftw_complex*>(fft_h_.data()),
reinterpret_cast<fftw_complex*>(fft_out0_.data()),
FFTW_BACKWARD, FFTW_ESTIMATE);
fft_plan1_ = fftw_plan_dft_2d(nx_, ny_,
reinterpret_cast<double*>(fft_out0_.data()),
FFTW_ESTIMATE);
fft_plan1_ = fftw_plan_dft_c2r_2d(nx_, ny_,
reinterpret_cast<fftw_complex*>(fft_h_ikx_.data()),
reinterpret_cast<fftw_complex*>(fft_out1_.data()),
FFTW_BACKWARD, FFTW_ESTIMATE);
fft_plan2_ = fftw_plan_dft_2d(nx_, ny_,
reinterpret_cast<double*>(fft_out1_.data()),
FFTW_ESTIMATE);
fft_plan2_ = fftw_plan_dft_c2r_2d(nx_, ny_,
reinterpret_cast<fftw_complex*>(fft_h_iky_.data()),
reinterpret_cast<fftw_complex*>(fft_out2_.data()),
FFTW_BACKWARD, FFTW_ESTIMATE);
reinterpret_cast<double*>(fft_out2_.data()),
FFTW_ESTIMATE);

// xy-displacements
fft_plan3_ = fftw_plan_dft_2d(nx_, ny_,
fft_plan3_ = fftw_plan_dft_c2r_2d(nx_, ny_,
reinterpret_cast<fftw_complex*>(fft_sx_.data()),
reinterpret_cast<fftw_complex*>(fft_out3_.data()),
FFTW_BACKWARD, FFTW_ESTIMATE);
fft_plan4_ = fftw_plan_dft_2d(nx_, ny_,
reinterpret_cast<double*>(fft_out3_.data()),
FFTW_ESTIMATE);
fft_plan4_ = fftw_plan_dft_c2r_2d(nx_, ny_,
reinterpret_cast<fftw_complex*>(fft_sy_.data()),
reinterpret_cast<fftw_complex*>(fft_out4_.data()),
FFTW_BACKWARD, FFTW_ESTIMATE);
fft_plan5_ = fftw_plan_dft_2d(nx_, ny_,
reinterpret_cast<double*>(fft_out4_.data()),
FFTW_ESTIMATE);
fft_plan5_ = fftw_plan_dft_c2r_2d(nx_, ny_,
reinterpret_cast<fftw_complex*>(fft_h_kxkx_.data()),
reinterpret_cast<fftw_complex*>(fft_out5_.data()),
FFTW_BACKWARD, FFTW_ESTIMATE);
fft_plan6_ = fftw_plan_dft_2d(nx_, ny_,
reinterpret_cast<double*>(fft_out5_.data()),
FFTW_ESTIMATE);
fft_plan6_ = fftw_plan_dft_c2r_2d(nx_, ny_,
reinterpret_cast<fftw_complex*>(fft_h_kyky_.data()),
reinterpret_cast<fftw_complex*>(fft_out6_.data()),
FFTW_BACKWARD, FFTW_ESTIMATE);
fft_plan7_ = fftw_plan_dft_2d(nx_, ny_,
reinterpret_cast<double*>(fft_out6_.data()),
FFTW_ESTIMATE);
fft_plan7_ = fftw_plan_dft_c2r_2d(nx_, ny_,
reinterpret_cast<fftw_complex*>(fft_h_kxky_.data()),
reinterpret_cast<fftw_complex*>(fft_out7_.data()),
FFTW_BACKWARD, FFTW_ESTIMATE);
reinterpret_cast<double*>(fft_out7_.data()),
FFTW_ESTIMATE);
}

//////////////////////////////////////////////////
Expand Down
Loading