Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

An implementation of the Stehlé-Zimmermann algorithm #3996

Merged
merged 42 commits into from
May 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
d12a6d2
But we need polynomials.
pleroy Apr 25, 2024
cd5571d
Use cpp_rational polynomials.
pleroy Apr 27, 2024
f4c908e
Some progress.
pleroy Apr 27, 2024
0aed8ae
P̃ computation.
pleroy Apr 27, 2024
d3fe7e5
The lattice.
pleroy Apr 27, 2024
a5e213e
Sort by norm.
pleroy Apr 27, 2024
05f9005
Error path.
pleroy Apr 27, 2024
844960d
Find Q's coefficients.
pleroy Apr 27, 2024
5dd159a
Some compilation fixes.
pleroy Apr 28, 2024
66d842b
More fixes.
pleroy Apr 28, 2024
7dc0b4f
Fix all compilation errors.
pleroy Apr 28, 2024
e5dc8d2
Link error.
pleroy Apr 28, 2024
8eb32f4
Debugging and traces.
pleroy Apr 21, 2024
6962b56
Linear combination.
pleroy Apr 28, 2024
1b720a2
A complete implementation.
pleroy Apr 28, 2024
d2b65c6
Debugging.
pleroy Apr 29, 2024
017e6f9
Handle no integer zeroes.
pleroy Apr 30, 2024
74b9077
Scale Sin.
pleroy May 1, 2024
478ffbd
Maybe some progress, maybe not.
pleroy May 1, 2024
045eb38
Which norm is this?
pleroy May 1, 2024
fc7870f
A solution suspiciously close to the midpoint.
pleroy May 1, 2024
69c4e94
Mathematica floating-point.
pleroy May 1, 2024
907cd6f
Cleanup.
pleroy May 1, 2024
b32752f
More failure checks and GCD.
pleroy May 2, 2024
a812003
Zero test.
pleroy May 2, 2024
0ac92c3
Finally, a solution.
pleroy May 4, 2024
1ee625c
Merge branch 'master' into BadCases
pleroy May 4, 2024
5a41efa
Try to use cpp_int whenever possible, I broke the lattice.
pleroy May 4, 2024
0aeffd7
Rationals work better.
pleroy May 4, 2024
5f15235
Actually, integers and a copy.
pleroy May 4, 2024
d8c5389
Clean the test.
pleroy May 4, 2024
7643f06
DebugString support.
pleroy May 4, 2024
470a1b4
Numbers in generators.
pleroy May 5, 2024
8f61740
Some cleanup of matrix algorithms.
pleroy May 5, 2024
d368c68
Rounding.
pleroy May 5, 2024
96e503b
More quantities cleanups.
pleroy May 5, 2024
68f9a36
Bibliography.
pleroy May 5, 2024
eac99a3
Renaming.
pleroy May 5, 2024
09920ba
Remove traces.
pleroy May 5, 2024
fc459ca
Merge.
pleroy May 5, 2024
eb7f006
Bad merge.
pleroy May 5, 2024
33a95b8
Readying.
pleroy May 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions documentation/bibliography.bib
Original file line number Diff line number Diff line change
Expand Up @@ -2051,6 +2051,20 @@ @inproceedings{SofroniouSpaletta2002
venue = {Amsterdam, The Netherlands},
}

@inproceedings{StehléZimmermann2005,
author = {Stehlé, Damien and Zimmermann, Paul},
editor = {Montuschi, Paolo and Schwarz, Eric},
publisher = {IEEE Computer Society},
booktitle = {17th IEEE Symposium on Computer Arithmetic (ARITH'05)},
date = {2005-06},
doi = {10.1109/ARITH.2005.24},
eventdate = {2005-06-27/2005-06-29},
isbn = {0-7695-2366-8},
pages = {257--264},
title = {Gal's accurate tables method revisited},
venue = {Cape Cod, MA, USA},
}

@inproceedings{WuZhang1991,
author = {Wu, Xiaolin and Zhang, Kaizhong},
editor = {Storer, James A. and Reif, John H.},
Expand Down
Binary file modified documentation/bibliography.pdf
Binary file not shown.
29 changes: 24 additions & 5 deletions functions/accurate_table_generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,50 @@
#include <functional>
#include <vector>

#include "absl/status/statusor.h"
#include "boost/multiprecision/cpp_bin_float.hpp"
#include "boost/multiprecision/cpp_int.hpp"
#include "numerics/polynomial_in_monomial_basis.hpp"

namespace principia {
namespace functions {
namespace _accurate_table_generator {
namespace internal {

using namespace boost::multiprecision;
using namespace principia::numerics::_polynomial_in_monomial_basis;

using AccurateFunction = std::function<cpp_bin_float_50(cpp_rational const&)>;

template<typename ArgValue, int degree>
using AccuratePolynomial =
PolynomialInMonomialBasis<ArgValue, ArgValue, degree>;

template<std::int64_t zeroes>
cpp_rational ExhaustiveSearch(std::vector<AccurateFunction> const& functions,
cpp_rational const& starting_argument);
cpp_rational GalExhaustiveSearch(std::vector<AccurateFunction> const& functions,
cpp_rational const& starting_argument);

template<std::int64_t zeroes>
std::vector<cpp_rational> ExhaustiveMultisearch(
std::vector<cpp_rational> GalExhaustiveMultisearch(
std::vector<AccurateFunction> const& functions,
std::vector<cpp_rational> const& starting_arguments);

// Searches in an interval of radius |T / N| centered on |near_argument|. The
// |polynomials| must be the degree-2 Taylor approximations of the |functions|.
template<std::int64_t zeroes>
absl::StatusOr<cpp_rational> StehléZimmermannSimultaneousSearch(
std::array<AccurateFunction, 2> const& functions,
std::array<AccuratePolynomial<cpp_rational, 2>, 2> const& polynomials,
cpp_rational const& near_argument,
std::int64_t N,
std::int64_t T);

} // namespace internal

using internal::ExhaustiveMultisearch;
using internal::ExhaustiveSearch;
using internal::AccuratePolynomial;
using internal::GalExhaustiveMultisearch;
using internal::GalExhaustiveSearch;
using internal::StehléZimmermannSimultaneousSearch;

} // namespace _accurate_table_generator
} // namespace functions
Expand Down
258 changes: 236 additions & 22 deletions functions/accurate_table_generator_body.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,61 @@
#include <algorithm>
#include <future>
#include <limits>
#include <memory>
#include <thread>
#include <vector>

#include "base/for_all_of.hpp"
#include "base/tags.hpp"
#include "base/thread_pool.hpp"
#include "glog/logging.h"
#include "numerics/fixed_arrays.hpp"
#include "numerics/lattices.hpp"
#include "numerics/matrix_computations.hpp"
#include "numerics/matrix_views.hpp"
#include "quantities/elementary_functions.hpp"

namespace principia {
namespace functions {
namespace _accurate_table_generator {
namespace internal {

using namespace principia::base::_for_all_of;
using namespace principia::base::_tags;
using namespace principia::base::_thread_pool;
using namespace principia::numerics::_fixed_arrays;
using namespace principia::numerics::_lattices;
using namespace principia::numerics::_matrix_computations;
using namespace principia::numerics::_matrix_views;
using namespace principia::quantities::_elementary_functions;

constexpr std::int64_t ε_computation_points = 16;

template<int rows, int columns>
FixedMatrix<cpp_int, rows, columns> ToInt(
FixedMatrix<cpp_rational, rows, columns> const& m) {
FixedMatrix<cpp_int, rows, columns> result(uninitialized);
for (std::int64_t i = 0; i < rows; ++i) {
for (std::int64_t j = 0; j < columns; ++j) {
auto const& mᵢⱼ = m(i, j);
DCHECK_EQ(1, denominator(mᵢⱼ));
result(i, j) = numerator(m(i, j));
}
}
return result;
}

template<int rows, int columns>
FixedMatrix<cpp_rational, rows, columns> ToRational(
FixedMatrix<cpp_int, rows, columns> const& m) {
FixedMatrix<cpp_rational, rows, columns> result(uninitialized);
for (std::int64_t i = 0; i < rows; ++i) {
for (std::int64_t j = 0; j < columns; ++j) {
result(i, j) = m(i, j);
}
}
return result;
}

template<std::int64_t zeroes>
bool HasDesiredZeroes(cpp_bin_float_50 const& y) {
Expand All @@ -30,28 +73,8 @@ bool HasDesiredZeroes(cpp_bin_float_50 const& y) {
}

template<std::int64_t zeroes>
std::vector<cpp_rational> ExhaustiveMultisearch(
std::vector<AccurateFunction> const& functions,
std::vector<cpp_rational> const& starting_arguments) {
ThreadPool<cpp_rational> search_pool(std::thread::hardware_concurrency());

std::vector<std::future<cpp_rational>> futures;
for (std::int64_t i = 0; i < starting_arguments.size(); ++i) {
futures.push_back(search_pool.Add([i, &functions, &starting_arguments]() {
return ExhaustiveSearch<zeroes>(functions, starting_arguments[i]);
}));
}

std::vector<cpp_rational> results;
for (auto& future : futures) {
results.push_back(future.get());
}
return results;
}

template<std::int64_t zeroes>
cpp_rational ExhaustiveSearch(std::vector<AccurateFunction> const& functions,
cpp_rational const& starting_argument) {
cpp_rational GalExhaustiveSearch(std::vector<AccurateFunction> const& functions,
cpp_rational const& starting_argument) {
CHECK_LT(0, starting_argument);

// We will look for candidates both above and below |starting_argument|.
Expand Down Expand Up @@ -85,6 +108,197 @@ cpp_rational ExhaustiveSearch(std::vector<AccurateFunction> const& functions,
}
}

template<std::int64_t zeroes>
std::vector<cpp_rational> GalExhaustiveMultisearch(
std::vector<AccurateFunction> const& functions,
std::vector<cpp_rational> const& starting_arguments) {
ThreadPool<cpp_rational> search_pool(std::thread::hardware_concurrency());

std::vector<std::future<cpp_rational>> futures;
for (std::int64_t i = 0; i < starting_arguments.size(); ++i) {
futures.push_back(search_pool.Add([i, &functions, &starting_arguments]() {
return GalExhaustiveSearch<zeroes>(functions, starting_arguments[i]);
}));
}

std::vector<cpp_rational> results;
for (auto& future : futures) {
results.push_back(future.get());
}
return results;
}

template<std::int64_t zeroes>
absl::StatusOr<cpp_rational> StehléZimmermannSimultaneousSearch(
std::array<AccurateFunction, 2> const& functions,
std::array<AccuratePolynomial<cpp_rational, 2>, 2> const& polynomials,
cpp_rational const& near_argument,
std::int64_t const N,
std::int64_t const T) {
// This implementation follows [SZ05], section 3.1.
std::int64_t const M = 1 << zeroes;

// Preliminary: shift and rescale the functions and the polynomials:
// Fᵢ = N fᵢ(t / N)
// Pᵢ = N pᵢ(t / N)
std::array<AccurateFunction, 2> F;
std::array<std::optional<AccuratePolynomial<cpp_rational, 2>>, 2> P;
for (std::int64_t i = 0; i < functions.size(); ++i) {
F[i] = [&functions, i, N, &near_argument](cpp_rational const& t) {
// Here |t| <= |T|.
return N * functions[i](near_argument + t / N);
};
}
AccuratePolynomial<cpp_rational, 1> const shift_and_rescale(
{near_argument, cpp_rational(1, N)});
for (std::int64_t i = 0; i < polynomials.size(); ++i) {
P[i] = N * Compose(polynomials[i], shift_and_rescale);
}

// Step 2: compute ε. We don't care too much about its accuracy because in
// general the largest error is on the boundary of the domain, and anyway ε
// has virtually no incidence on the value of Mʹ.
cpp_rational const T_increment = cpp_rational(T, ε_computation_points);
cpp_bin_float_50 ε = 0;
for (std::int64_t i = 0; i < 2; ++i) {
for (cpp_rational t = -T; t <= T; t += T_increment) {
ε = std::max(ε, abs(F[i](t) - static_cast<cpp_bin_float_50>((*P[i])(t))));
}
}
VLOG(1) << "ε = " << ε;

// Step 3, first part: compute Mʹ and C. Give up is C is 0, which may happen
// if ε is too large.
auto const Mʹ = static_cast<std::int64_t>(floor(M / (2 + 2 * M * ε)));
auto const C = 3 * Mʹ;
if (C == 0) {
return absl::FailedPreconditionError("Error too large");
}
VLOG(1) << "C = " << C;

// Step 3, second part: compute P̃
std::array<std::optional<AccuratePolynomial<cpp_int, 2>>, 2> P̃;
AccuratePolynomial<cpp_rational, 1> const Tτ({0, T});
for (std::int64_t i = 0; i < P.size(); ++i) {
auto const composition_coefficients = Compose(C * *P[i], Tτ).coefficients();
AccuratePolynomial<cpp_int, 2>::Coefficients P̃_coefficients;
for_all_of(composition_coefficients, P̃_coefficients)
.loop([](auto const& composition_coefficient, auto& P̃_coefficient) {
P̃_coefficient = static_cast<cpp_int>(Round(composition_coefficient));
});
P̃[i] = AccuratePolynomial<cpp_int, 2>(P̃_coefficients);
VLOG(1) << "P̃[" << i << "] = " << *P̃[i];
}

// Step 5 and 6: form the lattice. Note that our vectors are in columns, not
// in rows as in the paper.
auto const& P̃₀_coefficients = P̃[0]->coefficients();
auto const& P̃₁_coefficients = P̃[1]->coefficients();

using Lattice = FixedMatrix<cpp_int, 5, 4>;

Lattice const L(
{C, 0, std::get<0>(P̃₀_coefficients), std::get<0>(P̃₁_coefficients),
0, C * T, std::get<1>(P̃₀_coefficients), std::get<1>(P̃₁_coefficients),
0, 0, std::get<2>(P̃₀_coefficients), std::get<2>(P̃₁_coefficients),
0, 0, 3, 0,
0, 0, 0, 3});
VLOG(1) << "L = " << L;

// Step 7: reduce the lattice.
// The lattice really has integer coefficients, but this is inconvenient to
// propagate through the matrix algorithms. (It would require copies instead
// of views for all the types, not just the ones we use here.)
Lattice const V = ToInt(LenstraLenstraLovász(ToRational(L)));
VLOG(1) << "V = " << V;

// Step 8: find the three shortest vectors of the reduced lattice. We sort
// the columns according to the L₂ norm.
std::array<std::unique_ptr<ColumnView<Lattice const>>, V.columns()> v;
for (std::int64_t i = 0; i < v.size(); ++i) {
// TODO(phl): Switch the matrices to use std::int64_t instead of int, and
// the cast below will go away.
v[i] = std::make_unique<ColumnView<Lattice const>>(
ColumnView<Lattice const>{.matrix = V,
.first_row = 0,
.last_row = V.rows() - 1,
.column = static_cast<int>(i)});
}
std::sort(v.begin(),
v.end(),
[](std::unique_ptr<ColumnView<Lattice const>> const& left,
std::unique_ptr<ColumnView<Lattice const>> const& right) {
return left->Norm²() < right->Norm²();
});

// Step 9: check that the shortest vectors are short enough.
auto norm1 = [](ColumnView<Lattice const> const& v) {
cpp_int norm1 = 0;
for (std::int64_t i = 0; i < v.size(); ++i) {
norm1 += abs(v[i]);
}
return norm1;
};

static constexpr std::int64_t dimension = 3;
FixedMatrix<cpp_rational, 5, dimension> w;
for (std::int64_t i = 0; i < dimension; ++i) {
auto const& v_i = *v[i];
VLOG(1) << "v[" << i << "] = " << v_i;
if (norm1(v_i) >= C) {
return absl::OutOfRangeError("Vectors too big");
}
}

// Step 10: compute Q by eliminating the last two variables. Give up if the
// degree 1 coefficient is 0, there is no solution.
std::array<cpp_int, dimension> Q_multipliers;
for (std::int64_t i = 0; i < dimension; ++i) {
auto const& v_i1 = *v[(i + 1) % dimension];
auto const& v_i2 = *v[(i + 2) % dimension];
Q_multipliers[i] = v_i1[3] * v_i2[4] - v_i1[4] * v_i2[3];
}

FixedVector<cpp_int, 2> Q_coefficients{};
for (std::int64_t i = 0; i < dimension; ++i) {
auto const& v_i = *v[i];
for (std::int64_t j = 0; j < Q_coefficients.size(); ++j) {
Q_coefficients[j] += Q_multipliers[i] * v_i[j];
}
}

if (Q_coefficients[1] == 0) {
return absl::NotFoundError("No integer zeroes");
}

AccuratePolynomial<cpp_rational, 1> const Q({Q_coefficients[0],
Q_coefficients[1]});
VLOG(1) << "Q = " << Q;

// Step 11: compute q and find its integer root (singular), if any.
AccuratePolynomial<cpp_rational, 1> const q =
Compose(Q, AccuratePolynomial<cpp_rational, 1>({0, cpp_rational(1, T)}));

cpp_rational const t₀ =
-std::get<0>(q.coefficients()) / std::get<1>(q.coefficients());
VLOG(1) << "t₀ = " << t₀;
if (abs(t₀) > T) {
return absl::NotFoundError("Out of bounds");
} else if (denominator(t₀) != 1) {
return absl::NotFoundError("Noninteger root");
}

for (auto const& Fi : F) {
auto const Fi_t₀ = Fi(t₀);
if (M * abs(Fi_t₀ - round(Fi_t₀)) >= 1) {
LOG(ERROR) << Fi_t₀ - round(Fi_t₀);
return absl::NotFoundError("Not enough zeroes");
}
}

return t₀ / N + near_argument;
}

} // namespace internal
} // namespace _accurate_table_generator
} // namespace functions
Expand Down
Loading