Skip to content

Commit 7fa8f84

Browse files
authored
chore: Optimise grand product computation round based on active ranges (#10460)
Skip computation of the numerator and denominator of z_perm at indexes that are not part of the active ranges, and refine the threshold in commit_structured for `z_perm`. New benchmarks: **Now: 5.6 s difference**. We still see a difference in the time taken to commit to z_perm between ambient traces of size 2^19 and 2^20. This is caused by the fact that the complement of the active ranges (i.e. the ranges in the trace blocks where z_perm is constant) is larger, because the blocks themselves are larger. We make sure to at least avoid computing and committing to z_perm after the final active wire index.
1 parent f5de7d1 commit 7fa8f84

File tree

13 files changed

+79
-44
lines changed

13 files changed

+79
-44
lines changed

barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp

+31-28
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ template <class Curve> class CommitmentKey {
8585
*/
8686
Commitment commit(PolynomialSpan<const Fr> polynomial)
8787
{
88-
PROFILE_THIS();
88+
PROFILE_THIS_NAME("commit");
8989
// We must have a power-of-2 SRS points *after* subtracting by start_index.
9090
size_t dyadic_poly_size = numeric::round_up_power_2(polynomial.size());
9191
// Because pippenger prefers a power-of-2 size, we must choose a starting index for the points so that we don't
@@ -133,7 +133,7 @@ template <class Curve> class CommitmentKey {
133133
*/
134134
Commitment commit_sparse(PolynomialSpan<const Fr> polynomial)
135135
{
136-
PROFILE_THIS();
136+
PROFILE_THIS_NAME("commit_sparse");
137137
const size_t poly_size = polynomial.size();
138138
ASSERT(polynomial.end_index() <= srs->get_monomial_size());
139139

@@ -204,21 +204,24 @@ template <class Curve> class CommitmentKey {
204204
* @return Commitment
205205
*/
206206
Commitment commit_structured(PolynomialSpan<const Fr> polynomial,
207-
const std::vector<std::pair<size_t, size_t>>& active_ranges)
207+
const std::vector<std::pair<size_t, size_t>>& active_ranges,
208+
size_t final_active_wire_idx = 0)
208209
{
209-
BB_OP_COUNT_TIME();
210+
PROFILE_THIS_NAME("commit_structured");
210211
ASSERT(polynomial.end_index() <= srs->get_monomial_size());
211212

212213
// Percentage of nonzero coefficients beyond which we resort to the conventional commit method
213214
constexpr size_t NONZERO_THRESHOLD = 75;
214215

216+
// Compute the number of non-zero coefficients in the polynomial
215217
size_t total_num_scalars = 0;
216-
for (const auto& range : active_ranges) {
217-
total_num_scalars += range.second - range.first;
218+
for (const auto& [first, second] : active_ranges) {
219+
total_num_scalars += second - first;
218220
}
219221

220222
// Compute "active" percentage of polynomial; resort to standard commit if appropriate
221-
size_t percentage_nonzero = total_num_scalars * 100 / polynomial.size();
223+
size_t polynomial_size = final_active_wire_idx != 0 ? final_active_wire_idx : polynomial.size();
224+
size_t percentage_nonzero = total_num_scalars * 100 / polynomial_size;
222225
if (percentage_nonzero > NONZERO_THRESHOLD) {
223226
return commit(polynomial);
224227
}
@@ -259,9 +262,10 @@ template <class Curve> class CommitmentKey {
259262
* @return Commitment
260263
*/
261264
Commitment commit_structured_with_nonzero_complement(PolynomialSpan<const Fr> polynomial,
262-
const std::vector<std::pair<size_t, size_t>>& active_ranges)
265+
const std::vector<std::pair<size_t, size_t>>& active_ranges,
266+
size_t final_active_wire_idx = 0)
263267
{
264-
BB_OP_COUNT_TIME();
268+
PROFILE_THIS_NAME("commit_structured_with_nonzero_complement");
265269
ASSERT(polynomial.end_index() <= srs->get_monomial_size());
266270

267271
using BatchedAddition = BatchedAffineAddition<Curve>;
@@ -273,20 +277,21 @@ template <class Curve> class CommitmentKey {
273277
// Note: the range from the end of the last active range to the end of the polynomial is excluded from the
274278
// complement since the polynomial is assumed to be zero there.
275279
std::vector<std::pair<size_t, size_t>> active_ranges_complement;
280+
// Also compute total number of scalars in the constant regions
281+
size_t total_num_complement_scalars = 0;
276282
for (size_t i = 0; i < active_ranges.size() - 1; ++i) {
277283
const size_t start = active_ranges[i].second;
278284
const size_t end = active_ranges[i + 1].first;
279-
active_ranges_complement.emplace_back(start, end);
280-
}
281-
282-
// Compute the total number of scalars in the constant regions
283-
size_t total_num_complement_scalars = 0;
284-
for (const auto& range : active_ranges_complement) {
285-
total_num_complement_scalars += range.second - range.first;
285+
if (end > start) {
286+
active_ranges_complement.emplace_back(start, end);
287+
total_num_complement_scalars += end - start;
288+
}
286289
}
287290

291+
size_t polynomial_size = final_active_wire_idx != 0 ? final_active_wire_idx : polynomial.size();
288292
// Compute percentage of polynomial comprised of constant blocks; resort to standard commit if appropriate
289-
size_t percentage_constant = total_num_complement_scalars * 100 / polynomial.size();
293+
size_t percentage_constant = total_num_complement_scalars * 100 / polynomial_size;
294+
290295
if (percentage_constant < CONSTANT_THRESHOLD) {
291296
return commit(polynomial);
292297
}
@@ -299,12 +304,11 @@ template <class Curve> class CommitmentKey {
299304
// TODO(https://github.com/AztecProtocol/barretenberg/issues/1131): Peak memory usage could be improved by
300305
// performing this copy and the subsequent summation as a precomputation prior to constructing the point table.
301306
std::vector<G1> points;
302-
points.reserve(2 * total_num_complement_scalars);
303-
for (const auto& range : active_ranges_complement) {
304-
const size_t start = 2 * range.first;
305-
const size_t end = 2 * range.second;
306-
for (size_t i = start; i < end; i += 2) {
307-
points.emplace_back(point_table[i]);
307+
308+
points.reserve(total_num_complement_scalars);
309+
for (const auto& [start, end] : active_ranges_complement) {
310+
for (size_t i = start; i < end; i++) {
311+
points.emplace_back(point_table[2 * i]);
308312
}
309313
}
310314

@@ -313,17 +317,16 @@ template <class Curve> class CommitmentKey {
313317
std::vector<Fr> unique_scalars;
314318
std::vector<size_t> sequence_counts;
315319
for (const auto& range : active_ranges_complement) {
316-
if (range.second - range.first > 0) { // only ranges with nonzero length
317-
unique_scalars.emplace_back(polynomial.span[range.first]);
318-
sequence_counts.emplace_back(range.second - range.first);
319-
}
320+
unique_scalars.emplace_back(polynomial.span[range.first]);
321+
sequence_counts.emplace_back(range.second - range.first);
320322
}
321323

322324
// Reduce each sequence to a single point
323325
auto reduced_points = BatchedAddition::add_in_place(points, sequence_counts);
324326

325327
// Compute the full commitment as the sum of the "active" region commitment and the constant region contribution
326-
Commitment result = commit_structured(polynomial, active_ranges);
328+
Commitment result = commit_structured(polynomial, active_ranges, final_active_wire_idx);
329+
327330
for (auto [scalar, point] : zip_view(unique_scalars, reduced_points)) {
328331
result = result + point * scalar;
329332
}

barretenberg/cpp/src/barretenberg/ecc/batched_affine_addition/batched_affine_addition.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ template <typename Curve>
1010
std::vector<typename BatchedAffineAddition<Curve>::G1> BatchedAffineAddition<Curve>::add_in_place(
1111
const std::span<G1>& points, const std::vector<size_t>& sequence_counts)
1212
{
13+
PROFILE_THIS_NAME("BatchedAffineAddition::add_in_place");
1314
// Instantiate scratch space for point addition denominators and their calculation
1415
std::vector<Fq> scratch_space_vector(points.size());
1516
std::span<Fq> scratch_space(scratch_space_vector);

barretenberg/cpp/src/barretenberg/ecc/fields/field_impl.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -404,9 +404,10 @@ template <class T> void field<T>::batch_invert(field* coeffs, const size_t n) no
404404
batch_invert(std::span{ coeffs, n });
405405
}
406406

407+
// TODO(https://github.com/AztecProtocol/barretenberg/issues/1166)
407408
template <class T> void field<T>::batch_invert(std::span<field> coeffs) noexcept
408409
{
409-
BB_OP_COUNT_TRACK_NAME("fr::batch_invert");
410+
PROFILE_THIS_NAME("fr::batch_invert");
410411
const size_t n = coeffs.size();
411412

412413
auto temporaries_ptr = std::static_pointer_cast<field[]>(get_mem_slab(n * sizeof(field)));

barretenberg/cpp/src/barretenberg/flavor/flavor.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ template <typename FF, typename CommitmentKey_> class ProvingKey_ {
123123
// folded element by element.
124124
std::vector<FF> public_inputs;
125125

126-
// Ranges of the form [start, end) over which the execution trace is "active"
126+
// Ranges of the form [start, end) where witnesses have non-zero values (hence the execution trace is "active")
127127
std::vector<std::pair<size_t, size_t>> active_block_ranges;
128128

129129
ProvingKey_() = default;

barretenberg/cpp/src/barretenberg/plonk_honk_shared/execution_trace/execution_trace_usage_tracker.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ struct ExecutionTraceUsageTracker {
2222
MegaTraceFixedBlockSizes fixed_sizes; // fixed size of each block prescribed by structuring
2323
// Store active ranges based on the most current accumulator and those based on all but the most recently
2424
// accumulated circuit. The former is needed for the combiner calculation and the latter for the perturbator.
25+
// The ranges cover all areas in the trace where relations have nontrivial values.
2526
std::vector<Range> active_ranges;
2627
std::vector<Range> previous_active_ranges;
2728

barretenberg/cpp/src/barretenberg/plonk_honk_shared/library/grand_product_library.hpp

+26-10
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,11 @@ namespace bb {
5656
template <typename Flavor, typename GrandProdRelation>
5757
void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials,
5858
bb::RelationParameters<typename Flavor::FF>& relation_parameters,
59-
size_t size_override = 0)
59+
size_t size_override = 0,
60+
std::vector<std::pair<size_t, size_t>> active_block_ranges = {})
6061
{
62+
PROFILE_THIS_NAME("compute_grand_product");
63+
6164
using FF = typename Flavor::FF;
6265
using Polynomial = typename Flavor::Polynomial;
6366
using Accumulator = std::tuple_element_t<0, typename GrandProdRelation::SumcheckArrayOfValuesOverSubrelations>;
@@ -84,22 +87,34 @@ void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials,
8487
Polynomial numerator{ domain_size, domain_size };
8588
Polynomial denominator{ domain_size, domain_size };
8689

90+
auto check_is_active = [&](size_t idx) {
91+
if (active_block_ranges.empty()) {
92+
return true;
93+
}
94+
return std::any_of(active_block_ranges.begin(), active_block_ranges.end(), [idx](const auto& range) {
95+
return idx >= range.first && idx < range.second;
96+
});
97+
};
98+
8799
// Step (1)
88100
// Populate `numerator` and `denominator` with the algebra described by Relation
101+
FF gamma_fourth = relation_parameters.gamma.pow(4);
89102
parallel_for(num_threads, [&](size_t thread_idx) {
90-
typename Flavor::AllValues evaluations;
91-
// TODO(https://github.com/AztecProtocol/barretenberg/issues/940): construction of evaluations is equivalent to
92-
// calling get_row which creates full copies. avoid?
103+
typename Flavor::AllValues row;
93104
const size_t start = idx_bounds[thread_idx].first;
94105
const size_t end = idx_bounds[thread_idx].second;
95106
for (size_t i = start; i < end; ++i) {
96-
for (auto [eval, full_poly] : zip_view(evaluations.get_all(), full_polynomials.get_all())) {
97-
eval = full_poly.size() > i ? full_poly[i] : 0;
107+
if (check_is_active(i)) {
108+
// TODO(https://github.com/AztecProtocol/barretenberg/issues/940):consider avoiding get_row if possible.
109+
row = full_polynomials.get_row(i);
110+
numerator.at(i) =
111+
GrandProdRelation::template compute_grand_product_numerator<Accumulator>(row, relation_parameters);
112+
denominator.at(i) = GrandProdRelation::template compute_grand_product_denominator<Accumulator>(
113+
row, relation_parameters);
114+
} else {
115+
numerator.at(i) = gamma_fourth;
116+
denominator.at(i) = gamma_fourth;
98117
}
99-
numerator.at(i) = GrandProdRelation::template compute_grand_product_numerator<Accumulator>(
100-
evaluations, relation_parameters);
101-
denominator.at(i) = GrandProdRelation::template compute_grand_product_denominator<Accumulator>(
102-
evaluations, relation_parameters);
103118
}
104119
});
105120

@@ -163,6 +178,7 @@ void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials,
163178
auto& grand_product_polynomial = GrandProdRelation::get_grand_product_polynomial(full_polynomials);
164179
// We have a 'virtual' 0 at the start (as this is a to-be-shifted polynomial)
165180
ASSERT(grand_product_polynomial.start_index() == 1);
181+
166182
parallel_for(num_threads, [&](size_t thread_idx) {
167183
const size_t start = idx_bounds[thread_idx].first;
168184
const size_t end = idx_bounds[thread_idx].second;

barretenberg/cpp/src/barretenberg/relations/databus_lookup_relation.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ template <typename FF_> class DatabusLookupRelationImpl {
231231
auto& relation_parameters,
232232
const size_t circuit_size)
233233
{
234+
PROFILE_THIS_NAME("Databus::compute_logderivative_inverse");
234235
auto& inverse_polynomial = BusData<bus_idx, Polynomials>::inverses(polynomials);
235236

236237
size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread

barretenberg/cpp/src/barretenberg/relations/logderiv_lookup_relation.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ template <typename FF_> class LogDerivLookupRelationImpl {
156156
auto& relation_parameters,
157157
const size_t circuit_size)
158158
{
159+
PROFILE_THIS_NAME("Lookup::compute_logderivative_inverse");
159160
auto& inverse_polynomial = get_inverse_polynomial(polynomials);
160161

161162
size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread

barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_flavor.hpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,8 @@ class MegaFlavor {
492492
*/
493493
void compute_logderivative_inverses(const RelationParameters<FF>& relation_parameters)
494494
{
495+
PROFILE_THIS_NAME("compute_logderivative_inverses");
496+
495497
// Compute inverses for conventional lookups
496498
LogDerivLookupRelation<FF>::compute_logderivative_inverse(
497499
this->polynomials, relation_parameters, this->circuit_size);
@@ -525,7 +527,7 @@ class MegaFlavor {
525527

526528
// Compute permutation grand product polynomial
527529
compute_grand_product<MegaFlavor, UltraPermutationRelation<FF>>(
528-
this->polynomials, relation_parameters, size_override);
530+
this->polynomials, relation_parameters, size_override, this->active_block_ranges);
529531
}
530532

531533
uint64_t estimate_memory()

barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ class UltraFlavor {
328328
[[nodiscard]] size_t get_polynomial_size() const { return q_c.size(); }
329329
[[nodiscard]] AllValues get_row(const size_t row_idx) const
330330
{
331-
PROFILE_THIS();
331+
PROFILE_THIS_NAME("UltraFlavor::get_row");
332332
AllValues result;
333333
for (auto [result_field, polynomial] : zip_view(result.get_all(), get_all())) {
334334
result_field = polynomial[row_idx];

barretenberg/cpp/src/barretenberg/trace_to_polynomials/trace_to_polynomials.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ typename TraceToPolynomials<Flavor>::TraceData TraceToPolynomials<Flavor>::const
150150
// otherwise, the next block starts immediately following the previous one
151151
offset += block.get_fixed_size(is_structured);
152152
}
153+
153154
return trace_data;
154155
}
155156

barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ template <IsUltraFlavor Flavor> void OinkProver<Flavor>::execute_grand_product_c
235235
{
236236
PROFILE_THIS_NAME("OinkProver::execute_grand_product_computation_round");
237237
// Compute the permutation grand product polynomial
238+
238239
proving_key->proving_key.compute_grand_product_polynomial(proving_key->relation_parameters,
239240
proving_key->final_active_wire_idx + 1);
240241

@@ -243,7 +244,9 @@ template <IsUltraFlavor Flavor> void OinkProver<Flavor>::execute_grand_product_c
243244
if (proving_key->get_is_structured()) {
244245
witness_commitments.z_perm =
245246
proving_key->proving_key.commitment_key->commit_structured_with_nonzero_complement(
246-
proving_key->proving_key.polynomials.z_perm, proving_key->proving_key.active_block_ranges);
247+
proving_key->proving_key.polynomials.z_perm,
248+
proving_key->proving_key.active_block_ranges,
249+
proving_key->final_active_wire_idx + 1);
247250
} else {
248251
witness_commitments.z_perm =
249252
proving_key->proving_key.commitment_key->commit(proving_key->proving_key.polynomials.z_perm);

barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.hpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
// clang-format on
2020
#include <utility>
2121

22+
#include "barretenberg/plonk_honk_shared/execution_trace/execution_trace_usage_tracker.hpp"
2223
#include "barretenberg/ultra_honk/decider_proving_key.hpp"
2324

2425
namespace bb {
@@ -40,16 +41,20 @@ template <IsUltraFlavor Flavor> class OinkProver {
4041
std::shared_ptr<DeciderPK> proving_key;
4142
std::shared_ptr<Transcript> transcript;
4243
std::string domain_separator;
44+
ExecutionTraceUsageTracker trace_usage_tracker;
45+
4346
typename Flavor::WitnessCommitments witness_commitments;
4447
typename Flavor::CommitmentLabels commitment_labels;
4548
using RelationSeparator = typename Flavor::RelationSeparator;
4649

4750
OinkProver(std::shared_ptr<DeciderPK> proving_key,
4851
const std::shared_ptr<typename Flavor::Transcript>& transcript = std::make_shared<Transcript>(),
49-
std::string domain_separator = "")
52+
std::string domain_separator = "",
53+
const ExecutionTraceUsageTracker& trace_usage_tracker = ExecutionTraceUsageTracker{})
5054
: proving_key(proving_key)
5155
, transcript(transcript)
5256
, domain_separator(std::move(domain_separator))
57+
, trace_usage_tracker(trace_usage_tracker)
5358
{}
5459

5560
void prove();

0 commit comments

Comments
 (0)