Skip to content

Commit 80fad45

Browse files
authored
feat: Avoid inserting an empty leaf in indexed trees on update (#10334)
Fix network kind tests
1 parent 24abcfe commit 80fad45

File tree

16 files changed

+491
-251
lines changed

16 files changed

+491
-251
lines changed

barretenberg/cpp/src/barretenberg/crypto/merkle_tree/indexed_tree/content_addressed_indexed_tree.hpp

+73-47
Original file line numberDiff line numberDiff line change
@@ -251,18 +251,20 @@ class ContentAddressedIndexedTree : public ContentAddressedAppendOnlyTree<Store,
251251
const InsertionGenerationCallback& completion);
252252

253253
struct InsertionUpdates {
254+
// On insertion, we always update a low leaf. If it's creating a new leaf, we need to update the pointer to
255+
// point to the new one, if it's an update to an existing leaf, we need to change its payload.
254256
LeafUpdate low_leaf_update;
255-
IndexedLeafValueType new_leaf;
256-
index_t new_leaf_index;
257+
// We don't create new leaves on update
258+
std::optional<std::pair<IndexedLeafValueType, index_t>> new_leaf;
257259
};
258260

259261
struct SequentialInsertionGenerationResponse {
260-
std::shared_ptr<std::vector<InsertionUpdates>> updates_to_perform;
262+
std::vector<InsertionUpdates> updates_to_perform;
261263
index_t highest_index;
262264
};
263265

264266
using SequentialInsertionGenerationCallback =
265-
std::function<void(const TypedResponse<SequentialInsertionGenerationResponse>&)>;
267+
std::function<void(TypedResponse<SequentialInsertionGenerationResponse>&)>;
266268
void generate_sequential_insertions(const std::vector<LeafValueType>& values,
267269
const SequentialInsertionGenerationCallback& completion);
268270

@@ -1421,6 +1423,14 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::add_or_update_values_seq
14211423
const AddSequentiallyCompletionCallbackWithWitness& completion,
14221424
bool capture_witness)
14231425
{
1426+
1427+
// This struct is used to collect some state from the asynchronous operations we are about to perform
1428+
struct IntermediateResults {
1429+
std::vector<InsertionUpdates> updates_to_perform;
1430+
size_t appended_leaves = 0;
1431+
};
1432+
auto results = std::make_shared<IntermediateResults>();
1433+
14241434
auto on_error = [=](const std::string& message) {
14251435
try {
14261436
TypedResponse<AddIndexedDataSequentiallyResponse<LeafValueType>> response;
@@ -1442,7 +1452,7 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::add_or_update_values_seq
14421452
ReadTransactionPtr tx = store_->create_read_transaction();
14431453
store_->get_meta(meta, *tx, true);
14441454

1445-
index_t new_total_size = values.size() + meta.size;
1455+
index_t new_total_size = results->appended_leaves + meta.size;
14461456
meta.size = new_total_size;
14471457
meta.root = store_->get_current_root(*tx, true);
14481458

@@ -1451,23 +1461,29 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::add_or_update_values_seq
14511461

14521462
if (capture_witness) {
14531463
// Split results->update_witnesses between low_leaf_witness_data and insertion_witness_data
1454-
// Currently we always insert an empty leaf, even if it's an update, so we can split based
1455-
// on the index of the witness data
14561464
response.inner.insertion_witness_data =
14571465
std::make_shared<std::vector<LeafUpdateWitnessData<LeafValueType>>>();
1458-
;
1466+
response.inner.insertion_witness_data->reserve(results->updates_to_perform.size());
1467+
14591468
response.inner.low_leaf_witness_data =
14601469
std::make_shared<std::vector<LeafUpdateWitnessData<LeafValueType>>>();
1461-
;
1462-
1463-
for (size_t i = 0; i < updates_completion_response.inner.update_witnesses->size(); ++i) {
1464-
LeafUpdateWitnessData<LeafValueType>& witness_data =
1465-
updates_completion_response.inner.update_witnesses->at(i);
1466-
// If even, it's a low leaf, if odd, it's an insertion witness
1467-
if (i % 2 == 0) {
1468-
response.inner.low_leaf_witness_data->push_back(witness_data);
1470+
response.inner.low_leaf_witness_data->reserve(results->updates_to_perform.size());
1471+
1472+
size_t current_witness_index = 0;
1473+
for (size_t i = 0; i < results->updates_to_perform.size(); ++i) {
1474+
LeafUpdateWitnessData<LeafValueType> low_leaf_witness =
1475+
updates_completion_response.inner.update_witnesses->at(current_witness_index++);
1476+
response.inner.low_leaf_witness_data->push_back(low_leaf_witness);
1477+
1478+
// If this update has an insertion, append the real witness
1479+
if (results->updates_to_perform.at(i).new_leaf.has_value()) {
1480+
LeafUpdateWitnessData<LeafValueType> insertion_witness =
1481+
updates_completion_response.inner.update_witnesses->at(current_witness_index++);
1482+
response.inner.insertion_witness_data->push_back(insertion_witness);
14691483
} else {
1470-
response.inner.insertion_witness_data->push_back(witness_data);
1484+
// If it's an update, append an empty witness
1485+
response.inner.insertion_witness_data->push_back(LeafUpdateWitnessData<LeafValueType>(
1486+
IndexedLeafValueType::empty(), 0, std::vector<fr>(depth_)));
14711487
}
14721488
}
14731489
}
@@ -1479,23 +1495,33 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::add_or_update_values_seq
14791495
// This signals the completion of the insertion data generation
14801496
// Here we'll perform all updates to the tree
14811497
SequentialInsertionGenerationCallback insertion_generation_completed =
1482-
[=, this](const TypedResponse<SequentialInsertionGenerationResponse>& insertion_response) {
1498+
[=, this](TypedResponse<SequentialInsertionGenerationResponse>& insertion_response) {
14831499
if (!insertion_response.success) {
14841500
on_error(insertion_response.message);
14851501
return;
14861502
}
14871503

14881504
std::shared_ptr<std::vector<LeafUpdate>> flat_updates = std::make_shared<std::vector<LeafUpdate>>();
1489-
for (size_t i = 0; i < insertion_response.inner.updates_to_perform->size(); ++i) {
1490-
InsertionUpdates& insertion_update = insertion_response.inner.updates_to_perform->at(i);
1505+
flat_updates->reserve(insertion_response.inner.updates_to_perform.size() * 2);
1506+
1507+
for (size_t i = 0; i < insertion_response.inner.updates_to_perform.size(); ++i) {
1508+
InsertionUpdates& insertion_update = insertion_response.inner.updates_to_perform.at(i);
14911509
flat_updates->push_back(insertion_update.low_leaf_update);
1492-
flat_updates->push_back(LeafUpdate{
1493-
.leaf_index = insertion_update.new_leaf_index,
1494-
.updated_leaf = insertion_update.new_leaf,
1495-
.original_leaf = IndexedLeafValueType::empty(),
1496-
});
1510+
if (insertion_update.new_leaf.has_value()) {
1511+
results->appended_leaves++;
1512+
IndexedLeafValueType new_leaf;
1513+
index_t new_leaf_index = 0;
1514+
std::tie(new_leaf, new_leaf_index) = insertion_update.new_leaf.value();
1515+
flat_updates->push_back(LeafUpdate{
1516+
.leaf_index = new_leaf_index,
1517+
.updated_leaf = new_leaf,
1518+
.original_leaf = IndexedLeafValueType::empty(),
1519+
});
1520+
}
14971521
}
1498-
1522+
// We won't use anymore updates_to_perform
1523+
results->updates_to_perform = std::move(insertion_response.inner.updates_to_perform);
1524+
assert(insertion_response.inner.updates_to_perform.size() == 0);
14991525
if (capture_witness) {
15001526
perform_updates(flat_updates->size(), flat_updates, final_completion);
15011527
return;
@@ -1513,27 +1539,12 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::generate_sequential_inse
15131539
{
15141540
execute_and_report<SequentialInsertionGenerationResponse>(
15151541
[=, this](TypedResponse<SequentialInsertionGenerationResponse>& response) {
1516-
response.inner.highest_index = 0;
1517-
response.inner.updates_to_perform = std::make_shared<std::vector<InsertionUpdates>>();
1518-
15191542
TreeMeta meta;
15201543
ReadTransactionPtr tx = store_->create_read_transaction();
15211544
store_->get_meta(meta, *tx, true);
15221545

15231546
RequestContext requestContext;
15241547
requestContext.includeUncommitted = true;
1525-
// Ensure that the tree is not going to be overfilled
1526-
index_t new_total_size = values.size() + meta.size;
1527-
if (new_total_size > max_size_) {
1528-
throw std::runtime_error(format("Unable to insert values into tree ",
1529-
meta.name,
1530-
" new size: ",
1531-
new_total_size,
1532-
" max size: ",
1533-
max_size_));
1534-
}
1535-
// The highest index touched will be the last leaf index, since we append a zero for updates
1536-
response.inner.highest_index = new_total_size - 1;
15371548
requestContext.root = store_->get_current_root(*tx, true);
15381549
// Fetch the frontier (non empty nodes to the right) of the tree. This will ensure that perform_updates or
15391550
// perform_updates_without_witness has all the cached nodes it needs to perform the insertions. See comment
@@ -1542,12 +1553,15 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::generate_sequential_inse
15421553
find_leaf_hash(meta.size - 1, requestContext, *tx, true);
15431554
}
15441555

1556+
index_t current_size = meta.size;
1557+
15451558
for (size_t i = 0; i < values.size(); ++i) {
15461559
const LeafValueType& new_payload = values[i];
1560+
// TODO(Alvaro) - Rethink this. I think it's fine for us to interpret empty values as a regular update
1561+
// (it'd empty out the payload of the zero leaf)
15471562
if (new_payload.is_empty()) {
15481563
continue;
15491564
}
1550-
index_t index_of_new_leaf = i + meta.size;
15511565
fr value = new_payload.get_key();
15521566

15531567
// This gives us the leaf that need updating
@@ -1594,25 +1608,25 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::generate_sequential_inse
15941608
.updated_leaf = IndexedLeafValueType::empty(),
15951609
.original_leaf = low_leaf,
15961610
},
1597-
.new_leaf = IndexedLeafValueType::empty(),
1598-
.new_leaf_index = index_of_new_leaf,
1611+
.new_leaf = std::nullopt,
15991612
};
16001613

16011614
if (!is_already_present) {
16021615
// Update the current leaf to point it to the new leaf
16031616
IndexedLeafValueType new_leaf =
16041617
IndexedLeafValueType(new_payload, low_leaf.nextIndex, low_leaf.nextValue);
1605-
1618+
index_t index_of_new_leaf = current_size;
16061619
low_leaf.nextIndex = index_of_new_leaf;
16071620
low_leaf.nextValue = value;
1621+
current_size++;
16081622
// Cache the new leaf
16091623
store_->set_leaf_key_at_index(index_of_new_leaf, new_leaf);
16101624
store_->put_cached_leaf_by_index(index_of_new_leaf, new_leaf);
16111625
// Update cached low leaf
16121626
store_->put_cached_leaf_by_index(low_leaf_index, low_leaf);
16131627

16141628
insertion_update.low_leaf_update.updated_leaf = low_leaf;
1615-
insertion_update.new_leaf = new_leaf;
1629+
insertion_update.new_leaf = std::pair(new_leaf, index_of_new_leaf);
16161630
} else if (IndexedLeafValueType::is_updateable()) {
16171631
// Update the current leaf's value, don't change it's link
16181632
IndexedLeafValueType replacement_leaf =
@@ -1630,8 +1644,20 @@ void ContentAddressedIndexedTree<Store, HashingPolicy>::generate_sequential_inse
16301644
" is already present"));
16311645
}
16321646

1633-
response.inner.updates_to_perform->push_back(insertion_update);
1647+
response.inner.updates_to_perform.push_back(insertion_update);
1648+
}
1649+
1650+
// Ensure that the tree is not going to be overfilled
1651+
if (current_size > max_size_) {
1652+
throw std::runtime_error(format("Unable to insert values into tree ",
1653+
meta.name,
1654+
" new size: ",
1655+
current_size,
1656+
" max size: ",
1657+
max_size_));
16341658
}
1659+
// The highest index touched will be current_size - 1
1660+
response.inner.highest_index = current_size - 1;
16351661
},
16361662
completion);
16371663
}

0 commit comments

Comments
 (0)