Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup of previous commit "simplify relocate" #99

Merged
merged 4 commits into from
Dec 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,18 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Added B+tree multimap for internal (future) use. [#93](https://github.com/tzaeschke/phtree-cpp/issues/93)

### Changed
- Rewrote relocate(). This should be much cleaner now and slightly faster. [#98](https://github.com/tzaeschke/phtree-cpp/pull/98)
- Rewrote relocate(). This should be much cleaner now and slightly faster.
[#98](https://github.com/tzaeschke/phtree-cpp/pull/98), [#99](https://github.com/tzaeschke/phtree-cpp/pull/99)

- Cleaned up HandleCollision() and key comparison functions. [#97](https://github.com/tzaeschke/phtree-cpp/pull/97)
- Improved performance by eliminating memory indirection for DIM > 3.
This was enabled by referencing "Node" directly in "Entry" which was enabled by
implanting an indirection in array_map. [#96](https://github.com/tzaeschke/phtree-cpp/pull/96)
- Improved performance of window queries by executing them partially as point queries.
This works best for point datasets, and somewhat for box datasets with "include" queries.
There is no benefit for "intersection" queries. [#88](https://github.com/tzaeschke/phtree-cpp/issues/88)
- Improved benchmarks for insert and query to use a more compact format. [#91](https://github.com/tzaeschke/phtree-cpp/pull/91)
- Improved benchmarks for insert and query to use a more compact format.
[#91](https://github.com/tzaeschke/phtree-cpp/pull/91)
- Improved performance of window queries by optimizing calculation of min/max masks.
Improved performance of queries and updates by changing bit-width of min/max masks and
hc_pos_t. [#95](https://github.com/tzaeschke/phtree-cpp/pull/95)
Expand Down
7 changes: 2 additions & 5 deletions benchmark/update_mm_d_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ typename std::enable_if<
UpdateEntry(TestMap<SCENARIO, DIM>& tree, std::vector<UpdateOp<DIM>>& updates) {
size_t n = 0;
for (auto& update : updates) {
n += tree.relocate(update.old_, update.new_, update.id_, false);
n += tree.relocate(update.old_, update.new_, update.id_);
}
return n;
}
Expand All @@ -177,10 +177,7 @@ typename std::enable_if<SCENARIO == Scenario::MM_SET_RELOCATE_IF, size_t>::type
size_t n = 0;
for (auto& update : updates) {
n += tree.relocate_if(
update.old_,
update.new_,
[&update](const payload_t& v) { return v == update.id_; },
false);
update.old_, update.new_, [&update](const payload_t& v) { return v == update.id_; });
}
return n;
}
Expand Down
35 changes: 0 additions & 35 deletions include/phtree/common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,21 +103,6 @@ static bit_width_t NumberOfDivergingBits(
return MAX_BIT_WIDTH<SCALAR> - CountLeadingZeros(diff2);
}

//template <dimension_t DIM, typename SCALAR>
//static bit_width_t NumberOfDivergingBits2(
// const PhPoint<DIM, SCALAR>& v1, const PhPoint<DIM, SCALAR>& v2) {
// // write all differences to diff, we just check diff afterwards
// SCALAR diff = 0;
// //bit_mask_t<SCALAR> diff = 0;
// for (dimension_t i = 0; i < DIM; ++i) {
// diff |= v1[i] ^ v2[i];
// }
// bit_mask_t<SCALAR> diff2 = reinterpret_cast<bit_mask_t<SCALAR>&>(diff);
// assert(CountLeadingZeros(diff2) <= MAX_BIT_WIDTH<SCALAR>);
// return MAX_BIT_WIDTH<SCALAR> - CountLeadingZeros(diff2);
//}


template <dimension_t DIM, typename SCALAR>
static bool KeyEquals(
const PhPoint<DIM, SCALAR>& key_a, const PhPoint<DIM, SCALAR>& key_b, bit_width_t ignore_bits) {
Expand All @@ -127,26 +112,6 @@ static bool KeyEquals(
}
return diff >> ignore_bits == 0;
}
//template <dimension_t DIM, typename SCALAR>
//static bool KeyEquals0(
// const PhPoint<DIM, SCALAR>& key_a, const PhPoint<DIM, SCALAR>& key_b, SCALAR mask) {
// for (dimension_t i = 0; i < DIM; ++i) {
// if (((key_a[i] ^ key_b[i]) & mask) != 0) {
// return false;
// }
// }
// return true;
//}
//
//template <dimension_t DIM, typename SCALAR>
//static bool KeyEquals1(
// const PhPoint<DIM, SCALAR>& key_a, const PhPoint<DIM, SCALAR>& key_b, SCALAR mask) {
// SCALAR sum = 0;
// for (dimension_t i = 0; i < DIM; ++i) {
// sum |= (key_a[i] ^ key_b[i]);
// }
// return (sum & mask) == 0;
//}

// ************************************************************************
// String helpers
Expand Down
24 changes: 5 additions & 19 deletions include/phtree/common/flat_sparse_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,15 @@ class sparse_map {
}

template <typename... Args>
auto emplace(Args&&... args) {
return try_emplace_base(std::forward<Args>(args)...);
auto emplace(size_t key, Args&&... args) {
auto iter = lower_bound(key);
return try_emplace_base(iter, key, std::forward<Args>(args)...);
}

template <typename... Args>
auto try_emplace(size_t key, Args&&... args) {
return try_emplace_base(key, std::forward<Args>(args)...);
auto iter = lower_bound(key);
return try_emplace_base(iter, key, std::forward<Args>(args)...);
}

template <typename... Args>
Expand Down Expand Up @@ -136,22 +138,6 @@ class sparse_map {
}
}

template <typename... Args>
auto try_emplace_base(KeyT key, Args&&... args) {
auto it = lower_bound(key);
if (it != end() && it->first == key) {
return std::make_pair(it, false);
} else {
auto x = data_.emplace(
it,
std::piecewise_construct,
std::forward_as_tuple(key),
std::forward_as_tuple(std::forward<Args>(args)...));
return std::make_pair(x, true);
}
}

// TODO merge with above
template <typename... Args>
auto try_emplace_base(const iterator& it, KeyT key, Args&&... args) {
if (it != end() && it->first == key) {
Expand Down
5 changes: 0 additions & 5 deletions include/phtree/v16/entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,6 @@ class Entry {
kd_key_ = key;
}

void SetValue(T&& value) noexcept {
assert(union_type_ == VALUE);
value_ = std::move(value);
}

void SetNode(NodeT&& node, bit_width_t postfix_len) noexcept {
postfix_len_ = static_cast<std::uint16_t>(postfix_len);
DestroyUnion();
Expand Down
7 changes: 2 additions & 5 deletions include/phtree/v16/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,9 @@ using EntryMap = typename std::conditional_t<
DIM <= 8,
sparse_map<hc_pos_dim_t<DIM>, Entry>,
b_plus_tree_map<std::uint64_t, Entry, (uint64_t(1) << DIM)>>>;
//template <dimension_t DIM, typename Entry>
//using EntryMap = std::map<hc_pos_dim_t<DIM>, Entry>;

template <dimension_t DIM, typename Entry>
using EntryIterator = typename std::remove_const<decltype(EntryMap<DIM, Entry>().begin())>::type;
using EntryIterator = typename std::remove_const_t<decltype(EntryMap<DIM, Entry>().begin())>;
template <dimension_t DIM, typename Entry>
using EntryIteratorC = decltype(EntryMap<DIM, Entry>().cbegin());

Expand Down Expand Up @@ -167,8 +165,7 @@ class Node {
return const_cast<Node&>(*this).Find(key, postfix_len);
}

// TODO rename to lower_bound()
auto FindIter(const KeyT& key, bit_width_t postfix_len, bool& found) {
auto LowerBound(const KeyT& key, bit_width_t postfix_len, bool& found) {
hc_pos_t hc_pos = CalcPosInArray(key, postfix_len);
auto iter = entries_.lower_bound(hc_pos);
found =
Expand Down
126 changes: 9 additions & 117 deletions include/phtree/v16/phtree_v16.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ class PhTreeV16 {
* whose second element is a bool that is true if the value was actually relocated.
*/
template <typename PRED>
size_t relocate_if2(const KeyT& old_key, const KeyT& new_key, PRED pred) {
[[deprecated]] size_t relocate_if2(const KeyT& old_key, const KeyT& new_key, PRED pred) {
auto pair = _find_two(old_key, new_key);
auto& iter_old = pair.first;
auto& iter_new = pair.second;
Expand Down Expand Up @@ -315,8 +315,6 @@ class PhTreeV16 {
return 1;
}

// TODO is this a memory leak problem?????

/*
* Relocate (move) an entry from one position to another, subject to a predicate.
*
Expand All @@ -327,77 +325,6 @@ class PhTreeV16 {
* @return A pair, whose first element points to the possibly relocated value, and
* whose second element is a bool that is true if the value was actually relocated.
*/
// TODO: test work with old relocate_if(). It also work with std::map
// TODO test also FAILS with B-Plus_tree_map; but not with array_map!
// WITHOUT ITERATOR
template <typename PRED>
auto relocate_ifX(const KeyT& old_key, const KeyT& new_key, PRED&& pred) {
bit_width_t n_diverging_bits = NumberOfDivergingBits(old_key, new_key);

EntryT* current_entry = &root_; // An entry.
EntryT* old_node_entry = nullptr; // Node that contains entry to be removed
EntryT* old_node_entry_parent = nullptr; // Parent of the old_node_entry
EntryT* new_node_entry = nullptr; // Node that will contain new entry
// Find node for removal
while (current_entry && current_entry->IsNode()) {
old_node_entry_parent = old_node_entry;
old_node_entry = current_entry;
auto postfix_len = old_node_entry->GetNodePostfixLen();
if (postfix_len + 1 >= n_diverging_bits) {
new_node_entry = old_node_entry;
}
// TODO stop earlier, we are going to have to redo this after insert....
current_entry = current_entry->GetNode().Find(old_key, postfix_len);
}
EntryT* old_entry = current_entry; // Entry to be removed

// Can we stop already?
if (old_entry == nullptr || !pred(old_entry->GetValue())) {
return 0; // old_key not found!
}

// Are the keys equal? Or is the quadrant the same? -> same entry
if (n_diverging_bits == 0 || old_node_entry->GetNodePostfixLen() >= n_diverging_bits) {
old_entry->SetKey(new_key);
return 1;
}

// Find node for insertion
auto new_entry = new_node_entry;
while (new_entry && new_entry->IsNode()) {
new_node_entry = new_entry;
new_entry = new_entry->GetNode().Find(new_key, new_entry->GetNodePostfixLen());
}
if (new_entry != nullptr) {
return 0; // Entry exists!
}
bool is_inserted = false;
// TODO remove "if"
if (new_entry == nullptr) { // TODO use in-node pointer
new_entry = &new_node_entry->GetNode().Emplace(
is_inserted,
new_key,
new_node_entry->GetNodePostfixLen(),
std::move(old_entry->ExtractValue()));
}

// Erase old value. See comments in try_emplace(iterator) for details.
if (old_node_entry_parent == new_node_entry) {
// In this case the old_node_entry may have been invalidated by the previous
// insertion.
old_node_entry = old_node_entry_parent;
}

bool found = false;
while (old_node_entry) {
old_node_entry = old_node_entry->GetNode().Erase(
old_key, old_node_entry, old_node_entry != &root_, found);
}
assert(found);
return 1;
}

// WITH ITERATOR
template <typename PRED>
auto relocate_if(const KeyT& old_key, const KeyT& new_key, PRED&& pred) {
bit_width_t n_diverging_bits = NumberOfDivergingBits(old_key, new_key);
Expand All @@ -418,7 +345,7 @@ class PhTreeV16 {
}
// TODO stop earlier, we are going to have to redo this after insert....
bool is_found = false;
iter = current_entry->GetNode().FindIter(old_key, postfix_len, is_found);
iter = current_entry->GetNode().LowerBound(old_key, postfix_len, is_found);
current_entry = is_found ? &iter->second : nullptr;
}
EntryT* old_entry = current_entry; // Entry to be removed
Expand All @@ -439,15 +366,13 @@ class PhTreeV16 {
bool is_found = false;
while (new_entry && new_entry->IsNode()) {
new_node_entry = new_entry;
iter = new_entry->GetNode().FindIter(new_key, new_entry->GetNodePostfixLen(), is_found);
iter =
new_entry->GetNode().LowerBound(new_key, new_entry->GetNodePostfixLen(), is_found);
new_entry = is_found ? &iter->second : nullptr;
}
if (is_found) {
return 0; // Entry exists
}
if (new_entry != nullptr) {
return 0; // Entry exists!
}
bool is_inserted = false;
new_entry = &new_node_entry->GetNode().Emplace(
iter,
Expand All @@ -463,13 +388,13 @@ class PhTreeV16 {
old_node_entry = old_node_entry_parent;
}

bool found = false;
// TODO use in-node pointer
is_found = false;
// TODO use in-node iterator if possible
while (old_node_entry) {
old_node_entry = old_node_entry->GetNode().Erase(
old_key, old_node_entry, old_node_entry != &root_, found);
old_key, old_node_entry, old_node_entry != &root_, is_found);
}
assert(found);
assert(is_found);
return 1;
}

Expand Down Expand Up @@ -525,18 +450,6 @@ class PhTreeV16 {
return std::make_pair(iter1, iter2);
}

// TODO what is different/required for MM:
// - "old" is not always removed
// - "new" may exist, but may not result in collision
// TODO i.e. we need three conditions:
// - pred_move ( {return is_valid(old); } )
// - pred_can_be_moved ( { return destination.emplace() == true; } )
// - pred_remove_old ( { source.erase(); return source.empty(); } )

// TODO
// - relocate(key, key, value) relocates 0 or 1 entries...?
// - relocate_if(key, key) relocates potentially many keys

public:
/*
* Tries to locate two entries that are 'close' to each other.
Expand All @@ -555,7 +468,7 @@ class PhTreeV16 {
bit_width_t n_diverging_bits = NumberOfDivergingBits(old_key, new_key);

if (!verify_exists && n_diverging_bits == 0) {
return 1; // TODO COUNT()?
return 1; // We omit calling because that would require looking up the entry...
}

EntryT* new_entry = &root_; // An entry.
Expand Down Expand Up @@ -682,7 +595,6 @@ class PhTreeV16 {
return std::make_pair(iter1, iter2);
}

public:
/*
* Iterates over all entries in the tree. The optional filter allows filtering entries and nodes
* (=sub-trees) before returning / traversing them. By default, all entries are returned. Filter
Expand Down Expand Up @@ -862,26 +774,6 @@ class PhTreeV16 {
return {parent, entry_iter};
}

std::pair<EntryT*, EntryIteratorC<DIM, EntryT>> find_starting_node(
const KeyT& key1, const KeyT& key2, bit_width_t& max_conflicting_bits) {
auto& prefix = key1;
max_conflicting_bits = NumberOfDivergingBits(key1, key2);
EntryT* parent = &root_;
if (max_conflicting_bits > root_.GetNodePostfixLen()) {
// Abort early if we have no shared prefix in the query
return {&root_, root_.GetNode().Entries().end()};
}
EntryIterator<DIM, EntryT> entry_iter =
root_.GetNode().FindPrefix(prefix, max_conflicting_bits, root_.GetNodePostfixLen());
while (entry_iter != parent->GetNode().Entries().end() && entry_iter->second.IsNode() &&
entry_iter->second.GetNodePostfixLen() >= max_conflicting_bits) {
parent = &entry_iter->second;
entry_iter = parent->GetNode().FindPrefix(
prefix, max_conflicting_bits, parent->GetNodePostfixLen());
}
return {parent, entry_iter};
}

size_t num_entries_;
// Contract: root_ contains a Node with 0 or more entries. The root node is the only Node
// that is allowed to have less than two entries.
Expand Down
1 change: 0 additions & 1 deletion test/phtree_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,6 @@ TEST(PhTreeTest, TestUpdateWithRelocateIf) {
TestPoint<dim> pNew{pOld[0] + delta, pOld[1] + delta, pOld[2] + delta};
if ((delta > 0.0 && tree.find(pNew) != tree.end()) || (i % 2 != 0)) {
// Skip this, there is already another entry
std::cout << "x = " << x << " i=" << i << std::endl;
ASSERT_EQ(0, tree.relocate_if(pOld, pNew, pred));
} else {
ASSERT_EQ(1, tree.relocate_if(pOld, pNew, pred));
Expand Down