Skip to content

Commit 5ce28bf

Browse files
authored
Simplify relocate() (#98)
1 parent 6706463 commit 5ce28bf

14 files changed

+626
-31
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1010
- Added B+tree multimap for internal (future) use. [#93](https://github.com/tzaeschke/phtree-cpp/issues/93)
1111

1212
### Changed
13+
- Rewrote relocate(). This should be much cleaner now and slightly faster. [#98](https://github.com/tzaeschke/phtree-cpp/pull/98)
1314
- Cleaned up HandleCollision() and key comparison functions. [#97](https://github.com/tzaeschke/phtree-cpp/pull/97)
1415
- Improved performance by eliminating memory indirection for DIM > 3.
1516
This was enabled by referencing "Node" directly in "Entry" which was enabled by

benchmark/update_mm_d_benchmark.cc

+5-2
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ typename std::enable_if<
166166
UpdateEntry(TestMap<SCENARIO, DIM>& tree, std::vector<UpdateOp<DIM>>& updates) {
167167
size_t n = 0;
168168
for (auto& update : updates) {
169-
n += tree.relocate(update.old_, update.new_, update.id_);
169+
n += tree.relocate(update.old_, update.new_, update.id_, false);
170170
}
171171
return n;
172172
}
@@ -177,7 +177,10 @@ typename std::enable_if<SCENARIO == Scenario::MM_SET_RELOCATE_IF, size_t>::type
177177
size_t n = 0;
178178
for (auto& update : updates) {
179179
n += tree.relocate_if(
180-
update.old_, update.new_, [&update](const payload_t& v) { return v == update.id_; });
180+
update.old_,
181+
update.new_,
182+
[&update](const payload_t& v) { return v == update.id_; },
183+
false);
181184
}
182185
return n;
183186
}

include/phtree/common/b_plus_tree_hash_map.h

+2
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ class b_plus_tree_hash_set {
9292
using TreeT = b_plus_tree_hash_set<T, HashT, PredT>;
9393

9494
public:
95+
using value_compare = PredT;
9596
explicit b_plus_tree_hash_set() : root_{new NLeafT(nullptr, nullptr, nullptr)}, size_{0} {};
9697

9798
b_plus_tree_hash_set(const b_plus_tree_hash_set& other) : size_{other.size_} {
@@ -360,6 +361,7 @@ class b_plus_tree_hash_map {
360361
using EntryT = std::pair<KeyT, ValueT>;
361362

362363
public:
364+
using value_compare = PredT;
363365
b_plus_tree_hash_map() : map_{} {};
364366

365367
b_plus_tree_hash_map(const b_plus_tree_hash_map&) = default;

include/phtree/common/b_plus_tree_map.h

+28-2
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ class b_plus_tree_map {
7777
// trees with a single inner leaf. '*2' is added because leaf filling is not compact.
7878
constexpr static size_t INNER_MAX = std::min(std::uint64_t(16), COUNT_MAX / LEAF_MAX * 2);
7979
static_assert(LEAF_MAX > 2 && LEAF_MAX < 1000);
80-
static_assert(COUNT_MAX <= (16*16) || (INNER_MAX > 2 && INNER_MAX < 1000));
80+
static_assert(COUNT_MAX <= (16 * 16) || (INNER_MAX > 2 && INNER_MAX < 1000));
8181
// TODO This could be improved but requires a code change to move > 1 entry when merging.
8282
constexpr static size_t LEAF_MIN = 2; // std::max((size_t)2, M_leaf >> 2);
8383
constexpr static size_t INNER_MIN = 2; // std::max((size_t)2, M_inner >> 2);
@@ -175,12 +175,33 @@ class b_plus_tree_map {
175175
return try_emplace(std::forward<Args>(args)...);
176176
}
177177

178+
template <typename... Args>
179+
auto emplace_hint(const IterT& hint, KeyT key, Args&&... args) {
180+
if (empty() || hint.is_end()) {
181+
return emplace(key, std::forward<Args>(args)...);
182+
}
183+
assert(hint.node_->is_leaf());
184+
185+
auto node = hint.node_->as_leaf();
186+
187+
// The following may drop a valid hint but is easy to check.
188+
if (node->data_.begin()->first > key || (node->data_.end() - 1)->first < key) {
189+
return emplace(key, std::forward<Args>(args)...);
190+
}
191+
return node->try_emplace(key, root_, size_, std::forward<Args>(args)...);
192+
}
193+
178194
template <typename... Args>
179195
auto try_emplace(KeyT key, Args&&... args) {
180196
auto leaf = lower_bound_or_last_leaf(key, root_);
181197
return leaf->try_emplace(key, root_, size_, std::forward<Args>(args)...);
182198
}
183199

200+
template <typename... Args>
201+
auto try_emplace(IterT iter, KeyT key, Args&&... args) {
202+
return emplace_hint(iter, key, std::forward<Args>(args)...);
203+
}
204+
184205
void erase(KeyT key) {
185206
auto leaf = lower_bound_leaf(key, root_);
186207
if (leaf != nullptr) {
@@ -198,6 +219,10 @@ class b_plus_tree_map {
198219
return size_;
199220
}
200221

222+
[[nodiscard]] bool empty() const noexcept {
223+
return size_ == 0;
224+
}
225+
201226
void _check() {
202227
size_t count = 0;
203228
NLeafT* prev_leaf = nullptr;
@@ -207,7 +232,8 @@ class b_plus_tree_map {
207232
}
208233

209234
private:
210-
using bpt_leaf_super = bpt_node_data<KeyT, NInnerT, NLeafT, NLeafT, LeafEntryT, IterT, LEAF_CFG>;
235+
using bpt_leaf_super =
236+
bpt_node_data<KeyT, NInnerT, NLeafT, NLeafT, LeafEntryT, IterT, LEAF_CFG>;
211237
class bpt_node_leaf : public bpt_leaf_super {
212238
public:
213239
explicit bpt_node_leaf(NInnerT* parent, NLeafT* prev, NLeafT* next) noexcept

include/phtree/common/common.h

+35
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,21 @@ static bit_width_t NumberOfDivergingBits(
103103
return MAX_BIT_WIDTH<SCALAR> - CountLeadingZeros(diff2);
104104
}
105105

106+
//template <dimension_t DIM, typename SCALAR>
107+
//static bit_width_t NumberOfDivergingBits2(
108+
// const PhPoint<DIM, SCALAR>& v1, const PhPoint<DIM, SCALAR>& v2) {
109+
// // write all differences to diff, we just check diff afterwards
110+
// SCALAR diff = 0;
111+
// //bit_mask_t<SCALAR> diff = 0;
112+
// for (dimension_t i = 0; i < DIM; ++i) {
113+
// diff |= v1[i] ^ v2[i];
114+
// }
115+
// bit_mask_t<SCALAR> diff2 = reinterpret_cast<bit_mask_t<SCALAR>&>(diff);
116+
// assert(CountLeadingZeros(diff2) <= MAX_BIT_WIDTH<SCALAR>);
117+
// return MAX_BIT_WIDTH<SCALAR> - CountLeadingZeros(diff2);
118+
//}
119+
120+
106121
template <dimension_t DIM, typename SCALAR>
107122
static bool KeyEquals(
108123
const PhPoint<DIM, SCALAR>& key_a, const PhPoint<DIM, SCALAR>& key_b, bit_width_t ignore_bits) {
@@ -112,6 +127,26 @@ static bool KeyEquals(
112127
}
113128
return diff >> ignore_bits == 0;
114129
}
130+
//template <dimension_t DIM, typename SCALAR>
131+
//static bool KeyEquals0(
132+
// const PhPoint<DIM, SCALAR>& key_a, const PhPoint<DIM, SCALAR>& key_b, SCALAR mask) {
133+
// for (dimension_t i = 0; i < DIM; ++i) {
134+
// if (((key_a[i] ^ key_b[i]) & mask) != 0) {
135+
// return false;
136+
// }
137+
// }
138+
// return true;
139+
//}
140+
//
141+
//template <dimension_t DIM, typename SCALAR>
142+
//static bool KeyEquals1(
143+
// const PhPoint<DIM, SCALAR>& key_a, const PhPoint<DIM, SCALAR>& key_b, SCALAR mask) {
144+
// SCALAR sum = 0;
145+
// for (dimension_t i = 0; i < DIM; ++i) {
146+
// sum |= (key_a[i] ^ key_b[i]);
147+
// }
148+
// return (sum & mask) == 0;
149+
//}
115150

116151
// ************************************************************************
117152
// String helpers

include/phtree/common/flat_array_map.h

+6
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,12 @@ class array_map {
276276
return data_->try_emplace_base(index, std::forward<Args>(args)...);
277277
}
278278

279+
template <typename... Args>
280+
auto try_emplace(const iterator&, size_t index, Args&&... args) {
281+
// We ignore the iterator, this is an array based collection, so access is ~O(1).
282+
return data_->try_emplace_base(index, std::forward<Args>(args)...);
283+
}
284+
279285
bool erase(size_t index) {
280286
return data_->erase(index);
281287
}

include/phtree/common/flat_sparse_map.h

+23-2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ namespace improbable::phtree {
3939
template <typename KeyT, typename ValueT>
4040
class sparse_map {
4141
using Entry = std::pair<KeyT, ValueT>;
42+
using iterator = typename std::vector<Entry>::iterator;
4243

4344
public:
4445
explicit sparse_map() : data_{} {
@@ -104,15 +105,20 @@ class sparse_map {
104105
return try_emplace_base(key, std::forward<Args>(args)...);
105106
}
106107

108+
template <typename... Args>
109+
auto try_emplace(iterator iter, size_t key, Args&&... args) {
110+
return try_emplace_base(iter, key, std::forward<Args>(args)...);
111+
}
112+
107113
void erase(KeyT key) {
108114
auto it = lower_bound(key);
109115
if (it != end() && it->first == key) {
110116
data_.erase(it);
111117
}
112118
}
113119

114-
void erase(const typename std::vector<Entry>::iterator& iterator) {
115-
data_.erase(iterator);
120+
void erase(const iterator& iter) {
121+
data_.erase(iter);
116122
}
117123

118124
[[nodiscard]] size_t size() const {
@@ -145,6 +151,21 @@ class sparse_map {
145151
}
146152
}
147153

154+
// TODO merge with above
155+
template <typename... Args>
156+
auto try_emplace_base(const iterator& it, KeyT key, Args&&... args) {
157+
if (it != end() && it->first == key) {
158+
return std::make_pair(it, false);
159+
} else {
160+
auto x = data_.emplace(
161+
it,
162+
std::piecewise_construct,
163+
std::forward_as_tuple(key),
164+
std::forward_as_tuple(std::forward<Args>(args)...));
165+
return std::make_pair(x, true);
166+
}
167+
}
168+
148169
std::vector<Entry> data_;
149170
};
150171

include/phtree/phtree_multimap.h

+60-13
Original file line numberDiff line numberDiff line change
@@ -414,17 +414,33 @@ class PhTreeMultiMap {
414414
* @param new_key The new position
415415
* @param value The value that needs to be relocated. The relocate() method uses the value's
416416
* '==' operator to identify the entry that should be moved.
417-
* @param count_equals This setting toggles whether a relocate() between two identical keys
418-
* should be counted as 'success' and return '1'. The function may still return '0'
419-
* in case the keys are not in the index.
420-
* Background: the intuitively correct behavior is to return '1' for identical
421-
* (exising) keys. However, avoiding this check can considerably speed up
422-
* relocate() calls, especially when using a ConverterMultiply.
417+
* @param verify_exists This setting toggles whether a relocate() between two identical keys
418+
* should verify whether the key actually exists before returning '1'.
419+
* If set to 'false', this function will return '1' if the keys are identical,
420+
* without checking whether the keys actually exist. Avoiding this check can
421+
* considerably speed up relocate() calls, especially when using a
422+
* ConverterMultiply.
423423
*
424424
* @return '1' if a value was found and reinserted, otherwise '0'.
425425
*/
426426
template <typename T2>
427-
size_t relocate(const Key& old_key, const Key& new_key, T2&& value, bool count_equals = true) {
427+
size_t relocate(const Key& old_key, const Key& new_key, T2&& value, bool verify_exists = true) {
428+
auto fn = [&value](BUCKET& src, BUCKET& dst) -> size_t {
429+
auto it = src.find(value);
430+
if (it != src.end() && dst.emplace(std::move(*it)).second) {
431+
src.erase(it);
432+
return 1;
433+
}
434+
return 0;
435+
};
436+
auto count_fn = [&value](BUCKET& src) -> size_t { return src.find(value) != src.end(); };
437+
return tree_._relocate_mm(
438+
converter_.pre(old_key), converter_.pre(new_key), verify_exists, fn, count_fn);
439+
}
440+
441+
template <typename T2>
442+
[[deprecated]] size_t relocate2(
443+
const Key& old_key, const Key& new_key, T2&& value, bool count_equals = true) {
428444
auto pair = tree_._find_or_create_two_mm(
429445
converter_.pre(old_key), converter_.pre(new_key), count_equals);
430446
auto& iter_old = pair.first;
@@ -478,17 +494,48 @@ class PhTreeMultiMap {
478494
* @param new_key The new position
479495
* @param predicate The predicate that is used for every value at position old_key to evaluate
480496
* whether it should be relocated to new_key.
481-
* @param count_equals This setting toggles whether a relocate() between two identical keys
482-
* should be counted as 'success' and return '1'. The function may still return '0'
483-
* in case the keys are not in the index.
484-
* Background: the intuitively correct behavior is to return '1' for identical
485-
* (exising) keys. However, avoiding this check can considerably speed up
486-
* relocate() calls, especially when using a ConverterMultiply.
497+
* @param verify_exists This setting toggles whether a relocate() between two identical keys
498+
* should verify whether the key actually exists before returning '1'.
499+
* If set to 'false', this function will return '1' if the keys are identical,
500+
* without checking whether the keys actually exist. Avoiding this check can
501+
* considerably speed up relocate() calls, especially when using a
502+
* ConverterMultiply.
487503
*
488504
* @return the number of values that were relocated.
489505
*/
490506
template <typename PREDICATE>
491507
size_t relocate_if(
508+
const Key& old_key, const Key& new_key, PREDICATE&& pred_fn, bool verify_exists = true) {
509+
auto fn = [&pred_fn](BUCKET& src, BUCKET& dst) -> size_t {
510+
size_t result = 0;
511+
auto iter_src = src.begin();
512+
while (iter_src != src.end()) {
513+
if (pred_fn(*iter_src) && dst.emplace(std::move(*iter_src)).second) {
514+
iter_src = src.erase(iter_src);
515+
++result;
516+
} else {
517+
++iter_src;
518+
}
519+
}
520+
return result;
521+
};
522+
auto count_fn = [&pred_fn](BUCKET& src) -> size_t {
523+
size_t result = 0;
524+
auto iter_src = src.begin();
525+
while (iter_src != src.end()) {
526+
if (pred_fn(*iter_src)) {
527+
++result;
528+
}
529+
++iter_src;
530+
}
531+
return result;
532+
};
533+
return tree_._relocate_mm(
534+
converter_.pre(old_key), converter_.pre(new_key), verify_exists, fn, count_fn);
535+
}
536+
537+
template <typename PREDICATE>
538+
[[deprecated]] size_t relocate_if2(
492539
const Key& old_key, const Key& new_key, PREDICATE&& predicate, bool count_equals = true) {
493540
auto pair = tree_._find_or_create_two_mm(
494541
converter_.pre(old_key), converter_.pre(new_key), count_equals);

include/phtree/v16/entry.h

+5
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,11 @@ class Entry {
177177
kd_key_ = key;
178178
}
179179

180+
void SetValue(T&& value) noexcept {
181+
assert(union_type_ == VALUE);
182+
value_ = std::move(value);
183+
}
184+
180185
void SetNode(NodeT&& node, bit_width_t postfix_len) noexcept {
181186
postfix_len_ = static_cast<std::uint16_t>(postfix_len);
182187
DestroyUnion();

include/phtree/v16/node.h

+35-1
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,11 @@ using EntryMap = typename std::conditional_t<
4747
DIM <= 8,
4848
sparse_map<hc_pos_dim_t<DIM>, Entry>,
4949
b_plus_tree_map<std::uint64_t, Entry, (uint64_t(1) << DIM)>>>;
50+
//template <dimension_t DIM, typename Entry>
51+
//using EntryMap = std::map<hc_pos_dim_t<DIM>, Entry>;
5052

5153
template <dimension_t DIM, typename Entry>
52-
using EntryIterator = decltype(EntryMap<DIM, Entry>().begin());
54+
using EntryIterator = typename std::remove_const<decltype(EntryMap<DIM, Entry>().begin())>::type;
5355
template <dimension_t DIM, typename Entry>
5456
using EntryIteratorC = decltype(EntryMap<DIM, Entry>().cbegin());
5557

@@ -131,6 +133,20 @@ class Node {
131133
return HandleCollision(entry, is_inserted, key, postfix_len, std::forward<Args>(args)...);
132134
}
133135

136+
template <typename IterT, typename... Args>
137+
EntryT& Emplace(IterT iter, bool& is_inserted, const KeyT& key,
138+
bit_width_t postfix_len, Args&&... args) {
139+
hc_pos_t hc_pos = CalcPosInArray(key, postfix_len); // TODO pass in -> should be known!
140+
auto emplace_result = entries_.try_emplace(iter, hc_pos, key, std::forward<Args>(args)...);
141+
auto& entry = emplace_result.first->second;
142+
// Return if emplace succeed, i.e. there was no entry.
143+
if (emplace_result.second) {
144+
is_inserted = true;
145+
return entry;
146+
}
147+
return HandleCollision(entry, is_inserted, key, postfix_len, std::forward<Args>(args)...);
148+
}
149+
134150
/*
135151
* Returns the value (T or Node) if the entry exists and matches the key. Child nodes are
136152
* _not_ traversed.
@@ -151,6 +167,24 @@ class Node {
151167
return const_cast<Node&>(*this).Find(key, postfix_len);
152168
}
153169

170+
// TODO rename to lower_bound()
171+
auto FindIter(const KeyT& key, bit_width_t postfix_len, bool& found) {
172+
hc_pos_t hc_pos = CalcPosInArray(key, postfix_len);
173+
auto iter = entries_.lower_bound(hc_pos);
174+
found =
175+
(iter != entries_.end() && iter->first == hc_pos &&
176+
DoesEntryMatch(iter->second, key, postfix_len));
177+
return iter;
178+
}
179+
180+
auto End() {
181+
return entries_.end();
182+
}
183+
184+
auto End() const {
185+
return entries_.end();
186+
}
187+
154188
EntryIteratorC<DIM, EntryT> FindPrefix(
155189
const KeyT& prefix, bit_width_t prefix_post_len, bit_width_t node_postfix_len) const {
156190
assert(prefix_post_len <= node_postfix_len);

0 commit comments

Comments
 (0)