Skip to content

Commit

Permalink
update bench report
Browse files Browse the repository at this point in the history
Signed-off-by: wineway <wangyuweihx@gmail.com>
  • Loading branch information
wineway committed Aug 20, 2024
1 parent 873f447 commit eb6fb3a
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 33 deletions.
80 changes: 64 additions & 16 deletions src/common/ring.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

#include <cstdint>

#include "ring_generic_allocator.hpp"
#include "log.hpp"
#include "ring_generic_allocator.hpp"

static_assert(sizeof(std::atomic<uint32_t>) == sizeof(uint32_t), "");
static_assert(sizeof(std::atomic<uint64_t>) == sizeof(uint64_t), "");
Expand Down Expand Up @@ -244,20 +244,38 @@ struct Ring {
) {
bool success {};
uint32_t max = n;
uint32_t cons_tail;
const uint32_t capacity = this->capacity;

old_head = std::atomic_load_explicit(
reinterpret_cast<volatile std::atomic<uint32_t>*>(&this->prod_.head_
),
std::memory_order_relaxed
);
do {
/* Reset n to the initial burst count */
n = max;
old_head = this->prod_.head_;
/* add rmb barrier to avoid load/load reorder in weak
* memory model. It is noop on x86

/* Ensure the head is read before tail */
std::atomic_thread_fence(std::memory_order_acquire);

/* This load-acquire synchronizes with the store-release of ht->tail
 * in update_tail.
 */
sqk_smp_rmb();
cons_tail = std::atomic_load_explicit(
reinterpret_cast<volatile std::atomic<uint32_t>*>(
&this->cons_.tail_
),
std::memory_order_acquire
);

/*
 * The subtraction is done between two unsigned 32-bit values
 * (the result is always modulo 32 bits even if we have
 * old_head > cons_tail). So 'free_entries' is always between 0
 * and capacity (which is < size).
 */
free_entries = (this->capacity + this->cons_.tail_ - old_head);
free_entries = (capacity + cons_tail - old_head);
/* check that we have enough room in ring */
if (unlikely(n > free_entries)) {
if constexpr (transactional_prod) {
Expand All @@ -266,17 +284,21 @@ struct Ring {
n = free_entries;
}
}

if (n == 0) {
return 0;
}

new_head = old_head + n;
if constexpr (prod_sync_type == RingSyncType::SQK_RING_SYNC_MT) {
success = std::atomic_compare_exchange_strong(
success = std::atomic_compare_exchange_strong_explicit(
reinterpret_cast<volatile std::atomic<uint32_t>*>(
&this->prod_.head_
),
&old_head,
new_head
new_head,
std::memory_order_relaxed,
std::memory_order_relaxed
);
} else {
this->prod_.head_ = new_head, success = 1;
Expand Down Expand Up @@ -607,26 +629,40 @@ struct Ring {
uint32_t& entries
) {
unsigned int max = n;
uint32_t prod_tail;
int success;

/* move cons.head atomically */
old_head = std::atomic_load_explicit(
reinterpret_cast<volatile std::atomic<uint32_t>*>(&this->cons_.head_
),
std::memory_order_relaxed
);
do {
/* Restore n as it may change every loop */
n = max;

old_head = this->cons_.head_;

/* add rmb barrier to avoid load/load reorder in weak
* memory model. It is noop on x86
/* Ensure the head is read before tail */
std::atomic_thread_fence(std::memory_order_acquire);

/* This load-acquire synchronizes with the store-release of ht->tail
 * in update_tail.
 */
sqk_smp_rmb();
prod_tail = std::atomic_load_explicit(
reinterpret_cast<volatile std::atomic<uint32_t>*>(
&this->prod_.tail_
),
std::memory_order_acquire
);

/* The subtraction is done between two unsigned 32-bit values
 * (the result is always modulo 32 bits even if we have
 * old_head > prod_tail). So 'entries' is always between 0
 * and size(ring)-1.
 */
entries = (this->prod_.tail_ - old_head);
entries = (prod_tail - old_head);

/* Set the actual entries for dequeue */
if (n > entries) {
Expand All @@ -643,7 +679,6 @@ struct Ring {
new_head = old_head + n;
if constexpr (cons_sync_type == RingSyncType::SQK_RING_SYNC_ST) {
this->cons_.head_ = new_head;
sqk_smp_rmb();
success = 1;
} else {
success = std::atomic_compare_exchange_strong_explicit(
Expand Down Expand Up @@ -830,7 +865,13 @@ struct Ring {
return 0;
}
this->enqueue_elements(prod_head, &entry, n);
this->update_tail(this->prod_, prod_head, prod_next, prod_sync_type == RingSyncType::SQK_RING_SYNC_ST, 1);
this->update_tail(
this->prod_,
prod_head,
prod_next,
prod_sync_type == RingSyncType::SQK_RING_SYNC_ST,
1
);
return n;
} else if constexpr (prod_sync_type
== RingSyncType::SQK_RING_SYNC_MT_HTS) {
Expand Down Expand Up @@ -859,7 +900,13 @@ struct Ring {
return 0;
}
this->dequeue_elements(cons_head, &entry, n);
this->update_tail(this->cons_, cons_head, cons_next, cons_sync_type == RingSyncType::SQK_RING_SYNC_ST, 0);
this->update_tail(
this->cons_,
cons_head,
cons_next,
cons_sync_type == RingSyncType::SQK_RING_SYNC_ST,
0
);
return n;
} else if constexpr (cons_sync_type
== RingSyncType::SQK_RING_SYNC_MT_HTS) {
Expand All @@ -882,7 +929,8 @@ struct Ring {
};

template<typename T>
using MpscRing = Ring<T, RingSyncType::SQK_RING_SYNC_MT, RingSyncType::SQK_RING_SYNC_ST>;
using MpscRing =
Ring<T, RingSyncType::SQK_RING_SYNC_MT, RingSyncType::SQK_RING_SYNC_ST>;

template<typename RingType>
struct RingGuard {
Expand Down
13 changes: 13 additions & 0 deletions src/tests/common/report.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
| 14.39 | 69,492,842.98 | 0.0% | 48.02 | 30.19 | 1.591 | 7.00 | 0.0% | 1.72 | `mpsc_ring enqueue`
| 4.36 | 229,421,611.95 | 0.0% | 41.02 | 9.14 | 4.487 | 4.00 | 0.0% | 0.52 | `spsc_ring enqueue`
| 16.88 | 59,254,109.36 | 0.0% | 51.02 | 35.40 | 1.441 | 7.00 | 0.0% | 2.02 | `hts mpsc_ring enqueue`
| 2.80 | 357,013,044.65 | 0.6% | 23.65 | 5.88 | 4.025 | 4.30 | 0.2% | 0.33 | `deque enqueue`
| 17.90 | 55,876,668.16 | 0.1% | 186.00 | 37.54 | 4.955 | 42.00 | 0.0% | 2.14 | `list enqueue`
| 14.35 | 69,682,196.62 | 0.0% | 48.02 | 30.10 | 1.595 | 7.00 | 0.0% | 1.71 | `mpsc_ring enqueue`
| 4.33 | 231,032,774.43 | 0.0% | 41.02 | 9.08 | 4.518 | 4.00 | 0.0% | 0.52 | `spsc_ring enqueue`
| 16.86 | 59,303,030.30 | 0.0% | 51.02 | 35.37 | 1.442 | 7.00 | 0.0% | 2.01 | `hts mpsc_ring enqueue`
| 2.76 | 362,186,470.36 | 0.2% | 25.30 | 5.79 | 4.368 | 4.61 | 0.1% | 0.33 | `deque enqueue`
| 17.14 | 58,357,336.08 | 0.1% | 186.00 | 35.94 | 5.175 | 42.00 | 0.0% | 2.05 | `list enqueue`

12 changes: 0 additions & 12 deletions src/tests/common/report.txt

This file was deleted.

10 changes: 10 additions & 0 deletions src/tests/common/ring_bench.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ int main(int argc, char* argv[]) {
guard->enqueue(1);
int i;
guard->dequeue(i);
ankerl::nanobench::doNotOptimizeAway(i);
});
}
{
Expand All @@ -31,6 +32,7 @@ int main(int argc, char* argv[]) {
guard->enqueue(1);
int i;
guard->dequeue(i);
ankerl::nanobench::doNotOptimizeAway(i);
});
}
{
Expand All @@ -45,6 +47,7 @@ int main(int argc, char* argv[]) {
guard->enqueue(1);
int i;
guard->dequeue(i);
ankerl::nanobench::doNotOptimizeAway(i);
});
}
{
Expand All @@ -55,6 +58,7 @@ int main(int argc, char* argv[]) {
deq.push_back(1);
int i = deq.front();
deq.pop_front();
ankerl::nanobench::doNotOptimizeAway(i);
});
}
{
Expand All @@ -65,6 +69,7 @@ int main(int argc, char* argv[]) {
deq.push_back(1);
int i = deq.front();
deq.pop_front();
ankerl::nanobench::doNotOptimizeAway(i);
});
}
{
Expand All @@ -75,6 +80,7 @@ int main(int argc, char* argv[]) {
guard->enqueue(1);
uint64_t i;
guard->dequeue(i);
ankerl::nanobench::doNotOptimizeAway(i);
});
}
{
Expand All @@ -89,6 +95,7 @@ int main(int argc, char* argv[]) {
guard->enqueue(1);
uint64_t i;
guard->dequeue(i);
ankerl::nanobench::doNotOptimizeAway(i);
});
}
{
Expand All @@ -103,6 +110,7 @@ int main(int argc, char* argv[]) {
guard->enqueue(1);
uint64_t i;
guard->dequeue(i);
ankerl::nanobench::doNotOptimizeAway(i);
});
}
{
Expand All @@ -113,6 +121,7 @@ int main(int argc, char* argv[]) {
deq.push_back(1);
uint64_t i = deq.front();
deq.pop_front();
ankerl::nanobench::doNotOptimizeAway(i);
});
}
{
Expand All @@ -123,6 +132,7 @@ int main(int argc, char* argv[]) {
deq.push_back(1);
uint64_t i = deq.front();
deq.pop_front();
ankerl::nanobench::doNotOptimizeAway(i);
});
}

Expand Down
5 changes: 5 additions & 0 deletions src/tests/core/report.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
| 22.56 | 44,317,419.84 | 0.2% | 242.02 | 47.33 | 5.113 | 49.01 | 0.0% | 0.27 | `sqk::scheduler benchmark`
| 2.89 | 346,185,355.98 | 0.0% | 2.00 | 6.06 | 0.330 | 0.00 | 47.8% | 0.03 | `function benchmark`
| 32,159.95 | 31,094.57 | 0.6% | 2,031.68 | 2,782.61 | 0.730 | 474.31 | 1.0% | 0.39 | `thread benchmark`
5 changes: 0 additions & 5 deletions src/tests/core/report.txt

This file was deleted.

0 comments on commit eb6fb3a

Please sign in to comment.