Commit 6f26b29

Spinlock versus mutex queue lock tests
1 parent 24a8084

5 files changed: +67 -11 lines changed

README.md (+1 -1)
@@ -268,7 +268,7 @@ All measurement and calibration code instrumentation is non blocking and the tri
 There are no heap allocations during runtime, except for lazy registrations and A2L generation.

 build.rs automatically builds a minimal static C library from individually preconfigured core XCPlite sources.
-On C level, there is a synchronisation mutex or spinlock for the mpsc transmit queue.
+On C level, there is a synchronisation mutex for the mpsc transmit queue.
 The C code has the option to start the server with 2 normal threads for rx and tx socket handling.

 The generated A2L file is finalized on XCP connect and provided for upload via XCP.
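As a reading aid, here is a minimal sketch of the pattern the README describes: a multi-producer, single-consumer (mpsc) queue whose producer side is serialized by a mutex while the single consumer advances an atomic tail. tMpscQueue, mpsc_acquire and the sizes are illustrative stand-ins, not xcplib's actual types.

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>

#define SLOTS 1024              /* illustrative queue depth */
#define SLOT_SIZE 256           /* illustrative max packet size */

typedef struct {
    uint8_t data[SLOTS][SLOT_SIZE];
    uint32_t head;              /* written by producers, only under the mutex */
    atomic_uint tail;           /* advanced lock-free by the single consumer */
    pthread_mutex_t mutex;      /* the producer lock discussed above */
} tMpscQueue;

/* Producer: reserve one slot, or return NULL on overrun (queue full). */
static uint8_t* mpsc_acquire(tMpscQueue* q) {
    uint8_t* slot = NULL;
    pthread_mutex_lock(&q->mutex);
    if (q->head - atomic_load(&q->tail) < SLOTS)
        slot = q->data[q->head++ % SLOTS];
    pthread_mutex_unlock(&q->mutex);
    return slot;
}

/* Consumer: release the oldest slot after it has been transmitted. */
static void mpsc_release(tMpscQueue* q) {
    atomic_fetch_add(&q->tail, 1);
}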

tests/test_multi_thread.rs (+1 -1)
@@ -2,7 +2,7 @@
 // Integration test for XCP in a multi threaded application
 // Uses the test XCP client in xcp_client

-// cargo test --features=json --features=auto_reg -- --test-threads=1 --nocapture --test test_multi_thread
+// cargo test --features=json --features=auto_reg --features=a2l_reader -- --test-threads=1 --nocapture --test test_multi_thread

 #![allow(unused_assignments)]

tests/test_single_thread.rs (+1 -2)
@@ -2,8 +2,7 @@
 // Integration test for XCP in a single thread application
 // Uses the test XCP client in module xcp_client

-// cargo test --features=json --features=auto_reg -- --test-threads=1 --nocapture --test test_single_thread
-
+// cargo test --features=json --features=auto_reg --features=a2l_reader -- --test-threads=1 --nocapture --test test_single_thread
 use xcp::*;
 use xcp_type_description::prelude::*;

xcplib/src/platform.h (+3 -2)
@@ -81,9 +81,10 @@ typedef HANDLE tXcpThread;
 #elif defined(_LINUX) // Linux

 typedef pthread_t tXcpThread;
-#define create_thread(h,t) pthread_create(h, NULL, t, NULL);
-#define join_thread(h) pthread_join(h,NULL);
+#define create_thread(h,t) pthread_create(h, NULL, t, NULL)
+#define join_thread(h) pthread_join(h,NULL)
 #define cancel_thread(h) { pthread_detach(h); pthread_cancel(h); }
+#define yield_thread() sched_yield()

 #endif
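The dropped trailing semicolons are a macro hygiene fix worth spelling out: with the semicolon baked into the macro, a call site that adds its own ";" expands to two statements, which breaks an if/else without braces. A hypothetical illustration (example, worker and worker_running are made up for this sketch):

#include <pthread.h>
#include <sched.h>

#define join_thread_old(h) pthread_join(h, NULL);   /* old: semicolon baked in */
#define join_thread_new(h) pthread_join(h, NULL)    /* new: caller supplies it */

void example(pthread_t worker, int worker_running) {
    if (worker_running)
        join_thread_new(worker);   /* fine: expands to one statement */
    else
        sched_yield();             /* e.g. via the new yield_thread() macro */
    /* With join_thread_old the same if/else does not compile: the call
       expands to "pthread_join(worker, NULL);;" and the empty statement
       after the if body orphans the else. */
}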

xcplib/src/xcpTlQueue.c (+61 -5)
@@ -16,21 +16,57 @@
 #include "dbg_print.h"
 #include "xcpLite.h"

+// Experimental
+// Use a spinlock instead of a mutex for the producer lock
+// This naive approach is usually not faster than a mutex and can produce higher latencies and a hard to predict impact on other threads
+// It might be a better solution for non-preemptive tasks
+//#define USE_SPINLOCK
+//#define USE_YIELD
+//#define TEST_LOCK_TIMING
+
+/*
+Test results from test_multi_thread with 32 tasks and 200us sleep time:
+maxLock and avgLock time in ns
+
+SPINLOCK+YIELD
+lockCount=501170, maxLock=296000, avgLock=768
+lockCount=501019, maxLock=195000, avgLock=744
+lockCount=500966, maxLock=210000, avgLock=724
+
+SPINLOCK without cache friendly lock check
+lockCount=492952, maxLock=10115000, avgLock=1541
+
+SPINLOCK
+lockCount=497254, maxLock=9935000, avgLock=512
+lockCount=494866, maxLock=11935000, avgLock=1322
+lockCount=490923, maxLock=10019000, avgLock=2073
+lockCount=489831, maxLock=10024000, avgLock=1980
+
+MUTEX
+lockCount=499798, maxLock=114000, avgLock=840
+lockCount=500202, maxLock=135000, avgLock=806
+lockCount=499972, maxLock=130000, avgLock=790
+lockCount=500703, maxLock=124000, avgLock=755
+lockCount=500773, maxLock=126000, avgLock=669
+*/
+
+#ifdef TEST_LOCK_TIMING
+static uint64_t lockTimeMax = 0;
+static uint64_t lockTimeSum = 0;
+static uint64_t lockCount = 0;
+#endif

 #ifndef _WIN

 #include <stdatomic.h>

-// Use spinlock instead of mutex for producer lock
-#define USE_SPINLOCK
-
 #else

 #ifdef _WIN32_
 #error "Windows32 not implemented yet"
 #else

-
+#undef USE_SPINLOCK
 #define atomic_uint_fast64_t uint64_t
 #define atomic_store(a,b) (*a)=(b)
 #define atomic_load(a) (*a)
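The "cache friendly lock check" named in the test results is the test-and-test-and-set pattern implemented in the last hunk below: producers spin on a cheap relaxed load, which keeps the lock's cache line in a shared state, and only attempt the exclusive test-and-set once the lock looks free. The commit peeks at atomic_flag's internal _Value because C11 atomic_flag has no portable load; the sketch below uses an atomic_bool to show the same pattern portably (spin_lock/spin_unlock are illustrative names, not the commit's code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <sched.h>

static atomic_bool lock_flag = false;

static void spin_lock(void) {
    for (uint32_t n = 1;; n++) {
        /* Cheap relaxed read first: spins read-only on a shared cache line
           instead of hammering it with read-modify-write cycles. */
        if (!atomic_load_explicit(&lock_flag, memory_order_relaxed) &&
            !atomic_exchange_explicit(&lock_flag, true, memory_order_acquire))
            break;  /* acquired */
        /* Optional back-off, corresponding to USE_YIELD in the commit. */
        if (n % 16 == 0) sched_yield();
    }
}

static void spin_unlock(void) {
    atomic_store_explicit(&lock_flag, false, memory_order_release);
}

The numbers above show why this matters: the 10 ms-class maxLock values in the plain SPINLOCK rows, versus roughly 130 us for the MUTEX, are the signature of a preempted lock holder while other producers keep spinning; USE_YIELD mitigates this by handing the CPU back to the scheduler.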
@@ -100,6 +136,10 @@ void XcpTlFreeTransmitQueue() {
 #ifndef USE_SPINLOCK
     mutexDestroy(&gXcpTlQueue.mutex);
 #endif
+
+#ifdef TEST_LOCK_TIMING
+    DBG_PRINTF3("XcpTlFreeTransmitQueue: overruns=%u, lockCount=%llu, maxLock=%llu, avgLock=%llu\n", gXcpTlQueue.overruns, lockCount, lockTimeMax, lockTimeSum/lockCount);
+#endif
 }

@@ -127,11 +167,27 @@ uint8_t* XcpTlGetTransmitBuffer(void** handle, uint16_t packet_len) {
     DBG_PRINTF5("XcpTlGetTransmitBuffer: len=%d\n", packet_len);

     // Producer lock
+#ifdef TEST_LOCK_TIMING
+    uint64_t c = clockGet();
+#endif
 #ifdef USE_SPINLOCK
-    while (atomic_flag_test_and_set_explicit(&lock, memory_order_acquire));
+    for (uint32_t n = 1;1;n++) {
+        BOOL locked = atomic_load_explicit(&lock._Value, memory_order_relaxed);
+        if (!locked && !atomic_flag_test_and_set_explicit(&lock, memory_order_acquire)) break;
+        //if ( !atomic_flag_test_and_set_explicit(&lock, memory_order_acquire)) break;
+#ifdef USE_YIELD
+        if (n%16==0) yield_thread();
+#endif
+    }
 #else
     mutexLock(&gXcpTlQueue.mutex);
 #endif
+#ifdef TEST_LOCK_TIMING
+    uint64_t d = clockGet() - c;
+    if (d>lockTimeMax) lockTimeMax = d;
+    lockTimeSum += d;
+    lockCount++;
+#endif

     uint64_t head = atomic_load(&gXcpTlQueue.head);
     uint64_t tail = atomic_load_explicit(&gXcpTlQueue.tail,memory_order_relaxed);
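The TEST_LOCK_TIMING instrumentation is a plain max/sum/count accumulator around lock acquisition; the printed avgLock is lockTimeSum/lockCount. A self-contained sketch of the same measurement pattern, using clock_gettime() as a stand-in for xcplib's clockGet() (clock_ns, measure_locked and report are hypothetical names for this sketch):

#include <stdint.h>
#include <stdio.h>
#include <pthread.h>
#include <time.h>

static uint64_t lockTimeMax = 0, lockTimeSum = 0, lockCount = 0;

/* Stand-in for clockGet(): monotonic time in nanoseconds. */
static uint64_t clock_ns(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

/* Time one lock acquisition and update the accumulators. As in the
   commit, the updates run while the lock is held, so the counters are
   protected by the very lock being measured. */
static void measure_locked(pthread_mutex_t* m) {
    uint64_t c = clock_ns();
    pthread_mutex_lock(m);
    uint64_t d = clock_ns() - c;
    if (d > lockTimeMax) lockTimeMax = d;
    lockTimeSum += d;
    lockCount++;
}

/* At shutdown, mirroring the printout in XcpTlFreeTransmitQueue: */
static void report(void) {
    if (lockCount > 0)
        printf("lockCount=%llu, maxLock=%llu, avgLock=%llu\n",
               (unsigned long long)lockCount,
               (unsigned long long)lockTimeMax,
               (unsigned long long)(lockTimeSum / lockCount));
}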
