|
16 | 16 | #include "dbg_print.h"
|
17 | 17 | #include "xcpLite.h"
|
18 | 18 |
|
| 19 | +// Experimental |
| 20 | +// Use a spinlock instead of a mutex for the producer lock |
| 21 | +// This naive approach is usually not faster than a mutex and can produce higher latencies and a hard-to-predict impact on other threads |
| 22 | +// It might be a better solution for non-preemptive tasks |
| 23 | +//#define USE_SPINLOCK |
| 24 | +//#define USE_YIELD |
| 25 | +//#define TEST_LOCK_TIMING |
| 26 | + |
| 27 | +/* |
| 28 | +Test results from test_multi_thread with 32 tasks and 200us sleep time: |
| 29 | +maxLock and avgLock time in ns |
| 30 | +
|
| 31 | +SPINLOCK+YIELD |
| 32 | + lockCount=501170, maxLock=296000, avgLock=768 |
| 33 | + lockCount=501019, maxLock=195000, avgLock=744 |
| 34 | + lockCount=500966, maxLock=210000, avgLock=724 |
| 35 | +
|
| 36 | +SPINLOCK without cache friendly lock check |
| 37 | + lockCount=492952, maxLock=10115000, avgLock=1541 |
| 38 | +
|
| 39 | +SPINLOCK |
| 40 | + lockCount=497254, maxLock=9935000, avgLock=512 |
| 41 | + lockCount=494866, maxLock=11935000, avgLock=1322 |
| 42 | + lockCount=490923, maxLock=10019000, avgLock=2073 |
| 43 | + lockCount=489831, maxLock=10024000, avgLock=1980 |
| 44 | +
|
| 45 | +MUTEX |
| 46 | + lockCount=499798, maxLock=114000, avgLock=840 |
| 47 | + lockCount=500202, maxLock=135000, avgLock=806 |
| 48 | + lockCount=499972, maxLock=130000, avgLock=790 |
| 49 | + lockCount=500703, maxLock=124000, avgLock=755 |
| 50 | + lockCount=500773, maxLock=126000, avgLock=669 |
| 51 | +*/ |
| 52 | + |
| 53 | +#ifdef TEST_LOCK_TIMING |
| 54 | +static uint64_t lockTimeMax = 0; |
| 55 | +static uint64_t lockTimeSum = 0; |
| 56 | +static uint64_t lockCount = 0; |
| 57 | +#endif |
19 | 58 |
|
20 | 59 | #ifndef _WIN
|
21 | 60 |
|
22 | 61 | #include <stdatomic.h>
|
23 | 62 |
|
24 |
| -// Use spinlock instead of mutex for producer lock |
25 |
| -#define USE_SPINLOCK |
26 |
| - |
27 | 63 | #else
|
28 | 64 |
|
29 | 65 | #ifdef _WIN32_
|
30 | 66 | #error "Windows32 not implemented yet"
|
31 | 67 | #else
|
32 | 68 |
|
33 |
| - |
| 69 | +#undef USE_SPINLOCK |
34 | 70 | #define atomic_uint_fast64_t uint64_t
|
35 | 71 | #define atomic_store(a,b) (*a)=(b)
|
36 | 72 | #define atomic_load(a) (*a)
|
@@ -100,6 +136,10 @@ void XcpTlFreeTransmitQueue() {
|
100 | 136 | #ifndef USE_SPINLOCK
|
101 | 137 | mutexDestroy(&gXcpTlQueue.mutex);
|
102 | 138 | #endif
|
| 139 | + |
| 140 | +#ifdef TEST_LOCK_TIMING |
| 141 | + DBG_PRINTF3("XcpTlFreeTransmitQueue: overruns=%u, lockCount=%llu, maxLock=%llu, avgLock=%llu\n", gXcpTlQueue.overruns, lockCount, lockTimeMax, lockTimeSum/lockCount); |
| 142 | +#endif |
103 | 143 | }
|
104 | 144 |
|
105 | 145 |
|
@@ -127,11 +167,27 @@ uint8_t* XcpTlGetTransmitBuffer(void** handle, uint16_t packet_len) {
|
127 | 167 | DBG_PRINTF5("XcpTlGetTransmitBuffer: len=%d\n", packet_len);
|
128 | 168 |
|
129 | 169 | // Producer lock
|
| 170 | +#ifdef TEST_LOCK_TIMING |
| 171 | + uint64_t c = clockGet(); |
| 172 | +#endif |
130 | 173 | #ifdef USE_SPINLOCK
|
131 |
| - while (atomic_flag_test_and_set_explicit(&lock, memory_order_acquire)); |
| 174 | + for (uint32_t n = 1;1;n++) { |
| 175 | + BOOL locked = atomic_load_explicit(&lock._Value, memory_order_relaxed); |
| 176 | + if (!locked && !atomic_flag_test_and_set_explicit(&lock, memory_order_acquire)) break; |
| 177 | + //if ( !atomic_flag_test_and_set_explicit(&lock, memory_order_acquire)) break; |
| 178 | + #ifdef USE_YIELD |
| 179 | + if (n%16==0) yield_thread(); |
| 180 | + #endif |
| 181 | + } |
132 | 182 | #else
|
133 | 183 | mutexLock(&gXcpTlQueue.mutex);
|
134 | 184 | #endif
|
| 185 | +#ifdef TEST_LOCK_TIMING |
| 186 | + uint64_t d = clockGet() - c; |
| 187 | + if (d>lockTimeMax) lockTimeMax = d; |
| 188 | + lockTimeSum += d; |
| 189 | + lockCount++; |
| 190 | +#endif |
135 | 191 |
|
136 | 192 | uint64_t head = atomic_load(&gXcpTlQueue.head);
|
137 | 193 | uint64_t tail = atomic_load_explicit(&gXcpTlQueue.tail,memory_order_relaxed);
|
|
0 commit comments