@@ -61,7 +61,7 @@ void parallel_merge(
61
61
s2s[nt - 1 ].i1 = s2.i1 ;
62
62
63
63
// not sure parallel actually helps here
64
- #pragma omp parallel for num_threads(nt )
64
+ #pragma omp parallel for num_threads(num_omp_threads )
65
65
for (int t = 0 ; t < nt; t++) {
66
66
s1s[t].i0 = s1.i0 + s1.len () * t / nt;
67
67
s1s[t].i1 = s1.i0 + s1.len () * (t + 1 ) / nt;
@@ -93,7 +93,7 @@ void parallel_merge(
93
93
assert (sws[nt - 1 ].i1 == s1.i1 );
94
94
95
95
// do the actual merging
96
- #pragma omp parallel for num_threads(nt )
96
+ #pragma omp parallel for num_threads(num_omp_threads )
97
97
for (int t = 0 ; t < nt; t++) {
98
98
SegmentS sw = sws[t];
99
99
SegmentS s1t = s1s[t];
@@ -176,7 +176,7 @@ void fvec_argsort_parallel(size_t n, const float* vals, size_t* perm) {
176
176
int sub_nt = nseg % 2 == 0 ? nt : nt - 1 ;
177
177
int sub_nseg1 = nseg / 2 ;
178
178
179
- #pragma omp parallel for num_threads(nseg1 )
179
+ #pragma omp parallel for num_threads(num_omp_threads )
180
180
for (int s = 0 ; s < nseg; s += 2 ) {
181
181
if (s + 1 == nseg) { // otherwise isolated segment
182
182
memcpy (permB + segs[s].i0 ,
@@ -257,7 +257,7 @@ void bucket_sort_parallel(
257
257
int64_t * perm,
258
258
int nt_in) {
259
259
memset (lims, 0 , sizeof (*lims) * (vmax + 1 ));
260
- #pragma omp parallel num_threads(nt_in )
260
+ #pragma omp parallel num_threads(num_omp_threads )
261
261
{
262
262
int nt = omp_get_num_threads (); // might be different from nt_in
263
263
int rank = omp_get_thread_num ();
@@ -483,7 +483,7 @@ void bucket_sort_inplace_parallel(
483
483
nbucket); // DON'T use std::vector<bool> that cannot be accessed
484
484
// safely from multiple threads!!!
485
485
486
- #pragma omp parallel num_threads(nt_in )
486
+ #pragma omp parallel num_threads(num_omp_threads )
487
487
{
488
488
int nt = omp_get_num_threads (); // might be different from nt_in (?)
489
489
int rank = omp_get_thread_num ();
@@ -709,7 +709,7 @@ inline int64_t hash_function(int64_t x) {
709
709
710
710
void hashtable_int64_to_int64_init (int log2_capacity, int64_t * tab) {
711
711
size_t capacity = (size_t )1 << log2_capacity;
712
- #pragma omp parallel for
712
+ #pragma omp parallel for num_threads(num_omp_threads)
713
713
for (int64_t i = 0 ; i < capacity; i++) {
714
714
tab[2 * i] = -1 ;
715
715
tab[2 * i + 1 ] = -1 ;
@@ -729,7 +729,7 @@ void hashtable_int64_to_int64_add(
729
729
int log2_nbucket = log2_capacity_to_log2_nbucket (log2_capacity);
730
730
size_t nbucket = (size_t )1 << log2_nbucket;
731
731
732
- #pragma omp parallel for
732
+ #pragma omp parallel for num_threads(num_omp_threads)
733
733
for (int64_t i = 0 ; i < n; i++) {
734
734
hk[i] = hash_function (keys[i]) & mask;
735
735
bucket_no[i] = hk[i] >> (log2_capacity - log2_nbucket);
@@ -746,7 +746,7 @@ void hashtable_int64_to_int64_add(
746
746
omp_get_max_threads ());
747
747
748
748
int num_errors = 0 ;
749
- #pragma omp parallel for reduction(+ : num_errors)
749
+ #pragma omp parallel for reduction(+ : num_errors) num_threads(num_omp_threads)
750
750
for (int64_t bucket = 0 ; bucket < nbucket; bucket++) {
751
751
size_t k0 = bucket << (log2_capacity - log2_nbucket);
752
752
size_t k1 = (bucket + 1 ) << (log2_capacity - log2_nbucket);
@@ -793,7 +793,7 @@ void hashtable_int64_to_int64_lookup(
793
793
int64_t mask = capacity - 1 ;
794
794
int log2_nbucket = log2_capacity_to_log2_nbucket (log2_capacity);
795
795
796
- #pragma omp parallel for
796
+ #pragma omp parallel for num_threads(num_omp_threads)
797
797
for (int64_t i = 0 ; i < n; i++) {
798
798
int64_t k = keys[i];
799
799
int64_t hk = hash_function (k) & mask;
0 commit comments