Skip to content

Commit 3e713b0

Browse files
committed
Initial attempt to discover some node-related bugs
It looks like there are some strange issues with node assignment, and I have not been able to track them down yet. At least with this commit, the program will abort if a node is remapped.
1 parent 9e42bc5 commit 3e713b0

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

driver/others/init.c

+14-2
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8787
#include <dlfcn.h>
8888
#include <unistd.h>
8989
#include <string.h>
90+
#include <stdatomic.h>
9091

9192
#if defined(BIGNUMA)
9293
// max number of nodes as defined in numa.h
@@ -134,7 +135,7 @@ typedef struct {
134135
static cpu_set_t cpu_orig_mask[4];
135136

136137
static int cpu_mapping[MAX_CPUS];
137-
static int node_mapping[MAX_CPUS * 4];
138+
static _Atomic int node_mapping[MAX_CPUS * 4];
138139
static int cpu_sub_mapping[MAX_CPUS];
139140
static int disable_mapping;
140141

@@ -784,7 +785,15 @@ int gotoblas_set_affinity(int pos) {
784785

785786
sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask);
786787

787-
node_mapping[WhereAmI()] = mynode;
788+
{
789+
const int node_mapping_index = WhereAmI();
790+
int uninitialized_node = -1;
791+
if(!atomic_compare_exchange_strong(&node_mapping[node_mapping_index], &uninitialized_node, mynode))
792+
{
793+
fprintf(stderr, "ERROR: node %d was already mapped to %d (now mapping to %d)\n", node_mapping_index, uninitialized_node, mynode);
794+
abort();
795+
}
796+
}
788797

789798
}
790799

@@ -812,6 +821,9 @@ void gotoblas_affinity_init(void) {
812821

813822
initialized = 1;
814823

824+
for(size_t node_index = 0; node_index < sizeof(node_mapping) / sizeof(node_mapping[0]); ++node_index)
825+
node_mapping[node_index] = -1;
826+
815827
sched_getaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
816828

817829
#ifdef USE_OPENMP

0 commit comments

Comments
 (0)