@@ -61,21 +61,28 @@ void initEnv() {
6161
6262void ncclLoadParam (char const * env, int64_t deftVal, int64_t uninitialized, int64_t * cache) {
6363 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
64+ if (__builtin_expect (__atomic_load_n (cache, __ATOMIC_RELAXED) != uninitialized, true )) {
65+ return ;
66+ }
67+
6468 pthread_mutex_lock (&mutex);
6569 if (__atomic_load_n (cache, __ATOMIC_RELAXED) == uninitialized) {
6670 const char * str = ncclGetEnv (env);
6771 int64_t value = deftVal;
6872 if (str && strlen (str) > 0 ) {
6973 errno = 0 ;
7074 value = strtoll (str, nullptr , 0 );
71- if (errno) {
72- value = deftVal;
73- INFO (NCCL_ALL," Invalid value %s for %s, using default %lld." , str, env, (long long )deftVal);
74- } else {
75- INFO (NCCL_ENV," %s set by environment to %lld." , env, (long long )value);
76- }
75+ value = errno ? deftVal : value;
7776 }
77+ // To prevent deadlock issues caused by logging in a loop,
78+ // so cache the value before the log operation.
7879 __atomic_store_n (cache, value, __ATOMIC_RELAXED);
80+
81+ if (errno) {
82+ INFO (NCCL_ALL," Invalid value %s for %s, using default %lld." , str, env, (long long )deftVal);
83+ } else {
84+ INFO (NCCL_ENV," %s set by environment to %lld." , env, (long long )value);
85+ }
7986 }
8087 pthread_mutex_unlock (&mutex);
8188}
@@ -84,4 +91,4 @@ const char *ncclGetEnv(const char *name) {
8491 static pthread_once_t once = PTHREAD_ONCE_INIT;
8592 pthread_once (&once, initEnv);
8693 return getenv (name);
87- }
94+ }
0 commit comments