@@ -61,21 +61,29 @@ void initEnv() {
6161
6262void ncclLoadParam (char const * env, int64_t deftVal, int64_t uninitialized, int64_t * cache) {
6363 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
64+ if (__builtin_expect (__atomic_load_n (cache, __ATOMIC_RELAXED) != uninitialized, true )) {
65+ return ;
66+ }
67+
6468 pthread_mutex_lock (&mutex);
6569 if (__atomic_load_n (cache, __ATOMIC_RELAXED) == uninitialized) {
6670 const char * str = ncclGetEnv (env);
6771 int64_t value = deftVal;
68- if (str && strlen (str) > 0 ) {
69- errno = 0 ;
70- value = strtoll (str, nullptr , 0 );
71- if (errno) {
72- value = deftVal;
73- INFO (NCCL_ALL," Invalid value %s for %s, using default %lld." , str, env, (long long )deftVal);
74- } else {
75- INFO (NCCL_ENV," %s set by environment to %lld." , env, (long long )value);
76- }
72+ if (!str || strlen (str) <= 0 ) {
73+ __atomic_store_n (cache, value, __ATOMIC_RELAXED);
74+ pthread_mutex_unlock (&mutex);
75+ return ;
76+ }
77+ errno = 0 ;
78+ value = strtoll (str, nullptr , 0 );
79+ // To prevent deadlock issues caused by logging in a loop,
80+ // so cache the value before the log operation.
81+ __atomic_store_n (cache, errno ? deftVal : value, __ATOMIC_RELAXED);
82+ if (errno) {
83+ INFO (NCCL_ALL," Invalid value %s for %s, using default %lld." , str, env, (long long )deftVal);
84+ } else {
85+ INFO (NCCL_ENV," %s set by environment to %lld." , env, (long long )value);
7786 }
78- __atomic_store_n (cache, value, __ATOMIC_RELAXED);
7987 }
8088 pthread_mutex_unlock (&mutex);
8189}
@@ -84,4 +92,4 @@ const char *ncclGetEnv(const char *name) {
8492 static pthread_once_t once = PTHREAD_ONCE_INIT;
8593 pthread_once (&once, initEnv);
8694 return getenv (name);
87- }
95+ }
0 commit comments