Skip to content

Commit 16c2080

Browse files
committed
Avoid deadlock issues caused by logging in a loop.
Signed-off-by: wangfakang <[email protected]>
1 parent ab2b89c commit 16c2080

File tree

1 file changed

+14
-7
lines changed

1 file changed

+14
-7
lines changed

src/misc/param.cc

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,21 +61,28 @@ void initEnv() {
6161

6262
void ncclLoadParam(char const* env, int64_t deftVal, int64_t uninitialized, int64_t* cache) {
6363
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
64+
if (__builtin_expect(__atomic_load_n(cache, __ATOMIC_RELAXED) != uninitialized, true)) {
65+
return;
66+
}
67+
6468
pthread_mutex_lock(&mutex);
6569
if (__atomic_load_n(cache, __ATOMIC_RELAXED) == uninitialized) {
6670
const char* str = ncclGetEnv(env);
6771
int64_t value = deftVal;
6872
if (str && strlen(str) > 0) {
6973
errno = 0;
7074
value = strtoll(str, nullptr, 0);
71-
if (errno) {
72-
value = deftVal;
73-
INFO(NCCL_ALL,"Invalid value %s for %s, using default %lld.", str, env, (long long)deftVal);
74-
} else {
75-
INFO(NCCL_ENV,"%s set by environment to %lld.", env, (long long)value);
76-
}
75+
value = errno ? deftVal : value;
7776
}
77+
// To prevent deadlock issues caused by logging in a loop,
78+
// so cache the value before the log operation.
7879
__atomic_store_n(cache, value, __ATOMIC_RELAXED);
80+
81+
if (errno) {
82+
INFO(NCCL_ALL,"Invalid value %s for %s, using default %lld.", str, env, (long long)deftVal);
83+
} else {
84+
INFO(NCCL_ENV,"%s set by environment to %lld.", env, (long long)value);
85+
}
7986
}
8087
pthread_mutex_unlock(&mutex);
8188
}
@@ -84,4 +91,4 @@ const char *ncclGetEnv(const char *name) {
8491
static pthread_once_t once = PTHREAD_ONCE_INIT;
8592
pthread_once(&once, initEnv);
8693
return getenv(name);
87-
}
94+
}

0 commit comments

Comments
 (0)