Skip to content

Commit 360e6a8

Browse files
authored
Merge pull request #78 from openpmix/master
Fork Sync: Update from parent repository
2 parents e9f22ef + fb15a60 commit 360e6a8

File tree

17 files changed

+267
-60
lines changed

17 files changed

+267
-60
lines changed

include/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#
22
# Copyright (c) 2015-2020 Intel, Inc. All rights reserved.
33
#
4-
# Copyright (c) 2021 Nanook Consulting All rights reserved.
4+
# Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
55
# $COPYRIGHT$
66
#
77
# Additional copyrights may follow

src/mca/ess/base/ess_base_bootstrap.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,15 @@ int prte_ess_base_bootstrap(void)
151151

152152
/* identify and cache the option */
153153
if (0 == strcmp(line, "ClusterName")) {
154+
if (NULL != cluster) {
155+
free(cluster);
156+
}
154157
cluster = strdup(ptr);
155158

156159
} else if (0 == strcmp(line, "DVMControllerHost")) {
160+
if (NULL != ctrlhost) {
161+
free(ctrlhost);
162+
}
157163
ctrlhost = strdup(ptr);
158164

159165
} else if (0 == strcmp(line, "DVMControllerPort")) {
@@ -163,18 +169,33 @@ int prte_ess_base_bootstrap(void)
163169
prtedport = strtoul(ptr, NULL, 10);
164170

165171
} else if (0 == strcmp(line, "DVMNodes")) {
172+
if (NULL == dvmnodes) {
173+
free(dvmnodes);
174+
}
166175
dvmnodes = strdup(ptr);
167176

168177
} else if (0 == strcmp(line, "DVMTempDir")) {
178+
if (NULL == dvmtmpdir) {
179+
free(dvmtmpdir);
180+
}
169181
dvmtmpdir = strdup(ptr);
170182

171183
} else if (0 == strcmp(line, "SessionTmpDir")) {
184+
if (NULL != sessiontmpdir) {
185+
free(sessiontmpdir);
186+
}
172187
sessiontmpdir = strdup(ptr);
173188

174189
} else if (0 == strcmp(line, "ControllerLogPath")) {
190+
if (NULL != ctrllogpath) {
191+
free(ctrllogpath);
192+
}
175193
ctrllogpath = strdup(ptr);
176194

177195
} else if (0 == strcmp(line, "PRTEDLogPath")) {
196+
if (NULL != prtedlogpath) {
197+
free(prtedlogpath);
198+
}
178199
prtedlogpath = strdup(ptr);
179200
}
180201
free(line);

src/mca/grpcomm/direct/grpcomm_direct_group.c

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -586,19 +586,43 @@ void prte_grpcomm_direct_grp_recv(int status, pmix_proc_t *sender,
586586
PMIx_Info_list_convert(coll->grpinfo, &darray);
587587
info = (pmix_info_t*)darray.array;
588588
ninfo = darray.size;
589-
PMIx_Data_pack(NULL, reply, &ninfo, 1, PMIX_SIZE);
589+
rc = PMIx_Data_pack(NULL, reply, &ninfo, 1, PMIX_SIZE);
590+
if (PMIX_SUCCESS != rc) {
591+
PMIX_ERROR_LOG(rc);
592+
PMIX_DATA_BUFFER_RELEASE(reply);
593+
PMIX_RELEASE(sig);
594+
return;
595+
}
590596
if (0 < ninfo) {
591-
PMIx_Data_pack(NULL, reply, info, ninfo, PMIX_INFO);
597+
rc = PMIx_Data_pack(NULL, reply, info, ninfo, PMIX_INFO);
598+
if (PMIX_SUCCESS != rc) {
599+
PMIX_ERROR_LOG(rc);
600+
PMIX_DATA_BUFFER_RELEASE(reply);
601+
PMIX_RELEASE(sig);
602+
return;
603+
}
592604
}
593605
PMIX_DATA_ARRAY_DESTRUCT(&darray);
594606

595607
// pack any endpts
596608
PMIx_Info_list_convert(coll->endpts, &darray);
597609
info = (pmix_info_t*)darray.array;
598610
ninfo = darray.size;
599-
PMIx_Data_pack(NULL, reply, &ninfo, 1, PMIX_SIZE);
611+
rc = PMIx_Data_pack(NULL, reply, &ninfo, 1, PMIX_SIZE);
612+
if (PMIX_SUCCESS != rc) {
613+
PMIX_ERROR_LOG(rc);
614+
PMIX_DATA_BUFFER_RELEASE(reply);
615+
PMIX_RELEASE(sig);
616+
return;
617+
}
600618
if (0 < ninfo) {
601-
PMIx_Data_pack(NULL, reply, info, ninfo, PMIX_INFO);
619+
rc = PMIx_Data_pack(NULL, reply, info, ninfo, PMIX_INFO);
620+
if (PMIX_SUCCESS != rc) {
621+
PMIX_ERROR_LOG(rc);
622+
PMIX_DATA_BUFFER_RELEASE(reply);
623+
PMIX_RELEASE(sig);
624+
return;
625+
}
602626
}
603627
PMIX_DATA_ARRAY_DESTRUCT(&darray);
604628
}

src/mca/odls/base/odls_base_default_fns.c

Lines changed: 10 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -655,8 +655,8 @@ int prte_odls_base_default_construct_child_list(pmix_data_buffer_t *buffer, pmix
655655
/* add any cache'd values to the front of the job attributes */
656656
for (m = 0; m < ninfo; m++) {
657657
if (0 == strcmp(info[m].key, PMIX_SET_ENVAR)) {
658-
envt.envar = strdup(info[m].value.data.envar.envar);
659-
envt.value = strdup(info[m].value.data.envar.value);
658+
envt.envar = info[m].value.data.envar.envar;
659+
envt.value = info[m].value.data.envar.value;
660660
envt.separator = info[m].value.data.envar.separator;
661661
prte_prepend_attribute(&jdata->attributes, PRTE_JOB_SET_ENVAR, PRTE_ATTR_GLOBAL,
662662
&envt, PMIX_ENVAR);
@@ -1112,6 +1112,9 @@ static void process_envars(prte_job_t *jdata,
11121112
bool found;
11131113

11141114
PMIX_LIST_FOREACH(attr, &jdata->attributes, prte_attribute_t) {
1115+
if (PMIX_ENVAR != attr->data.type) {
1116+
continue;
1117+
}
11151118
val = &attr->data;
11161119
envar = &val->data.envar;
11171120
if (attr->key == PRTE_JOB_SET_ENVAR) {
@@ -1186,6 +1189,9 @@ static void process_envars(prte_job_t *jdata,
11861189

11871190
// app trumps job, so do it after the job
11881191
PMIX_LIST_FOREACH(attr, &app->attributes, prte_attribute_t) {
1192+
if (PMIX_ENVAR != attr->data.type) {
1193+
continue;
1194+
}
11891195
val = &attr->data;
11901196
envar = &val->data.envar;
11911197
if (attr->key == PRTE_APP_SET_ENVAR) {
@@ -1252,15 +1258,14 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
12521258
int j, idx;
12531259
int total_num_local_procs = 0;
12541260
prte_odls_launch_local_t *caddy = (prte_odls_launch_local_t *) cbdata;
1255-
prte_job_t *jobdat, *parent;
1261+
prte_job_t *jobdat;
12561262
pmix_nspace_t job;
12571263
prte_odls_base_fork_local_proc_fn_t fork_local = caddy->fork_local;
1258-
bool index_argv, inherit;
1264+
bool index_argv;
12591265
char *msg, **xfer;
12601266
prte_odls_spawn_caddy_t *cd;
12611267
prte_event_base_t *evb;
12621268
prte_schizo_base_module_t *schizo;
1263-
pmix_proc_t *nptr;
12641269
PRTE_HIDE_UNUSED_PARAMS(fd, sd);
12651270

12661271
PMIX_ACQUIRE_OBJECT(caddy);
@@ -1353,20 +1358,6 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
13531358
}
13541359
}
13551360

1356-
// see if we have a parent in case of inheritance
1357-
nptr = NULL;
1358-
prte_get_attribute(&jobdat->attributes, PRTE_JOB_LAUNCH_PROXY, (void **) &nptr, PMIX_PROC);
1359-
if (NULL != nptr) {
1360-
parent = prte_get_job_data_object(nptr->nspace);
1361-
if (NULL != parent) {
1362-
inherit = prte_get_attribute(&parent->attributes, PRTE_JOB_INHERIT, NULL, PMIX_BOOL);
1363-
} else {
1364-
inherit = false;
1365-
}
1366-
} else {
1367-
inherit = false;
1368-
}
1369-
13701361
for (j = 0; j < jobdat->apps->size; j++) {
13711362
app = (prte_app_context_t *) pmix_pointer_array_get_item(jobdat->apps, j);
13721363
if (NULL == app) {
@@ -1410,10 +1401,6 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
14101401
}
14111402

14121403
// process any provided env directives
1413-
if (inherit) {
1414-
// start with the parent's directives
1415-
process_envars(parent, app);
1416-
}
14171404
process_envars(jobdat, app);
14181405

14191406

src/mca/rmaps/base/rmaps_base_frame.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,7 @@ int prte_rmaps_base_set_mapping_policy(prte_job_t *jdata, char *inspec)
542542
cptr = strdup(ck[0]);
543543
*ptr = '='; // restore the option
544544
++ptr;
545-
if (NULL == ptr) {
545+
if ('\0' == *ptr) {
546546
/* malformed option */
547547
pmix_show_help("help-prte-rmaps-base.txt", "unrecognized-policy",
548548
true, "mapping", ck[0]);

src/mca/rmaps/base/rmaps_base_map_job.c

Lines changed: 120 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ static int map_colocate(prte_job_t *jdata,
5757
uint16_t procs_per_target,
5858
prte_rmaps_options_t *options);
5959

60+
static void inherit_env_directives(prte_job_t *jdata,
61+
prte_job_t *parent,
62+
pmix_proc_t *proxy);
63+
6064
void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
6165
{
6266
prte_state_caddy_t *caddy = (prte_state_caddy_t *) cbdata;
@@ -70,7 +74,7 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
7074
prte_job_t *parent = NULL;
7175
prte_app_context_t *app;
7276
bool inherit = false;
73-
pmix_proc_t *nptr, *target_proc;
77+
pmix_proc_t *nptr = NULL, *target_proc;
7478
char *tmp, **ck, **env;
7579
uint16_t u16 = 0, procs_per_target = 0;
7680
uint16_t *u16ptr = &u16;
@@ -229,6 +233,11 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
229233
/* if this is a dynamic job launch and they didn't explicitly
230234
* request inheritance, then don't inherit the launch directives */
231235
if (prte_get_attribute(&jdata->attributes, PRTE_JOB_LAUNCH_PROXY, (void **) &nptr, PMIX_PROC)) {
236+
if (NULL == nptr) {
237+
PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND);
238+
PRTE_ACTIVATE_JOB_STATE(jdata, PRTE_JOB_STATE_MAP_FAILED);
239+
goto cleanup;
240+
}
232241
/* if the launch proxy is me, then this is the initial launch from
233242
* a proxy scenario, so we don't really have a parent */
234243
if (PMIX_CHECK_NSPACE(PRTE_PROC_MY_NAME->nspace, nptr->nspace)) {
@@ -263,7 +272,6 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
263272
} else {
264273
inherit = true;
265274
}
266-
PMIX_PROC_RELEASE(nptr);
267275
} else {
268276
/* initial launch always takes on default MCA params for non-specified policies */
269277
inherit = true;
@@ -309,6 +317,8 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
309317
prte_set_attribute(&jdata->attributes, PRTE_JOB_GPU_SUPPORT, PRTE_ATTR_GLOBAL, fptr, PMIX_BOOL);
310318
}
311319
}
320+
// copy over any env directives, but do not overwrite anything already specified
321+
inherit_env_directives(jdata, parent, nptr);
312322
} else {
313323
if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_HWT_CPUS, NULL, PMIX_BOOL) &&
314324
!prte_get_attribute(&jdata->attributes, PRTE_JOB_CORE_CPUS, NULL, PMIX_BOOL)) {
@@ -321,6 +331,9 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
321331
}
322332
}
323333
}
334+
if (NULL != nptr) {
335+
PMIX_PROC_RELEASE(nptr);
336+
}
324337

325338
/* we always inherit a parent's oversubscribe flag unless the job assigned it */
326339
if (NULL != parent &&
@@ -1250,3 +1263,108 @@ static int map_colocate(prte_job_t *jdata,
12501263
PMIX_LIST_DESTRUCT(&targets);
12511264
return ret;
12521265
}
1266+
1267+
static void inherit_env_directives(prte_job_t *jdata,
1268+
prte_job_t *parent,
1269+
pmix_proc_t *proxy)
1270+
{
1271+
prte_app_context_t *app, *app2;
1272+
prte_proc_t *p;
1273+
prte_attribute_t *attr, *attr2;
1274+
pmix_value_t *val, *val2;
1275+
pmix_envar_t *envar, *envar2;
1276+
int n;
1277+
bool exists;
1278+
1279+
// deal with job-level attributes first
1280+
PMIX_LIST_FOREACH(attr, &parent->attributes, prte_attribute_t) {
1281+
if (PMIX_ENVAR != attr->data.type) {
1282+
continue;
1283+
}
1284+
val = &attr->data;
1285+
envar = &val->data.envar;
1286+
1287+
// do we have a matching attribute in the new job?
1288+
exists = false;
1289+
PMIX_LIST_FOREACH(attr2, &jdata->attributes, prte_attribute_t) {
1290+
if (PMIX_ENVAR != attr->data.type) {
1291+
continue;
1292+
}
1293+
val2 = &attr2->data;
1294+
envar2 = &val2->data.envar;
1295+
1296+
if (attr->key == attr2->key) {
1297+
// operation is same - check if the target envars match
1298+
if (0 == strcmp(envar->envar, envar2->envar)) {
1299+
// these match, so don't overwrite it
1300+
exists = true;
1301+
break;
1302+
}
1303+
}
1304+
}
1305+
1306+
if (exists) {
1307+
// leave this alone
1308+
continue;
1309+
}
1310+
1311+
// if it doesn't exist, then inherit it
1312+
prte_prepend_attribute(&jdata->attributes, attr->key, PRTE_ATTR_GLOBAL,
1313+
envar, PMIX_ENVAR);
1314+
}
1315+
1316+
/* There is no one-to-one correlation between the apps, but we can
1317+
* inherit the directives from the proc that called spawn, so do that
1318+
* much here */
1319+
p = prte_get_proc_object(proxy);
1320+
if (NULL == p) {
1321+
PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND);
1322+
return;
1323+
}
1324+
app = (prte_app_context_t*)pmix_pointer_array_get_item(parent->apps, p->app_idx);
1325+
if (NULL == app) {
1326+
PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND);
1327+
return;
1328+
}
1329+
for (n=0; n < jdata->apps->size; n++) {
1330+
app2 = (prte_app_context_t*)pmix_pointer_array_get_item(jdata->apps, n);
1331+
if (NULL == app2) {
1332+
continue;
1333+
}
1334+
PMIX_LIST_FOREACH(attr, &app->attributes, prte_attribute_t) {
1335+
if (PMIX_ENVAR != attr->data.type) {
1336+
continue;
1337+
}
1338+
val = &attr->data;
1339+
envar = &val->data.envar;
1340+
1341+
exists = false;
1342+
PMIX_LIST_FOREACH(attr2, &app2->attributes, prte_attribute_t) {
1343+
if (PMIX_ENVAR != attr->data.type) {
1344+
continue;
1345+
}
1346+
val2 = &attr2->data;
1347+
envar2 = &val2->data.envar;
1348+
1349+
if (attr->key == attr2->key) {
1350+
// operation is same - check if the target envars match
1351+
if (0 == strcmp(envar->envar, envar2->envar)) {
1352+
// these match, so don't overwrite it
1353+
exists = true;
1354+
break;
1355+
}
1356+
}
1357+
}
1358+
1359+
if (exists) {
1360+
// leave this alone
1361+
continue;
1362+
}
1363+
1364+
// if it doesn't exist, then inherit it
1365+
prte_prepend_attribute(&app2->attributes, attr->key, PRTE_ATTR_GLOBAL,
1366+
envar, PMIX_ENVAR);
1367+
}
1368+
}
1369+
1370+
}

src/mca/rmaps/round_robin/rmaps_rr_mappers.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -545,9 +545,7 @@ int prte_rmaps_rr_bycpu(prte_job_t *jdata, prte_app_context_t *app,
545545
if (PRTE_ERR_SILENT != rc) {
546546
pmix_show_help("help-prte-rmaps-rr.txt",
547547
"prte-rmaps-rr:not-enough-cpus", true,
548-
(NULL == app) ? "N/A" : app->app,
549-
(NULL == app) ? -1 : app->num_procs,
550-
savecpuset);
548+
app->app, app->num_procs, savecpuset);
551549
}
552550
if (NULL != savecpuset) {
553551
free(savecpuset);

0 commit comments

Comments
 (0)