Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# Copyright (c) 2015-2020 Intel, Inc. All rights reserved.
#
# Copyright (c) 2021 Nanook Consulting All rights reserved.
# Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand Down
21 changes: 21 additions & 0 deletions src/mca/ess/base/ess_base_bootstrap.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,15 @@ int prte_ess_base_bootstrap(void)

/* identify and cache the option */
if (0 == strcmp(line, "ClusterName")) {
if (NULL != cluster) {
free(cluster);
}
cluster = strdup(ptr);

} else if (0 == strcmp(line, "DVMControllerHost")) {
if (NULL != ctrlhost) {
free(ctrlhost);
}
ctrlhost = strdup(ptr);

} else if (0 == strcmp(line, "DVMControllerPort")) {
Expand All @@ -163,18 +169,33 @@ int prte_ess_base_bootstrap(void)
prtedport = strtoul(ptr, NULL, 10);

} else if (0 == strcmp(line, "DVMNodes")) {
if (NULL == dvmnodes) {
free(dvmnodes);
}
dvmnodes = strdup(ptr);

} else if (0 == strcmp(line, "DVMTempDir")) {
if (NULL == dvmtmpdir) {
free(dvmtmpdir);
}
dvmtmpdir = strdup(ptr);

} else if (0 == strcmp(line, "SessionTmpDir")) {
if (NULL != sessiontmpdir) {
free(sessiontmpdir);
}
sessiontmpdir = strdup(ptr);

} else if (0 == strcmp(line, "ControllerLogPath")) {
if (NULL != ctrllogpath) {
free(ctrllogpath);
}
ctrllogpath = strdup(ptr);

} else if (0 == strcmp(line, "PRTEDLogPath")) {
if (NULL != prtedlogpath) {
free(prtedlogpath);
}
prtedlogpath = strdup(ptr);
}
free(line);
Expand Down
32 changes: 28 additions & 4 deletions src/mca/grpcomm/direct/grpcomm_direct_group.c
Original file line number Diff line number Diff line change
Expand Up @@ -586,19 +586,43 @@ void prte_grpcomm_direct_grp_recv(int status, pmix_proc_t *sender,
PMIx_Info_list_convert(coll->grpinfo, &darray);
info = (pmix_info_t*)darray.array;
ninfo = darray.size;
PMIx_Data_pack(NULL, reply, &ninfo, 1, PMIX_SIZE);
rc = PMIx_Data_pack(NULL, reply, &ninfo, 1, PMIX_SIZE);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
PMIX_DATA_BUFFER_RELEASE(reply);
PMIX_RELEASE(sig);
return;
}
if (0 < ninfo) {
PMIx_Data_pack(NULL, reply, info, ninfo, PMIX_INFO);
rc = PMIx_Data_pack(NULL, reply, info, ninfo, PMIX_INFO);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
PMIX_DATA_BUFFER_RELEASE(reply);
PMIX_RELEASE(sig);
return;
}
}
PMIX_DATA_ARRAY_DESTRUCT(&darray);

// pack any endpts
PMIx_Info_list_convert(coll->endpts, &darray);
info = (pmix_info_t*)darray.array;
ninfo = darray.size;
PMIx_Data_pack(NULL, reply, &ninfo, 1, PMIX_SIZE);
rc = PMIx_Data_pack(NULL, reply, &ninfo, 1, PMIX_SIZE);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
PMIX_DATA_BUFFER_RELEASE(reply);
PMIX_RELEASE(sig);
return;
}
if (0 < ninfo) {
PMIx_Data_pack(NULL, reply, info, ninfo, PMIX_INFO);
rc = PMIx_Data_pack(NULL, reply, info, ninfo, PMIX_INFO);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
PMIX_DATA_BUFFER_RELEASE(reply);
PMIX_RELEASE(sig);
return;
}
}
PMIX_DATA_ARRAY_DESTRUCT(&darray);
}
Expand Down
33 changes: 10 additions & 23 deletions src/mca/odls/base/odls_base_default_fns.c
Original file line number Diff line number Diff line change
Expand Up @@ -655,8 +655,8 @@ int prte_odls_base_default_construct_child_list(pmix_data_buffer_t *buffer, pmix
/* add any cache'd values to the front of the job attributes */
for (m = 0; m < ninfo; m++) {
if (0 == strcmp(info[m].key, PMIX_SET_ENVAR)) {
envt.envar = strdup(info[m].value.data.envar.envar);
envt.value = strdup(info[m].value.data.envar.value);
envt.envar = info[m].value.data.envar.envar;
envt.value = info[m].value.data.envar.value;
envt.separator = info[m].value.data.envar.separator;
prte_prepend_attribute(&jdata->attributes, PRTE_JOB_SET_ENVAR, PRTE_ATTR_GLOBAL,
&envt, PMIX_ENVAR);
Expand Down Expand Up @@ -1112,6 +1112,9 @@ static void process_envars(prte_job_t *jdata,
bool found;

PMIX_LIST_FOREACH(attr, &jdata->attributes, prte_attribute_t) {
if (PMIX_ENVAR != attr->data.type) {
continue;
}
val = &attr->data;
envar = &val->data.envar;
if (attr->key == PRTE_JOB_SET_ENVAR) {
Expand Down Expand Up @@ -1186,6 +1189,9 @@ static void process_envars(prte_job_t *jdata,

// app trumps job, so do it after the job
PMIX_LIST_FOREACH(attr, &app->attributes, prte_attribute_t) {
if (PMIX_ENVAR != attr->data.type) {
continue;
}
val = &attr->data;
envar = &val->data.envar;
if (attr->key == PRTE_APP_SET_ENVAR) {
Expand Down Expand Up @@ -1252,15 +1258,14 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
int j, idx;
int total_num_local_procs = 0;
prte_odls_launch_local_t *caddy = (prte_odls_launch_local_t *) cbdata;
prte_job_t *jobdat, *parent;
prte_job_t *jobdat;
pmix_nspace_t job;
prte_odls_base_fork_local_proc_fn_t fork_local = caddy->fork_local;
bool index_argv, inherit;
bool index_argv;
char *msg, **xfer;
prte_odls_spawn_caddy_t *cd;
prte_event_base_t *evb;
prte_schizo_base_module_t *schizo;
pmix_proc_t *nptr;
PRTE_HIDE_UNUSED_PARAMS(fd, sd);

PMIX_ACQUIRE_OBJECT(caddy);
Expand Down Expand Up @@ -1353,20 +1358,6 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
}
}

// see if we have a parent in case of inheritance
nptr = NULL;
prte_get_attribute(&jobdat->attributes, PRTE_JOB_LAUNCH_PROXY, (void **) &nptr, PMIX_PROC);
if (NULL != nptr) {
parent = prte_get_job_data_object(nptr->nspace);
if (NULL != parent) {
inherit = prte_get_attribute(&parent->attributes, PRTE_JOB_INHERIT, NULL, PMIX_BOOL);
} else {
inherit = false;
}
} else {
inherit = false;
}

for (j = 0; j < jobdat->apps->size; j++) {
app = (prte_app_context_t *) pmix_pointer_array_get_item(jobdat->apps, j);
if (NULL == app) {
Expand Down Expand Up @@ -1410,10 +1401,6 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
}

// process any provided env directives
if (inherit) {
// start with the parent's directives
process_envars(parent, app);
}
process_envars(jobdat, app);


Expand Down
2 changes: 1 addition & 1 deletion src/mca/rmaps/base/rmaps_base_frame.c
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ int prte_rmaps_base_set_mapping_policy(prte_job_t *jdata, char *inspec)
cptr = strdup(ck[0]);
*ptr = '='; // restore the option
++ptr;
if (NULL == ptr) {
if ('\0' == *ptr) {
/* malformed option */
pmix_show_help("help-prte-rmaps-base.txt", "unrecognized-policy",
true, "mapping", ck[0]);
Expand Down
122 changes: 120 additions & 2 deletions src/mca/rmaps/base/rmaps_base_map_job.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ static int map_colocate(prte_job_t *jdata,
uint16_t procs_per_target,
prte_rmaps_options_t *options);

/* Inherit environment-variable directives (attributes whose value type is
 * PMIX_ENVAR) from a parent job into a newly spawned job.
 *
 * jdata  - the new job receiving the directives
 * parent - the job whose job-level directives are inherited
 * proxy  - the proc that called spawn; the directives of its app context
 *          in the parent job are inherited by every app of jdata
 *
 * A directive is only added when jdata (or the target app) does not already
 * carry one with the same attribute key and the same envar name. */
static void inherit_env_directives(prte_job_t *jdata,
prte_job_t *parent,
pmix_proc_t *proxy);

void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
{
prte_state_caddy_t *caddy = (prte_state_caddy_t *) cbdata;
Expand All @@ -70,7 +74,7 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
prte_job_t *parent = NULL;
prte_app_context_t *app;
bool inherit = false;
pmix_proc_t *nptr, *target_proc;
pmix_proc_t *nptr = NULL, *target_proc;
char *tmp, **ck, **env;
uint16_t u16 = 0, procs_per_target = 0;
uint16_t *u16ptr = &u16;
Expand Down Expand Up @@ -229,6 +233,11 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
/* if this is a dynamic job launch and they didn't explicitly
* request inheritance, then don't inherit the launch directives */
if (prte_get_attribute(&jdata->attributes, PRTE_JOB_LAUNCH_PROXY, (void **) &nptr, PMIX_PROC)) {
if (NULL == nptr) {
PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND);
PRTE_ACTIVATE_JOB_STATE(jdata, PRTE_JOB_STATE_MAP_FAILED);
goto cleanup;
}
/* if the launch proxy is me, then this is the initial launch from
* a proxy scenario, so we don't really have a parent */
if (PMIX_CHECK_NSPACE(PRTE_PROC_MY_NAME->nspace, nptr->nspace)) {
Expand Down Expand Up @@ -263,7 +272,6 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
} else {
inherit = true;
}
PMIX_PROC_RELEASE(nptr);
} else {
/* initial launch always takes on default MCA params for non-specified policies */
inherit = true;
Expand Down Expand Up @@ -309,6 +317,8 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
prte_set_attribute(&jdata->attributes, PRTE_JOB_GPU_SUPPORT, PRTE_ATTR_GLOBAL, fptr, PMIX_BOOL);
}
}
// copy over any env directives, but do not overwrite anything already specified
inherit_env_directives(jdata, parent, nptr);
} else {
if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_HWT_CPUS, NULL, PMIX_BOOL) &&
!prte_get_attribute(&jdata->attributes, PRTE_JOB_CORE_CPUS, NULL, PMIX_BOOL)) {
Expand All @@ -321,6 +331,9 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
}
}
}
if (NULL != nptr) {
PMIX_PROC_RELEASE(nptr);
}

/* we always inherit a parent's oversubscribe flag unless the job assigned it */
if (NULL != parent &&
Expand Down Expand Up @@ -1250,3 +1263,108 @@ static int map_colocate(prte_job_t *jdata,
PMIX_LIST_DESTRUCT(&targets);
return ret;
}

static void inherit_env_directives(prte_job_t *jdata,
prte_job_t *parent,
pmix_proc_t *proxy)
{
prte_app_context_t *app, *app2;
prte_proc_t *p;
prte_attribute_t *attr, *attr2;
pmix_value_t *val, *val2;
pmix_envar_t *envar, *envar2;
int n;
bool exists;

// deal with job-level attributes first
PMIX_LIST_FOREACH(attr, &parent->attributes, prte_attribute_t) {
if (PMIX_ENVAR != attr->data.type) {
continue;
}
val = &attr->data;
envar = &val->data.envar;

// do we have a matching attribute in the new job?
exists = false;
PMIX_LIST_FOREACH(attr2, &jdata->attributes, prte_attribute_t) {
if (PMIX_ENVAR != attr->data.type) {
continue;
}
val2 = &attr2->data;
envar2 = &val2->data.envar;

if (attr->key == attr2->key) {
// operation is same - check if the target envars match
if (0 == strcmp(envar->envar, envar2->envar)) {
// these match, so don't overwrite it
exists = true;
break;
}
}
}

if (exists) {
// leave this alone
continue;
}

// if it doesn't exist, then inherit it
prte_prepend_attribute(&jdata->attributes, attr->key, PRTE_ATTR_GLOBAL,
envar, PMIX_ENVAR);
}

/* There is no one-to-one correlation between the apps, but we can
* inherit the directives from the proc that called spawn, so do that
* much here */
p = prte_get_proc_object(proxy);
if (NULL == p) {
PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND);
return;
}
app = (prte_app_context_t*)pmix_pointer_array_get_item(parent->apps, p->app_idx);
if (NULL == app) {
PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND);
return;
}
for (n=0; n < jdata->apps->size; n++) {
app2 = (prte_app_context_t*)pmix_pointer_array_get_item(jdata->apps, n);
if (NULL == app2) {
continue;
}
PMIX_LIST_FOREACH(attr, &app->attributes, prte_attribute_t) {
if (PMIX_ENVAR != attr->data.type) {
continue;
}
val = &attr->data;
envar = &val->data.envar;

exists = false;
PMIX_LIST_FOREACH(attr2, &app2->attributes, prte_attribute_t) {
if (PMIX_ENVAR != attr->data.type) {
continue;
}
val2 = &attr2->data;
envar2 = &val2->data.envar;

if (attr->key == attr2->key) {
// operation is same - check if the target envars match
if (0 == strcmp(envar->envar, envar2->envar)) {
// these match, so don't overwrite it
exists = true;
break;
}
}
}

if (exists) {
// leave this alone
continue;
}

// if it doesn't exist, then inherit it
prte_prepend_attribute(&app2->attributes, attr->key, PRTE_ATTR_GLOBAL,
envar, PMIX_ENVAR);
}
}

}
4 changes: 1 addition & 3 deletions src/mca/rmaps/round_robin/rmaps_rr_mappers.c
Original file line number Diff line number Diff line change
Expand Up @@ -545,9 +545,7 @@ int prte_rmaps_rr_bycpu(prte_job_t *jdata, prte_app_context_t *app,
if (PRTE_ERR_SILENT != rc) {
pmix_show_help("help-prte-rmaps-rr.txt",
"prte-rmaps-rr:not-enough-cpus", true,
(NULL == app) ? "N/A" : app->app,
(NULL == app) ? -1 : app->num_procs,
savecpuset);
app->app, app->num_procs, savecpuset);
}
if (NULL != savecpuset) {
free(savecpuset);
Expand Down
Loading
Loading