Skip to content

Commit 8057edd

Browse files
committed
chore(nvidia): provide more test job info
1 parent 1058536 commit 8057edd

File tree

1 file changed

+9
-6
lines changed

1 file changed

+9
-6
lines changed

test/cases/nvidia/mpi_test.go

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -107,9 +107,10 @@ func multiNode(testName string) features.Feature {
107107
if err != nil {
108108
t.Error(err)
109109
}
110+
t.Logf("final mpijob resource: %v", mpiJob)
110111
log, err := fwext.GetJobLogs(cfg.Client().RESTConfig(), mpiJob)
111112
if err != nil {
112-
t.Error(err)
113+
t.Errorf("failed to get job logs: %v", err)
113114
}
114115
t.Logf("Test log for %s:", jobName)
115116
t.Log(log)
@@ -167,9 +168,11 @@ func singleNode() features.Feature {
167168
err := wait.For(fwext.NewConditionExtension(cfg.Client().Resources()).ResourceMatch(mpiJob, mpijobs.MPIJobSucceeded),
168169
wait.WithContext(ctx))
169170
if err != nil {
170-
t.Fatal(err)
171+
t.Error(err)
172+
} else {
173+
t.Log("Single node job completed")
171174
}
172-
t.Log("Single node job completed")
175+
t.Logf("final mpijob resource: %v", mpiJob)
173176
return ctx
174177
}).
175178
Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
@@ -180,17 +183,17 @@ func singleNode() features.Feature {
180183
}
181184
u, ok := job.(*unstructured.Unstructured)
182185
if !ok {
183-
t.Fatalf("mpiJob in context is not unstructured: %v", job)
186+
t.Errorf("mpiJob in context is not unstructured: %v", job)
184187
}
185188
log, err := fwext.GetJobLogs(cfg.Client().RESTConfig(), u)
186189
if err != nil {
187-
t.Fatal(err)
190+
t.Errorf("failed to get job logs: %v", err)
188191
}
189192
t.Log("Test log for pytorch-training-single-node:")
190193
t.Log(log)
191194
err = fwext.DeleteManifests(cfg.Client().RESTConfig(), renderedSingleNodeManifest)
192195
if err != nil {
193-
t.Fatal(err)
196+
t.Error(err)
194197
}
195198
return ctx
196199
}).

0 commit comments

Comments
 (0)