Skip to content

Commit 2d6ab3a

Browse files
authored
Merge pull request #601 from jgehrcke/jp/perm-error-in-prepare-path
Return strict-decode errors in NodePrepareResources() as permanentError
2 parents 337b593 + 12da0f5 commit 2d6ab3a

File tree

2 files changed

+10
-2
lines changed

2 files changed

+10
-2
lines changed

cmd/compute-domain-kubelet-plugin/device_state.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ func (s *DeviceState) getConfigResultsMap(rcs *resourceapi.ResourceClaimStatus,
509509
rcs.Allocation.Devices.Config,
510510
)
511511
if err != nil {
512-
return nil, fmt.Errorf("error getting opaque device configs: %v", err)
512+
return nil, fmt.Errorf("error getting opaque device configs: %w", err)
513513
}
514514

515515
// Add the default ComputeDomainConfig to the front of the config list with the
@@ -613,7 +613,11 @@ func GetOpaqueDeviceConfigs(
613613

614614
decodedConfig, err := runtime.Decode(decoder, config.Opaque.Parameters.Raw)
615615
if err != nil {
616-
return nil, fmt.Errorf("error decoding config parameters: %w", err)
616+
// Bad opaque config: i) do not retry preparing this resource
617+
// internally and ii) signal a permanent error to the kubelet, giving
618+
// it an opportunity to surface this error to the user so that it
619+
// becomes actionable.
620+
return nil, permanentError{fmt.Errorf("error decoding config parameters: %w", err)}
617621
}
618622

619623
resultConfig := &OpaqueDeviceConfig{

cmd/compute-domain-kubelet-plugin/driver.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,10 @@ func (d *driver) HandleError(ctx context.Context, err error, msg string) {
221221
runtime.HandleErrorWithContext(ctx, err, msg)
222222
}
223223

224+
// nodePrepareResource() returns two values: the first is a boolean
225+
// indicating whether the work is 'done'; the second is a result which may
226+
// also carry an error. Set the boolean to `true` for any result wrapping a
227+
// non-retryable error.
224228
func (d *driver) nodePrepareResource(ctx context.Context, claim *resourceapi.ResourceClaim) (bool, kubeletplugin.PrepareResult) {
225229
release, err := d.pulock.Acquire(ctx, flock.WithTimeout(10*time.Second))
226230
if err != nil {

0 commit comments

Comments
 (0)