15 changes: 8 additions & 7 deletions controllers/controllers_suite_test.go
@@ -91,15 +91,16 @@ var _ = BeforeSuite(func() {
fakeRecorder = record.NewFakeRecorder(20)
// Create a ReconcileNodeMaintenance object with the scheme and fake client
r = &NodeMaintenanceReconciler{
Client: k8sClient,
Scheme: scheme.Scheme,
MgrConfig: cfg,
LeaseManager: &mockLeaseManager{mockManager},
Recorder: fakeRecorder,
logger: ctrl.Log.WithName("unit test"),
Client: k8sClient,
Scheme: scheme.Scheme,
MgrConfig: cfg,
LeaseManager: &mockLeaseManager{mockManager},
Recorder: fakeRecorder,
logger: ctrl.Log.WithName("unit test"),
DrainerTimeout: DefaultDrainerTimeout,
}
ctx, cancel = context.WithCancel(ctrl.SetupSignalHandler())
drainer, err = createDrainer(ctx, cfg)
drainer, err = createDrainer(ctx, cfg, r.DrainerTimeout)
Expect(err).NotTo(HaveOccurred())
// in test pods are not evicted, so don't wait forever for them
drainer.SkipWaitForDeleteTimeoutSeconds = 0
14 changes: 8 additions & 6 deletions controllers/nodemaintenance_controller.go
@@ -54,9 +54,9 @@ const (
expectedNodeNotFoundErrorMsg = "nodes \"%s\" not found"

//lease consts
LeaseHolderIdentity = "node-maintenance"
LeaseDuration = 3600 * time.Second
DrainerTimeout = 30 * time.Second
LeaseHolderIdentity = "node-maintenance"
LeaseDuration = 3600 * time.Second
DefaultDrainerTimeout = 30 * time.Second
)

// NodeMaintenanceReconciler reconciles a NodeMaintenance object
@@ -67,6 +67,8 @@ type NodeMaintenanceReconciler struct {
LeaseManager lease.Manager
Recorder record.EventRecorder
logger logr.Logger

DrainerTimeout time.Duration
}

// SetupWithManager sets up the controller with the Manager.
@@ -121,7 +123,7 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ
r.logger.Info("Error reading the request object, requeuing.")
return emptyResult, err
}
drainer, err := createDrainer(ctx, r.MgrConfig)
drainer, err := createDrainer(ctx, r.MgrConfig, r.DrainerTimeout)
if err != nil {
return emptyResult, err
}
@@ -257,7 +259,7 @@ func (r *NodeMaintenanceReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}

// createDrainer creates a drain.Helper struct for external cordon and drain API
func createDrainer(ctx context.Context, mgrConfig *rest.Config) (*drain.Helper, error) {
func createDrainer(ctx context.Context, mgrConfig *rest.Config, timeout time.Duration) (*drain.Helper, error) {
drainer := &drain.Helper{}

//Continue even if there are pods not managed by a ReplicationController, ReplicaSet, Job, DaemonSet or StatefulSet.
@@ -284,7 +286,7 @@ func createDrainer(ctx context.Context, mgrConfig *rest.Config) (*drain.Helper,

// TODO - add logical value or attach from the maintenance CR
//The length of time to wait before giving up, zero means infinite
drainer.Timeout = DrainerTimeout
drainer.Timeout = timeout

cs, err := kubernetes.NewForConfig(mgrConfig)
if err != nil {
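
For orientation, here is a minimal sketch of what the timeout-aware drainer construction boils down to. It assumes the drain.Helper field names from k8s.io/kubectl/pkg/drain (Ctx, Client, Timeout, Out, ErrOut); newDrainerSketch is a hypothetical name, and the PR's real createDrainer sets additional options (force, daemonset handling, grace period) on top of this.

```go
package controllers

import (
	"context"
	"os"
	"time"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/kubectl/pkg/drain"
)

// newDrainerSketch is a hypothetical, stripped-down stand-in for createDrainer:
// it only shows how the configurable timeout ends up on the drain helper.
func newDrainerSketch(ctx context.Context, cfg *rest.Config, timeout time.Duration) (*drain.Helper, error) {
	// Build a typed clientset from the manager's rest.Config.
	cs, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		return nil, err
	}
	return &drain.Helper{
		Ctx:     ctx,
		Client:  cs,
		Timeout: timeout, // zero means wait forever; the PR wires DrainerTimeout here
		Out:     os.Stdout,
		ErrOut:  os.Stderr,
	}, nil
}
```
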
31 changes: 20 additions & 11 deletions main.go
@@ -24,6 +24,7 @@ import (
"os"
"path/filepath"
"runtime"
"time"

"github.com/medik8s/common/pkg/lease"
"go.uber.org/zap/zapcore"
@@ -52,7 +53,7 @@ const (
WebhookCertDir = "/apiserver.local.config/certificates"
WebhookCertName = "apiserver.crt"
WebhookKeyName = "apiserver.key"
operatorName = "NodeMaintenance"
operatorName = "NodeMaintenance"
)

var (
@@ -69,16 +70,24 @@ func init() {

func main() {
var (
metricsAddr, probeAddr string
metricsAddr, probeAddr string
enableLeaderElection, enableHTTP2 bool
webhookOpts webhook.Options
)
webhookOpts webhook.Options
drainerTimeout time.Duration
)
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
flag.BoolVar(&enableHTTP2, "enable-http2", false, "If HTTP/2 should be enabled for the metrics and webhook servers.")
flag.DurationVar(&drainerTimeout, "drainer-timeout", controllers.DefaultDrainerTimeout, "Timeout for draining a node.")
Review comment (Member): would it make sense to check for reasonable values?

Review comment (Member): Probably not relevant to this PR, but this looks to me like something that would make sense to put in a configuration instead, no? That would also simplify value validation.

if drainerTimeout <= 0 {
setupLog.Error(fmt.Errorf("got non-positive DrainerTimeout='%s'", drainerTimeout), "Invalid DrainerTimeout")
os.Exit(1)
}
setupLog.Info(fmt.Sprintf("Using drainer timeout: %s", drainerTimeout))

opts := zap.Options{
Development: true,
@@ -94,7 +103,7 @@ func main() {
configureWebhookOpts(&webhookOpts, enableHTTP2)

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
Scheme: scheme,
Scheme: scheme,
WebhookServer: webhook.NewServer(webhookOpts),
HealthProbeBindAddress: probeAddr,
LeaderElection: enableLeaderElection,
@@ -105,31 +114,31 @@
os.Exit(1)
}


cl := mgr.GetClient()
leaseManagerInitializer := &leaseManagerInitializer{cl: cl}
if err := mgr.Add(leaseManagerInitializer); err != nil {
setupLog.Error(err, "unable to set up lease Manager", "lease", operatorName)
os.Exit(1)
}
openshiftCheck,err := utils.NewOpenshiftValidator(mgr.GetConfig())

openshiftCheck, err := utils.NewOpenshiftValidator(mgr.GetConfig())
if err != nil {
setupLog.Error(err, "failed to check if we run on Openshift")
os.Exit(1)
}
isOpenShift := openshiftCheck.IsOpenshiftSupported()
if isOpenShift{
if isOpenShift {
setupLog.Info("NMO was installed on Openshift cluster")
}


if err = (&controllers.NodeMaintenanceReconciler{
Client: cl,
Scheme: mgr.GetScheme(),
MgrConfig: mgr.GetConfig(),
LeaseManager: leaseManagerInitializer,
Recorder: mgr.GetEventRecorderFor(operatorName),
Recorder: mgr.GetEventRecorderFor(operatorName),

DrainerTimeout: drainerTimeout,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", operatorName)
os.Exit(1)
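
To illustrate the flag handling and the validation the reviewers discuss, here is a standalone sketch using Go's standard flag package. It assumes the same flag name and the 30-second default from this PR; everything else (the error text, the plain fmt output) is illustrative rather than the operator's actual logging.

```go
package main

import (
	"flag"
	"fmt"
	"os"
	"time"
)

func main() {
	var drainerTimeout time.Duration
	// flag.DurationVar accepts values such as "30s", "2m" or "1h30m".
	flag.DurationVar(&drainerTimeout, "drainer-timeout", 30*time.Second, "Timeout for draining a node.")
	flag.Parse()

	// Reject zero and negative values; for drain.Helper a zero timeout means "wait forever".
	if drainerTimeout <= 0 {
		fmt.Fprintf(os.Stderr, "invalid -drainer-timeout %v: must be positive\n", drainerTimeout)
		os.Exit(1)
	}
	fmt.Printf("using drainer timeout: %s\n", drainerTimeout)
}
```

Run with something like `go run . -drainer-timeout=2m`; a zero or negative value exits before anything else starts, mirroring the PR's setupLog.Error followed by os.Exit(1). An upper bound, as one reviewer suggests, could be enforced with a second comparison in the same place.
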
2 changes: 1 addition & 1 deletion test/e2e/node_maintenance_test.go
@@ -342,7 +342,7 @@ func createTestDeployment() {
Args: []string{"-c", "while true; do echo hello; sleep 10;done"},
}},
// make sure we run into the drain timeout at least once
TerminationGracePeriodSeconds: ptr.To[int64](int64(nodemaintenance.DrainerTimeout.Seconds()) + 50),
TerminationGracePeriodSeconds: ptr.To[int64](int64(nodemaintenance.DefaultDrainerTimeout.Seconds()) + 50),
},
},
},
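
As a quick check of the arithmetic behind that test comment: with the 30-second DefaultDrainerTimeout assumed here, the pod's termination grace period becomes 80 seconds, comfortably longer than the drain timeout, so at least one drain attempt is guaranteed to time out.

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	defaultDrainerTimeout := 30 * time.Second // DefaultDrainerTimeout in this PR
	// Grace period is drain timeout + 50s, i.e. 80s, so the first drain attempt times out.
	gracePeriodSeconds := int64(defaultDrainerTimeout.Seconds()) + 50
	fmt.Println("termination grace period:", gracePeriodSeconds, "seconds")
}
```
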