Skip to content

Commit 0814d75

Browse files
committed
Healthcheck plugins on dispense, re-instantiating broken ones
1 parent db858bb commit 0814d75

File tree

2 files changed

+101
-37
lines changed

2 files changed

+101
-37
lines changed

plugins/manager/manager.go

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -187,25 +187,45 @@ func (pm *PluginManager) killPluginLocked(pID plugins.PluginID) {
187187
// Dispense returns a PluginInstance for use by safely obtaining the
188188
// PluginInstance from storage if we have it.
189189
func (pm *PluginManager) Dispense(name, pluginType string) (PluginInstance, error) {
190-
191190
// Measure the time taken to dispense a plugin. This helps identify
192191
// contention and pressure obtaining plugin client handles.
193192
labels := []metrics.Label{{Name: "plugin_name", Value: name}, {Name: "plugin_type", Value: pluginType}}
194193
defer metrics.MeasureSinceWithLabels([]string{"plugin", "manager", "access_ms"}, time.Now(), labels)
195194

196-
pm.pluginInstancesLock.RLock()
197-
defer pm.pluginInstancesLock.RUnlock()
195+
pID := plugins.PluginID{Name: name, PluginType: pluginType}
198196

199197
// Attempt to pull our plugin instance from the store and pass this to the
200-
// caller.
201-
//
202-
// TODO(jrasell) if we do not find the instance, we should probably try and
203-
// dispense the plugin. We should also check the plugin instance has not
204-
// exited.
205-
inst, ok := pm.pluginInstances[plugins.PluginID{Name: name, PluginType: pluginType}]
198+
// caller. If the plugin isn't in the store, try to instantiate it; if it
199+
// is, do a health check and attempt to re-instantiate if it fails.
200+
pm.pluginInstancesLock.Lock()
201+
inst, exists := pm.pluginInstances[pID]
202+
203+
if exists {
204+
if _, err := pm.pluginInfo(pID, inst); err == nil {
205+
// Plugin is fine, return it
206+
pm.pluginInstancesLock.Unlock()
207+
return inst, nil
208+
}
209+
210+
// The plugin exists, but is broken. Remove it.
211+
pm.logger.Warn("plugin failed healthcheck", "plugin_name", pID.Name)
212+
pm.killPluginLocked(pID)
213+
} else {
214+
pm.logger.Warn("plugin not in store", "plugin_name", pID.Name)
215+
}
216+
pm.pluginInstancesLock.Unlock()
217+
218+
if err := pm.dispensePlugins(); err != nil {
219+
return nil, fmt.Errorf("failed to dispense plugin: %q of type %q: %w", name, pluginType, err)
220+
}
221+
222+
pm.pluginInstancesLock.RLock()
223+
inst, ok := pm.pluginInstances[pID]
224+
pm.pluginInstancesLock.RUnlock()
206225
if !ok {
207226
return nil, fmt.Errorf("failed to dispense plugin: %q of type %q is not stored", name, pluginType)
208227
}
228+
209229
return inst, nil
210230
}
211231

@@ -322,7 +342,23 @@ func (pm *PluginManager) launchExternalPlugin(id plugins.PluginID, info *pluginI
322342
}
323343

324344
func (pm *PluginManager) pluginLaunchCheck(id plugins.PluginID, info *pluginInfo, raw interface{}) (*base.PluginInfo, error) {
345+
pluginInfo, err := pm.pluginInfo(id, raw)
346+
if err != nil {
347+
return nil, err
348+
}
349+
350+
// If the plugin name, or plugin do not match it means the executed plugin
351+
// has returned its metadata that is different to the configured. This is a
352+
// problem, particularly in the PluginType sense as it means it will be
353+
// unable to fulfill its role.
354+
if pluginInfo.Name != info.driver || pluginInfo.PluginType != id.PluginType {
355+
return nil, fmt.Errorf("plugin %s remote info doesn't match local config: %v", id.Name, err)
356+
}
357+
358+
return pluginInfo, nil
359+
}
325360

361+
func (pm *PluginManager) pluginInfo(id plugins.PluginID, raw interface{}) (*base.PluginInfo, error) {
326362
// Check that the plugin implements the base plugin interface. As these are
327363
// external plugins we need to check this safely, otherwise an incorrect
328364
// plugin can cause the core application to panic.
@@ -336,14 +372,6 @@ func (pm *PluginManager) pluginLaunchCheck(id plugins.PluginID, info *pluginInfo
336372
return nil, fmt.Errorf("failed to call PluginInfo on %s: %v", id.Name, err)
337373
}
338374

339-
// If the plugin name, or plugin do not match it means the executed plugin
340-
// has returned its metadata that is different to the configured. This is a
341-
// problem, particularly in the PluginType sense as it means it will be
342-
// unable to fulfill its role.
343-
if pluginInfo.Name != info.driver || pluginInfo.PluginType != id.PluginType {
344-
return nil, fmt.Errorf("plugin %s remote info doesn't match local config: %v", id.Name, err)
345-
}
346-
347375
return pluginInfo, nil
348376
}
349377

plugins/manager/manager_test.go

Lines changed: 56 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@ import (
99
"github.com/hashicorp/go-hclog"
1010
"github.com/hashicorp/nomad-autoscaler/agent/config"
1111
"github.com/hashicorp/nomad-autoscaler/plugins/apm"
12+
"github.com/hashicorp/nomad-autoscaler/plugins/base"
1213
"github.com/hashicorp/nomad-autoscaler/plugins/strategy"
1314
"github.com/hashicorp/nomad-autoscaler/plugins/target"
1415
"github.com/stretchr/testify/assert"
16+
"github.com/stretchr/testify/require"
1517
)
1618

1719
func TestLoad(t *testing.T) {
@@ -153,26 +155,7 @@ func TestDispense(t *testing.T) {
153155
{
154156
name: "external plugin",
155157
pluginDir: "../test/bin",
156-
cfg: map[string][]*config.Plugin{
157-
"apm": {
158-
&config.Plugin{
159-
Name: "noop",
160-
Driver: "noop-apm",
161-
},
162-
},
163-
"strategy": {
164-
&config.Plugin{
165-
Name: "noop",
166-
Driver: "noop-strategy",
167-
},
168-
},
169-
"target": {
170-
&config.Plugin{
171-
Name: "noop",
172-
Driver: "noop-target",
173-
},
174-
},
175-
},
158+
cfg: externalPlugins(),
176159
},
177160
}
178161

@@ -205,3 +188,56 @@ func TestDispense(t *testing.T) {
205188
})
206189
}
207190
}
191+
192+
func TestExternalPluginDies(t *testing.T) {
193+
logger := hclog.NewNullLogger()
194+
pm := NewPluginManager(logger, "../test/bin", externalPlugins())
195+
defer pm.KillPlugins()
196+
197+
err := pm.Load()
198+
require.NoError(t, err)
199+
pi, err := pm.Dispense("noop", "target")
200+
require.NoError(t, err)
201+
202+
// Verify we can talk to the plugin
203+
client := pi.Plugin().(base.Base)
204+
_, err = client.PluginInfo()
205+
require.NoError(t, err)
206+
207+
// Kill the plugin without the manager noticing
208+
pi.Kill()
209+
_, err = client.PluginInfo()
210+
require.Error(t, err)
211+
212+
// Now, re-dispense. The manager should recover
213+
pi, err = pm.Dispense("noop", "target")
214+
require.NoError(t, err)
215+
216+
// Verify that the returned plugin works
217+
client = pi.Plugin().(base.Base)
218+
_, err = client.PluginInfo()
219+
require.NoError(t, err)
220+
}
221+
222+
func externalPlugins() map[string][]*config.Plugin {
223+
return map[string][]*config.Plugin{
224+
"apm": {
225+
&config.Plugin{
226+
Name: "noop",
227+
Driver: "noop-apm",
228+
},
229+
},
230+
"strategy": {
231+
&config.Plugin{
232+
Name: "noop",
233+
Driver: "noop-strategy",
234+
},
235+
},
236+
"target": {
237+
&config.Plugin{
238+
Name: "noop",
239+
Driver: "noop-target",
240+
},
241+
},
242+
}
243+
}

0 commit comments

Comments
 (0)