Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 34 additions & 17 deletions .golangci.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,42 @@
---
version: "2"
linters:
enable:
- misspell
- revive
- sloglint
disable:
- unused

issues:
exclude-rules:
- path: _test.go
linters:
- errcheck

linters-settings:
errcheck:
exclude-functions:
# Used in HTTP handlers, any error is handled by the server itself.
- (net/http.ResponseWriter).Write
revive:
settings:
errcheck:
exclude-functions:
# Used in HTTP handlers, any error is handled by the server itself
- (net/http.ResponseWriter).Write
revive:
rules:
# https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md#unused-parameter
- name: unused-parameter
severity: warning
disabled: true
exclusions:
generated: lax
presets:
- comments
- common-false-positives
- legacy
- std-error-handling
rules:
# https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md#unused-parameter
- name: unused-parameter
severity: warning
disabled: true
- linters:
- errcheck
path: _test.go
paths:
- third_party$
- builtin$
- examples$
formatters:
exclusions:
generated: lax
paths:
- third_party$
- builtin$
- examples$
34 changes: 10 additions & 24 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
package main

import (
"fmt"
"log/slog"
"net/http"
"os"
Expand Down Expand Up @@ -102,30 +101,31 @@ var (
"The interval between smartctl polls",
).Default("60s").Duration()
smartctlRescanInterval = kingpin.Flag("smartctl.rescan",
"The interval between rescanning for new/disappeared devices. If the interval is smaller than 1s no rescanning takes place. If any devices are configured with smartctl.device also no rescanning takes place.",
"The interval between rescanning for new/disappeared devices. If the interval is smaller than 1s no rescanning takes place. If any devices are configured with smartctl.device also no rescanning takes place",
).Default("10m").Duration()
smartctlScan = kingpin.Flag("smartctl.scan", "Enable scanning. This is a default if no devices are specified").Default("false").Bool()
smartctlScan = kingpin.Flag("smartctl.scan", "Enable scanning. This is a default if no devices are specified",
).Default("false").Bool()
smartctlDevices = kingpin.Flag("smartctl.device",
"The device to monitor. Device type can be specified after a semicolon, eg. '/dev/bus/0;megaraid,1' (repeatable)",
).Strings()
smartctlDeviceExclude = kingpin.Flag(
"smartctl.device-exclude",
"Regexp of devices to exclude from automatic scanning. (mutually exclusive to device-include)",
"Regexp of devices to exclude from automatic scanning (mutually exclusive to device-include)",
).Default("").String()
smartctlDeviceInclude = kingpin.Flag(
"smartctl.device-include",
"Regexp of devices to exclude from automatic scanning. (mutually exclusive to device-exclude)",
"Regexp of devices to exclude from automatic scanning (mutually exclusive to device-exclude)",
).Default("").String()
smartctlScanDeviceTypes = kingpin.Flag(
"smartctl.scan-device-type",
"Device type to use during automatic scan. Special by-id value forces predictable device names. (repeatable)",
"Device type to use during automatic scan. Special by-id value forces predictable device names (repeatable)",
).Strings()
smartctlFakeData = kingpin.Flag("smartctl.fake-data",
"The device to monitor (repeatable)",
).Default("false").Hidden().Bool()
smartctlPowerModeCheck = kingpin.Flag("smartctl.powermode-check",
"Whether or not to check powermode before fetching data",
).Default("standby").String()
"Whether or not to check powermode before fetching data. Must be one of: 'never', 'sleep', 'standby', 'idle'. Default is 'standby'",
).Default("standby").Enum("never", "sleep", "standby", "idle")
)

// scanDevices uses smartctl to gather the list of available devices.
Expand Down Expand Up @@ -177,15 +177,6 @@ func buildDevicesFromFlag(devices []Device) []Device {
return devices
}

// validatePowerMode checks that mode names one of the supported power modes:
// "never", "sleep", "standby" or "idle". The comparison is case-insensitive.
// It returns nil when the mode is accepted and a descriptive error, quoting
// the value exactly as supplied, otherwise.
func validatePowerMode(mode string) error {
	normalized := strings.ToLower(mode)
	for _, allowed := range [...]string{"never", "sleep", "standby", "idle"} {
		if normalized == allowed {
			return nil
		}
	}
	return fmt.Errorf("invalid power mode: %s. Must be one of: never, sleep, standby, idle", mode)
}

func main() {
metricsPath := kingpin.Flag(
"web.telemetry-path", "Path under which to expose metrics",
Expand All @@ -198,11 +189,6 @@ func main() {
kingpin.HelpFlag.Short('h')
kingpin.Parse()
logger := promslog.New(promslogConfig)

if err := validatePowerMode(*smartctlPowerModeCheck); err != nil {
logger.Error(err.Error())
os.Exit(1)
}
logger.Info("Starting smartctl_exporter", "version", version.Info())
logger.Info("Build context", "build_context", version.BuildContext())
var devices []Device
Expand Down Expand Up @@ -258,15 +244,15 @@ func main() {
}
landingPage, err := web.NewLandingPage(landingConfig)
if err != nil {
logger.Error("error creating landing page", "err", err)
logger.Error("Error creating landing page", "err", err)
os.Exit(1)
}
http.Handle("/", landingPage)
}

srv := &http.Server{}
if err := web.ListenAndServe(srv, toolkitFlags, logger); err != nil {
logger.Error("error running HTTP server", "err", err)
logger.Error("Error running HTTP server", "err", err)
os.Exit(1)
}
}
25 changes: 12 additions & 13 deletions readjson.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,22 +69,22 @@ func readSMARTctl(logger *slog.Logger, device Device, wg *sync.WaitGroup) {
logger.Debug("Calling smartctl with args", "args", strings.Join(smartctlArgs, " "))
out, err := exec.Command(*smartctlPath, smartctlArgs...).Output()
if err != nil {
logger.Warn("S.M.A.R.T. output reading", "err", err, "device", device)
logger.Warn("S.M.A.R.T. output reading", "device", device.Label, "type", device.Type, "err", err)
}
// Accommodate a smartmontools pre-7.3 bug
cleaned_out := strings.TrimPrefix(string(out), " Pending defect count:")
json := parseJSON(cleaned_out)
rcOk := resultCodeIsOk(logger, device, json.Get("smartctl.exit_status").Int())
jsonOk := jsonIsOk(logger, json)
logger.Debug("Collected S.M.A.R.T. json data", "device", device, "duration", time.Since(start))
logger.Debug("Collected S.M.A.R.T. json data", "device", device.Label, "type", device.Type, "duration", time.Since(start))
if rcOk && jsonOk {
jsonCache.Store(device, JSONCache{JSON: json, LastCollect: time.Now()})
}
}

func readSMARTctlDevices(logger *slog.Logger) gjson.Result {
logger.Debug("Scanning for devices")
var scanArgs []string = []string{"--json", "--scan"}
var scanArgs = []string{"--json", "--scan"}
for _, d := range *smartctlScanDeviceTypes {
scanArgs = append(scanArgs, "--device", d)
}
Expand Down Expand Up @@ -127,7 +127,7 @@ func readData(logger *slog.Logger, device Device) gjson.Result {

cacheValue, found := jsonCache.Load(device)
if !found {
logger.Warn("device not found", "device", device)
logger.Warn("Device not found", "device", device.Label, "type", device.Type)
return gjson.Result{}
}
return cacheValue.(JSONCache).JSON
Expand All @@ -139,30 +139,30 @@ func resultCodeIsOk(logger *slog.Logger, device Device, SMARTCtlResult int64) bo
if SMARTCtlResult > 0 {
b := SMARTCtlResult
if (b & 1) != 0 {
logger.Error("Command line did not parse", "device", device)
logger.Error("Command line did not parse", "device", device.Label, "type", device.Type)
result = false
}
if (b & (1 << 1)) != 0 {
logger.Error("Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device)
logger.Error("Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device.Label, "type", device.Type)
result = false
}
if (b & (1 << 2)) != 0 {
logger.Warn("Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device)
logger.Warn("Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device.Label, "type", device.Type)
}
if (b & (1 << 3)) != 0 {
logger.Warn("SMART status check returned 'DISK FAILING'", "device", device)
logger.Warn("SMART status check returned 'DISK FAILING'", "device", device.Label, "type", device.Type)
}
if (b & (1 << 4)) != 0 {
logger.Warn("We found prefail Attributes <= threshold", "device", device)
logger.Warn("We found prefail Attributes <= threshold", "device", device.Label, "type", device.Type)
}
if (b & (1 << 5)) != 0 {
logger.Warn("SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device)
logger.Warn("SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device.Label, "type", device.Type)
}
if (b & (1 << 6)) != 0 {
logger.Warn("The device error log contains records of errors", "device", device)
logger.Warn("The device error log contains records of errors", "device", device.Label, "type", device.Type)
}
if (b & (1 << 7)) != 0 {
logger.Warn("The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device)
logger.Warn("The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device.Label, "type", device.Type)
}
}
return result
Expand All @@ -171,7 +171,6 @@ func resultCodeIsOk(logger *slog.Logger, device Device, SMARTCtlResult int64) bo
// jsonIsOk inspects the smartctl.messages entries of the smartctl JSON output for error-severity messages.
func jsonIsOk(logger *slog.Logger, json gjson.Result) bool {
messages := json.Get("smartctl.messages")
// logger.Debug(messages.String())
if messages.Exists() {
for _, message := range messages.Array() {
if message.Get("severity").String() == "error" {
Expand Down