diff --git a/cli/receivers.go b/cli/receivers.go
new file mode 100644
index 0000000000..0779bc00c2
--- /dev/null
+++ b/cli/receivers.go
@@ -0,0 +1,304 @@
+// Copyright 2022 Prometheus Team
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cli
+
+import (
+	"context"
+	"errors"
+	"log/slog"
+	"maps"
+	"sort"
+	"time"
+
+	"github.com/prometheus/common/model"
+	"github.com/prometheus/common/promslog"
+	"golang.org/x/sync/errgroup"
+
+	"github.com/prometheus/alertmanager/config"
+	"github.com/prometheus/alertmanager/notify"
+	"github.com/prometheus/alertmanager/notify/discord"
+	"github.com/prometheus/alertmanager/notify/email"
+	"github.com/prometheus/alertmanager/notify/msteams"
+	"github.com/prometheus/alertmanager/notify/opsgenie"
+	"github.com/prometheus/alertmanager/notify/pagerduty"
+	"github.com/prometheus/alertmanager/notify/pushover"
+	"github.com/prometheus/alertmanager/notify/slack"
+	"github.com/prometheus/alertmanager/notify/sns"
+	"github.com/prometheus/alertmanager/notify/telegram"
+	"github.com/prometheus/alertmanager/notify/victorops"
+	"github.com/prometheus/alertmanager/notify/webex"
+	"github.com/prometheus/alertmanager/notify/webhook"
+	"github.com/prometheus/alertmanager/notify/wechat"
+	"github.com/prometheus/alertmanager/template"
+	"github.com/prometheus/alertmanager/types"
+)
+
+const (
+	// maxTestReceiversWorkers caps how many test notifications run concurrently.
+	maxTestReceiversWorkers = 10
+)
+
+// ErrNoReceivers is returned by TestReceivers when no receiver has an
+// integration configured.
+var ErrNoReceivers = errors.New("no receivers with configuration set")
+
+// TestReceiversParams is the input of TestReceivers: the receivers to test and
+// optional additions to the generated test alert.
+type TestReceiversParams struct {
+	Alert     *TestReceiversAlertParams
+	Receivers []config.Receiver
+}
+
+// TestReceiversAlertParams holds annotations and labels that are merged over
+// the defaults of the test alert.
+type TestReceiversAlertParams struct {
+	Annotations model.LabelSet `yaml:"annotations,omitempty" json:"annotations,omitempty"`
+	Labels      model.LabelSet `yaml:"labels,omitempty" json:"labels,omitempty"`
+}
+
+// TestReceiversResult is the outcome of one TestReceivers call.
+type TestReceiversResult struct {
+	Alert     types.Alert
+	Receivers []TestReceiverResult
+	// NotifedAt is the time the test started. The spelling is kept as is because
+	// renaming an exported field would break callers.
+	NotifedAt time.Time
+}
+
+// TestReceiverResult groups the per-integration results of one receiver.
+type TestReceiverResult struct {
+	Name          string
+	ConfigResults []TestReceiverConfigResult
+}
+
+// TestReceiverConfigResult is the result for one integration of a receiver.
+// Status is "ok" when Error is nil and "failed" otherwise.
+type TestReceiverConfigResult struct {
+	Name   string
+	Status string
+	Error  error
+}
+
+// TestReceivers sends one test alert through every integration of every
+// receiver in c and reports the outcome per receiver.
+//
+// Integrations that cannot be built are reported as failed without being
+// notified. Notification failures are recorded in the result rather than
+// returned; the only error returned is ErrNoReceivers, when no receiver has an
+// integration configured. Receivers are sorted by name and, within a receiver,
+// results follow the order in which the integrations were built, so the
+// layout does not depend on which notification finishes first.
+func TestReceivers(ctx context.Context, c TestReceiversParams, tmpl *template.Template) (*TestReceiversResult, error) {
+	// now represents the start time of the test
+	now := time.Now()
+	testAlert := newTestAlert(c, now, now)
+
+	// we must set a group key that is unique per test as some receivers use this key to deduplicate alerts
+	ctx = notify.WithGroupKey(ctx, testAlert.Labels.String()+now.String())
+	// we must set group labels to avoid issues with templating
+	ctx = notify.WithGroupLabels(ctx, testAlert.Labels)
+
+	logger := promslog.New(&promslog.Config{})
+
+	// result contains the receiver that was tested and an error that is non-nil if the test failed
+	type result struct {
+		Receiver    config.Receiver
+		Integration *notify.Integration
+		Error       error
+	}
+
+	newTestReceiversResult := func(alert types.Alert, results []result, notifiedAt time.Time) *TestReceiversResult {
+		m := make(map[string]TestReceiverResult)
+		for _, receiver := range c.Receivers {
+			// set up the result for this receiver
+			m[receiver.Name] = TestReceiverResult{
+				Name:          receiver.Name,
+				ConfigResults: []TestReceiverConfigResult{},
+			}
+		}
+		for _, result := range results {
+			tmp := m[result.Receiver.Name]
+			status := "ok"
+			if result.Error != nil {
+				status = "failed"
+			}
+			tmp.ConfigResults = append(tmp.ConfigResults, TestReceiverConfigResult{
+				Name:   result.Integration.Name(),
+				Status: status,
+				Error:  result.Error,
+			})
+			m[result.Receiver.Name] = tmp
+		}
+		v := new(TestReceiversResult)
+		v.Alert = alert
+		v.Receivers = make([]TestReceiverResult, 0, len(c.Receivers))
+		v.NotifedAt = notifiedAt
+		for _, result := range m {
+			v.Receivers = append(v.Receivers, result)
+		}
+
+		// Make sure the return order is deterministic.
+		sort.Slice(v.Receivers, func(i, j int) bool {
+			return v.Receivers[i].Name < v.Receivers[j].Name
+		})
+
+		return v
+	}
+
+	var (
+		// results has one slot per integration, in the order they were built
+		results []result
+		// pending lists the slots in results that still need a test notification
+		pending []int
+	)
+
+	for _, receiver := range c.Receivers {
+		for _, integration := range buildReceiverIntegrations(receiver, tmpl, logger) {
+			if integration.Error == nil {
+				pending = append(pending, len(results))
+			}
+			results = append(results, result{
+				Receiver:    receiver,
+				Integration: &integration.Integration,
+				Error:       integration.Error,
+			})
+		}
+	}
+
+	if len(results) == 0 {
+		return nil, ErrNoReceivers
+	}
+
+	// Every worker writes only to its own slot, so no locking is needed and the
+	// order of results does not depend on which notification finishes first.
+	var g errgroup.Group
+	g.SetLimit(maxTestReceiversWorkers)
+	for _, slot := range pending {
+		g.Go(func() error {
+			r := &results[slot]
+			_, r.Error = r.Integration.Notify(notify.WithReceiverName(ctx, r.Receiver.Name), &testAlert)
+			return nil
+		})
+	}
+	// Workers never return an error; failures are stored in results.
+	_ = g.Wait()
+
+	return newTestReceiversResult(testAlert, results, now), nil
+}
+
+// newTestAlert returns the alert sent to every integration. It starts from
+// default labels and annotations and copies the ones supplied in c.Alert over
+// them, so user supplied values win on key collisions.
+func newTestAlert(c TestReceiversParams, startsAt, updatedAt time.Time) types.Alert {
+	var (
+		defaultAnnotations = model.LabelSet{
+			"summary":          "Notification test",
+			"__value_string__": "[ metric='foo' labels={instance=bar} value=10 ]",
+		}
+		defaultLabels = model.LabelSet{
+			"alertname": "TestAlert",
+			"instance":  "Alertmanager",
+		}
+	)
+
+	alert := types.Alert{
+		Alert: model.Alert{
+			Labels:      defaultLabels,
+			Annotations: defaultAnnotations,
+			StartsAt:    startsAt,
+		},
+		UpdatedAt: updatedAt,
+	}
+
+	if c.Alert != nil {
+		if c.Alert.Annotations != nil {
+			maps.Copy(alert.Annotations, c.Alert.Annotations)
+		}
+		if c.Alert.Labels != nil {
+			maps.Copy(alert.Labels, c.Alert.Labels)
+		}
+	}
+
+	return alert
+}
+
+// ReceiverIntegration is an integration built from a receiver config together
+// with the error, if any, returned while building its notifier.
+type ReceiverIntegration struct {
+	Integration notify.Integration
+	Error       error
+}
+
+// buildReceiverIntegrations builds a list of integration notifiers off of a
+// receiver config.
+func buildReceiverIntegrations(nc config.Receiver, tmpl *template.Template, logger *slog.Logger) []ReceiverIntegration {
+	var built []ReceiverIntegration
+
+	// register runs the constructor f with a logger tagged with the integration
+	// name and appends the outcome to built. When f fails the entry is still
+	// appended, carrying a nil notifier together with the construction error;
+	// on success the entry carries the notifier and a nil error.
+	register := func(name string, i int, rs notify.ResolvedSender, f func(l *slog.Logger) (notify.Notifier, error)) {
+		notifier, err := f(logger.With("integration", name))
+		if err != nil {
+			notifier = nil
+		}
+		built = append(built, ReceiverIntegration{
+			Integration: notify.NewIntegration(notifier, rs, name, i, nc.Name),
+			Error:       err,
+		})
+	}
+
+	for i, cfg := range nc.WebhookConfigs {
+		register("webhook", i, cfg, func(l *slog.Logger) (notify.Notifier, error) { return webhook.New(cfg, tmpl, l) })
+	}
+	for i, cfg := range nc.EmailConfigs {
+		register("email", i, cfg, func(l *slog.Logger) (notify.Notifier, error) { return email.New(cfg, tmpl, l), nil })
+	}
+	for i, cfg := range nc.PagerdutyConfigs {
+		register("pagerduty", i, cfg, func(l *slog.Logger) (notify.Notifier, error) { return pagerduty.New(cfg, tmpl, l) })
+	}
+	for i, cfg := range nc.OpsGenieConfigs {
+		register("opsgenie", i, cfg, func(l *slog.Logger) (notify.Notifier, error) { return opsgenie.New(cfg, tmpl, l) })
+	}
+	for i, cfg := range nc.WechatConfigs {
+		register("wechat", i, cfg, func(l *slog.Logger) (notify.Notifier, error) { return wechat.New(cfg, tmpl, l) })
+	}
+	for i, cfg := range nc.SlackConfigs {
+		register("slack", i, cfg, func(l *slog.Logger) (notify.Notifier, error) { return slack.New(cfg, tmpl, l) })
+	}
+	for i, cfg := range nc.VictorOpsConfigs {
+		register("victorops", i, cfg, func(l *slog.Logger) (notify.Notifier, error) { return victorops.New(cfg, tmpl, l) })
+	}
+	for i, cfg := range nc.PushoverConfigs {
+		register("pushover", i, cfg, func(l *slog.Logger) (notify.Notifier, error) { return pushover.New(cfg, tmpl, l) })
+	}
+	for i, cfg := range nc.SNSConfigs {
+		register("sns", i, cfg, func(l *slog.Logger) (notify.Notifier, error) { return sns.New(cfg, tmpl, l) })
+	}
+	for i, cfg := range nc.TelegramConfigs {
+		register("telegram", i, cfg, func(l *slog.Logger) (notify.Notifier, error) { return telegram.New(cfg, tmpl, l) })
+	}
+	for i, cfg := range nc.DiscordConfigs {
+		register("discord", i, cfg, func(l *slog.Logger) (notify.Notifier, error) { return discord.New(cfg, tmpl, l) })
+	}
+	for i, cfg := range nc.WebexConfigs {
+		register("webex", i, cfg, func(l *slog.Logger) (notify.Notifier, error) { return webex.New(cfg, tmpl, l) })
+	}
+	for i, cfg := range nc.MSTeamsConfigs {
+		register("msteams", i, cfg, func(l *slog.Logger) (notify.Notifier, error) { return msteams.New(cfg, tmpl, l) })
+	}
+
+	return built
+}
diff --git a/cli/root.go b/cli/root.go
index 572f173428..a210e7cc76 100644
--- a/cli/root.go
+++ b/cli/root.go
@@ -173,6 +173,7 @@ func Execute() {
 	configureClusterCmd(app)
 	configureConfigCmd(app)
 	configureTemplateCmd(app)
+	configureTestReceiversCmd(app)
 
 	app.Action(initMatchersCompat)
 
diff --git a/cli/test_receivers.go b/cli/test_receivers.go
new file mode 100644
index 0000000000..ba56253df3
--- /dev/null
+++ b/cli/test_receivers.go
@@ -0,0 +1,119 @@
+// Copyright 2022 Prometheus Team
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cli
+
+import (
+	"context"
+	"errors"
+	"fmt"
+
+	"github.com/alecthomas/kingpin/v2"
+
+	"github.com/prometheus/alertmanager/config"
+)
+
+type testReceiversCmd struct {
+	configFile string
+	alertFile  string
+}
+
+const testReceiversHelp = `Test alertmanager receivers
+
+Send test notifications to every receiver for an alertmanager config file.
+`
+
+var (
+	ErrNoConfigFile      = errors.New("no config file was specified")
+	ErrInvalidConfigFile = errors.New("invalid alertmanager config file")
+	ErrInvalidAlertFile  = errors.New("invalid alert config file")
+	ErrInvalidTemplate   = errors.New("failed to parse templates")
+	ErrInternal          = errors.New("internal error parsing mock url")
+)
+
+// configureTestReceiversCmd registers the test-receivers subcommand on app.
+func configureTestReceiversCmd(app *kingpin.Application) {
+	t := &testReceiversCmd{}
+	testCmd := app.Command("test-receivers", testReceiversHelp)
+	testCmd.Arg("config.file", "Config file to be tested.").ExistingFileVar(&t.configFile)
+	testCmd.Flag("alert.file", "Mock alert file with annotations and labels to add to test alert.").ExistingFileVar(&t.alertFile)
+	testCmd.Action(execWithTimeout(t.testReceivers))
+}
+
+// testReceivers tests every receiver of the configured file and prints the results.
+func (t *testReceiversCmd) testReceivers(ctx context.Context, _ *kingpin.ParseContext) error {
+	if len(t.configFile) == 0 {
+		return ErrNoConfigFile
+	}
+
+	fmt.Printf("Checking alertmanager config '%s'...\n", t.configFile)
+	cfg, err := config.LoadFile(t.configFile)
+	if err != nil {
+		return ErrInvalidConfigFile
+	}
+
+	if cfg != nil {
+		tmpl, err := getTemplate(cfg)
+		if err != nil {
+			return err
+		}
+
+		c := TestReceiversParams{Receivers: cfg.Receivers}
+		if t.alertFile != "" {
+			alert, err := loadAlertConfigFile(t.alertFile)
+			if err != nil {
+				return ErrInvalidAlertFile
+			}
+			c.Alert = alert
+		}
+
+		fmt.Printf("Testing %d receivers...\n", len(cfg.Receivers))
+		result, err := TestReceivers(ctx, c, tmpl)
+		if err != nil {
+			return err
+		}
+		printTestReceiversResults(result)
+	}
+
+	return nil
+}
+
+// printTestReceiversResults writes a summary of result to stdout. A receiver is
+// counted as notified only when it has at least one integration and none failed.
+func printTestReceiversResults(result *TestReceiversResult) {
+	notified := 0
+	successfulCounts := make(map[string]int, len(result.Receivers))
+	for _, rcv := range result.Receivers {
+		for _, cfg := range rcv.ConfigResults {
+			if cfg.Error == nil {
+				successfulCounts[rcv.Name]++
+			}
+		}
+		if n := len(rcv.ConfigResults); n > 0 && successfulCounts[rcv.Name] == n {
+			notified++
+		}
+	}
+
+	fmt.Printf("\nSuccessfully notified %d/%d receivers at %v:\n", notified, len(result.Receivers), result.NotifedAt.Format("2006-01-02 15:04:05"))
+	for _, rcv := range result.Receivers {
+		fmt.Printf(" %d/%d - '%s'\n", successfulCounts[rcv.Name], len(rcv.ConfigResults), rcv.Name)
+		for _, cfg := range rcv.ConfigResults {
+			if cfg.Error != nil {
+				fmt.Printf(" - %s - %s: %s\n", cfg.Name, cfg.Status, cfg.Error.Error())
+			} else {
+				fmt.Printf(" - %s - %s\n", cfg.Name, cfg.Status)
+			}
+		}
+	}
+}
diff --git a/cli/test_receivers_test.go b/cli/test_receivers_test.go
new file mode 100644
index 0000000000..abd6008c22
--- /dev/null
+++ b/cli/test_receivers_test.go
@@ -0,0 +1,58 @@
+// Copyright 2022 Prometheus Team
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cli
+
+import (
+	"context"
+	"testing"
+
+	"github.com/alecthomas/kingpin/v2"
+	"github.com/stretchr/testify/require"
+)
+
+// Test_TestReceivers_Error checks that testReceivers reports the matching
+// sentinel error for an invalid config file, an invalid alert file and a
+// config for which TestReceivers finds nothing to test.
+func Test_TestReceivers_Error(t *testing.T) {
+	ctx := context.Background()
+	parseContext := kingpin.ParseContext{}
+
+	t.Run("invalid alertmanager config", func(t *testing.T) {
+		test := testReceiversCmd{
+			configFile: "testdata/conf.bad.yml",
+		}
+
+		err := test.testReceivers(ctx, &parseContext)
+		require.ErrorIs(t, err, ErrInvalidConfigFile)
+	})
+
+	t.Run("invalid alert", func(t *testing.T) {
+		test := testReceiversCmd{
+			configFile: "testdata/conf.receiver.yml",
+			alertFile:  "testdata/conf.bad-alert.yml",
+		}
+
+		err := test.testReceivers(ctx, &parseContext)
+		require.ErrorIs(t, err, ErrInvalidAlertFile)
+	})
+
+	t.Run("no receivers", func(t *testing.T) {
+		test := testReceiversCmd{
+			configFile: "testdata/conf.good.yml",
+		}
+
+		err := test.testReceivers(ctx, &parseContext)
+		require.ErrorIs(t, err, ErrNoReceivers)
+	})
+}
diff --git a/cli/testdata/conf.bad-alert.yml b/cli/testdata/conf.bad-alert.yml
new file mode 100644
index 0000000000..7b3a785483
--- /dev/null
+++ b/cli/testdata/conf.bad-alert.yml
@@ -0,0 +1 @@
+BAD
diff --git a/cli/testdata/conf.receiver.yml b/cli/testdata/conf.receiver.yml
new file mode 100644
index 0000000000..bfe2fa956a
--- /dev/null
+++ b/cli/testdata/conf.receiver.yml
@@ -0,0 +1,14 @@
+global:
+  slack_api_url: "https://hooks.slack.com/services/random/random"
+
+route:
+  group_by: ["alertname"]
+  group_wait: 30s
+  group_interval: 5m
+  repeat_interval: 5m
+  receiver: "slack-alerts"
+
+receivers:
+  - name: "slack-alerts"
+    slack_configs:
+      - channel: "#dev"
diff --git a/cli/utils.go b/cli/utils.go
index 7215220af2..1b62727c41 100644
--- a/cli/utils.go
+++ b/cli/utils.go
@@ -22,11 +22,13 @@ import (
 
 	"github.com/alecthomas/kingpin/v2"
 	"github.com/prometheus/common/model"
+	"gopkg.in/yaml.v2"
 
 	"github.com/prometheus/alertmanager/api/v2/client/general"
 	"github.com/prometheus/alertmanager/api/v2/models"
 	"github.com/prometheus/alertmanager/config"
 	"github.com/prometheus/alertmanager/pkg/labels"
+	"github.com/prometheus/alertmanager/template"
 )
 
 // getRemoteAlertmanagerConfigStatus returns status responsecontaining configuration from remote Alertmanager.
@@ -111,3 +113,37 @@ func execWithTimeout(fn func(context.Context, *kingpin.ParseContext) error) func
 		return fn(ctx, x)
 	}
 }
+
+// loadAlertConfigFile reads filename and decodes it with yaml.UnmarshalStrict
+// into a TestReceiversAlertParams.
+func loadAlertConfigFile(filename string) (*TestReceiversAlertParams, error) {
+	b, err := os.ReadFile(filename)
+	if err != nil {
+		return nil, err
+	}
+
+	alert := &TestReceiversAlertParams{}
+	if err := yaml.UnmarshalStrict(b, alert); err != nil {
+		return nil, err
+	}
+
+	return alert, nil
+}
+
+// getTemplate parses the template files referenced by cfg and sets the
+// external URL on the resulting template.
+func getTemplate(cfg *config.Config) (*template.Template, error) {
+	tmpl, err := template.FromGlobs(cfg.Templates)
+	if err != nil {
+		return nil, ErrInvalidTemplate
+	}
+
+	// Use a fixed placeholder when no Alertmanager URL is available. It is built
+	// directly because parsing a constant URL cannot fail.
+	tmpl.ExternalURL = alertmanagerURL
+	if tmpl.ExternalURL == nil {
+		tmpl.ExternalURL = &url.URL{Scheme: "https", Host: "example.com"}
+	}
+
+	return tmpl, nil
+}
diff --git a/go.mod b/go.mod
index 52aab3f618..6691275e86 100644
--- a/go.mod
+++ b/go.mod
@@ -53,6 +53,7 @@ require (
 	go.uber.org/automaxprocs v1.6.0
 	golang.org/x/mod v0.30.0
 	golang.org/x/net v0.47.0
+	golang.org/x/sync v0.18.0
 	golang.org/x/text v0.31.0
 	golang.org/x/tools v0.39.0
 	google.golang.org/grpc v1.75.0
@@ -127,7 +128,6 @@ require (
 	go.yaml.in/yaml/v3 v3.0.4 // indirect
 	golang.org/x/crypto v0.44.0 // indirect
 	golang.org/x/oauth2 v0.32.0 // indirect
-	golang.org/x/sync v0.18.0 // indirect
 	golang.org/x/sys v0.38.0 // indirect
 	golang.org/x/telemetry v0.0.0-20251111182119-bc8e575c7b54 // indirect
 	golang.org/x/time v0.13.0 // indirect