From be6fc00d98ee045b85115c9b0e0f8c07065ac02a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20R=C3=BCger?= Date: Mon, 22 Apr 2024 10:15:12 +0200 Subject: [PATCH 1/2] feat: Add timezone to kube_cronjob_info --- docs/metrics/workload/cronjob-metrics.md | 2 +- internal/store/cronjob.go | 8 ++++++-- internal/store/cronjob_test.go | 10 ++++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/metrics/workload/cronjob-metrics.md b/docs/metrics/workload/cronjob-metrics.md index ae32dc65d..05d8fe86e 100644 --- a/docs/metrics/workload/cronjob-metrics.md +++ b/docs/metrics/workload/cronjob-metrics.md @@ -3,7 +3,7 @@ | Metric name | Metric type | Description | Labels/tags | Status | | ---------------------------------------------- | ----------- | ------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------ | | kube_cronjob_annotations | Gauge | Kubernetes annotations converted to Prometheus labels controlled via [--metric-annotations-allowlist](../../developer/cli-arguments.md) | `cronjob`=<cronjob-name>
`namespace`=<cronjob-namespace>
`annotation_CRONJOB_ANNOTATION`=<CRONJOB_ANNOTATION> | EXPERIMENTAL | -| kube_cronjob_info | Gauge | | `cronjob`=<cronjob-name>
`namespace`=<cronjob-namespace>
`schedule`=<schedule>
`concurrency_policy`=<concurrency-policy> | STABLE | +| kube_cronjob_info | Gauge | | `cronjob`=<cronjob-name>
`namespace`=<cronjob-namespace>
`schedule`=<schedule>
`concurrency_policy`=<concurrency-policy>
`timezone`=<timezone> | STABLE | | kube_cronjob_labels | Gauge | Kubernetes labels converted to Prometheus labels controlled via [--metric-labels-allowlist](../../developer/cli-arguments.md) | `cronjob`=<cronjob-name>
`namespace`=<cronjob-namespace>
`label_CRONJOB_LABEL`=<CRONJOB_LABEL> | STABLE | | kube_cronjob_created | Gauge | | `cronjob`=<cronjob-name>
`namespace`=<cronjob-namespace> | STABLE | | kube_cronjob_next_schedule_time | Gauge | | `cronjob`=<cronjob-name>
`namespace`=<cronjob-namespace> | STABLE | diff --git a/internal/store/cronjob.go b/internal/store/cronjob.go index 1450f70c4..a6613f2dd 100644 --- a/internal/store/cronjob.go +++ b/internal/store/cronjob.go @@ -96,11 +96,15 @@ func cronJobMetricFamilies(allowAnnotationsList, allowLabelsList []string) []gen basemetrics.STABLE, "", wrapCronJobFunc(func(j *batchv1.CronJob) *metric.Family { + timeZone := "local" + if j.Spec.TimeZone != nil { + timeZone = *j.Spec.TimeZone + } return &metric.Family{ Metrics: []*metric.Metric{ { - LabelKeys: []string{"schedule", "concurrency_policy"}, - LabelValues: []string{j.Spec.Schedule, string(j.Spec.ConcurrencyPolicy)}, + LabelKeys: []string{"schedule", "concurrency_policy", "timezone"}, + LabelValues: []string{j.Spec.Schedule, string(j.Spec.ConcurrencyPolicy), timeZone}, Value: 1, }, }, diff --git a/internal/store/cronjob_test.go b/internal/store/cronjob_test.go index 442bb8382..333dd0499 100644 --- a/internal/store/cronjob_test.go +++ b/internal/store/cronjob_test.go @@ -40,6 +40,7 @@ var ( ActiveRunningCronJob1LastScheduleTime = time.Unix(1520742896, 0) SuspendedCronJob1LastScheduleTime = time.Unix(1520742896+5.5*3600, 0) // 5.5 hours later ActiveCronJob1NoLastScheduledCreationTimestamp = time.Unix(1520742896+6.5*3600, 0) + TimeZone = "Europe/Berlin" ) func TestCronJobStore(t *testing.T) { @@ -159,7 +160,7 @@ func TestCronJobStore(t *testing.T) { # TYPE kube_cronjob_status_active gauge # TYPE kube_cronjob_metadata_resource_version gauge # TYPE kube_cronjob_status_last_schedule_time gauge - kube_cronjob_info{concurrency_policy="Forbid",cronjob="ActiveRunningCronJob1",namespace="ns1",schedule="0 */6 * * *"} 1 + kube_cronjob_info{concurrency_policy="Forbid",cronjob="ActiveRunningCronJob1",namespace="ns1",schedule="0 */6 * * *",timezone="local"} 1 kube_cronjob_annotations{annotation_app_k8s_io_owner="@foo",cronjob="ActiveRunningCronJob1",namespace="ns1"} 1 kube_cronjob_spec_failed_job_history_limit{cronjob="ActiveRunningCronJob1",namespace="ns1"} 1 kube_cronjob_spec_starting_deadline_seconds{cronjob="ActiveRunningCronJob1",namespace="ns1"} 300 @@ -206,6 +207,7 @@ func TestCronJobStore(t *testing.T) { ConcurrencyPolicy: "Forbid", Suspend: &SuspendTrue, Schedule: "0 */3 * * *", + TimeZone: &TimeZone, SuccessfulJobsHistoryLimit: &SuccessfulJobHistoryLimit3, FailedJobsHistoryLimit: &FailedJobHistoryLimit1, }, @@ -233,7 +235,7 @@ func TestCronJobStore(t *testing.T) { # TYPE kube_cronjob_metadata_resource_version gauge # TYPE kube_cronjob_status_last_schedule_time gauge # TYPE kube_cronjob_status_last_successful_time gauge - kube_cronjob_info{concurrency_policy="Forbid",cronjob="SuspendedCronJob1",namespace="ns1",schedule="0 */3 * * *"} 1 + kube_cronjob_info{concurrency_policy="Forbid",cronjob="SuspendedCronJob1",namespace="ns1",schedule="0 */3 * * *",timezone="Europe/Berlin"} 1 kube_cronjob_spec_failed_job_history_limit{cronjob="SuspendedCronJob1",namespace="ns1"} 1 kube_cronjob_spec_starting_deadline_seconds{cronjob="SuspendedCronJob1",namespace="ns1"} 300 kube_cronjob_spec_successful_job_history_limit{cronjob="SuspendedCronJob1",namespace="ns1"} 3 @@ -292,7 +294,7 @@ func TestCronJobStore(t *testing.T) { # TYPE kube_cronjob_metadata_resource_version gauge # TYPE kube_cronjob_status_last_schedule_time gauge # TYPE kube_cronjob_status_last_successful_time gauge - kube_cronjob_info{concurrency_policy="Forbid",cronjob="SuspendedCronJob1",namespace="ns1",schedule="0 */3 * * *"} 1 + kube_cronjob_info{concurrency_policy="Forbid",cronjob="SuspendedCronJob1",namespace="ns1",schedule="0 */3 * * *",timezone="local"} 1 kube_cronjob_spec_failed_job_history_limit{cronjob="SuspendedCronJob1",namespace="ns1"} 1 kube_cronjob_spec_starting_deadline_seconds{cronjob="SuspendedCronJob1",namespace="ns1"} 300 kube_cronjob_spec_successful_job_history_limit{cronjob="SuspendedCronJob1",namespace="ns1"} 3 @@ -359,7 +361,7 @@ func TestCronJobStore(t *testing.T) { kube_cronjob_spec_failed_job_history_limit{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 1 kube_cronjob_spec_successful_job_history_limit{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 3 kube_cronjob_spec_suspend{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 0 - kube_cronjob_info{concurrency_policy="Forbid",cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1",schedule="25 * * * *"} 1 + kube_cronjob_info{concurrency_policy="Forbid",cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1",schedule="25 * * * *",timezone="local"} 1 kube_cronjob_created{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 1.520766296e+09 ` + fmt.Sprintf("kube_cronjob_next_schedule_time{cronjob=\"ActiveCronJob1NoLastScheduled\",namespace=\"ns1\"} %ve+09\n", From 569e820c65ecfad6c158d5b5eb1ac4fd94cb1b80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20R=C3=BCger?= Date: Mon, 22 Apr 2024 10:25:20 +0200 Subject: [PATCH 2/2] feat: Make cronjob timezone aware --- internal/store/cronjob.go | 8 +- internal/store/cronjob_test.go | 212 ++++++++++++++++++++++++++------- 2 files changed, 176 insertions(+), 44 deletions(-) diff --git a/internal/store/cronjob.go b/internal/store/cronjob.go index a6613f2dd..1b335b9d6 100644 --- a/internal/store/cronjob.go +++ b/internal/store/cronjob.go @@ -249,7 +249,7 @@ func cronJobMetricFamilies(allowAnnotationsList, allowLabelsList []string) []gen ms := []*metric.Metric{} // If the cron job is suspended, don't track the next scheduled time - nextScheduledTime, err := getNextScheduledTime(j.Spec.Schedule, j.Status.LastScheduleTime, j.CreationTimestamp) + nextScheduledTime, err := getNextScheduledTime(j.Spec.Schedule, j.Status.LastScheduleTime, j.CreationTimestamp, j.Spec.TimeZone) if err != nil { panic(err) } else if !*j.Spec.Suspend { @@ -351,7 +351,11 @@ func createCronJobListWatch(kubeClient clientset.Interface, ns string, fieldSele } } -func getNextScheduledTime(schedule string, lastScheduleTime *metav1.Time, createdTime metav1.Time) (time.Time, error) { +func getNextScheduledTime(schedule string, lastScheduleTime *metav1.Time, createdTime metav1.Time, timeZone *string) (time.Time, error) { + if timeZone != nil { + schedule = fmt.Sprintf("CRON_TZ=%s %s", *timeZone, schedule) + } + sched, err := cron.ParseStandard(schedule) if err != nil { return time.Time{}, fmt.Errorf("Failed to parse cron job schedule '%s': %w", schedule, err) diff --git a/internal/store/cronjob_test.go b/internal/store/cronjob_test.go index 333dd0499..7206c0e2b 100644 --- a/internal/store/cronjob_test.go +++ b/internal/store/cronjob_test.go @@ -40,69 +40,163 @@ var ( ActiveRunningCronJob1LastScheduleTime = time.Unix(1520742896, 0) SuspendedCronJob1LastScheduleTime = time.Unix(1520742896+5.5*3600, 0) // 5.5 hours later ActiveCronJob1NoLastScheduledCreationTimestamp = time.Unix(1520742896+6.5*3600, 0) - TimeZone = "Europe/Berlin" + TimeZone = "Asia/Shanghai" ) -func TestCronJobStore(t *testing.T) { - hour := ActiveRunningCronJob1LastScheduleTime.Hour() - ActiveRunningCronJob1NextScheduleTime := time.Time{} +func calculateNextSchedule6h(timestamp time.Time, timezone string) time.Time { + loc, _ := time.LoadLocation(timezone) + hour := timestamp.In(loc).Hour() switch { case hour < 6: - ActiveRunningCronJob1NextScheduleTime = time.Date( - ActiveRunningCronJob1LastScheduleTime.Year(), - ActiveRunningCronJob1LastScheduleTime.Month(), - ActiveRunningCronJob1LastScheduleTime.Day(), + return time.Date( + timestamp.Year(), + timestamp.Month(), + timestamp.Day(), 6, 0, - 0, 0, time.Local) + 0, 0, loc) case hour < 12: - ActiveRunningCronJob1NextScheduleTime = time.Date( - ActiveRunningCronJob1LastScheduleTime.Year(), - ActiveRunningCronJob1LastScheduleTime.Month(), - ActiveRunningCronJob1LastScheduleTime.Day(), + return time.Date( + timestamp.Year(), + timestamp.Month(), + timestamp.Day(), 12, 0, - 0, 0, time.Local) + 0, 0, loc) case hour < 18: - ActiveRunningCronJob1NextScheduleTime = time.Date( - ActiveRunningCronJob1LastScheduleTime.Year(), - ActiveRunningCronJob1LastScheduleTime.Month(), - ActiveRunningCronJob1LastScheduleTime.Day(), + return time.Date( + timestamp.Year(), + timestamp.Month(), + timestamp.Day(), 18, 0, - 0, 0, time.Local) - case hour < 24: - ActiveRunningCronJob1NextScheduleTime = time.Date( - ActiveRunningCronJob1LastScheduleTime.Year(), - ActiveRunningCronJob1LastScheduleTime.Month(), - ActiveRunningCronJob1LastScheduleTime.Day(), - 24, + 0, 0, loc) + default: + return time.Date( + timestamp.Year(), + timestamp.Month(), + timestamp.Day()+1, + 0, 0, - 0, 0, time.Local) + 0, 0, loc) } +} - minute := ActiveCronJob1NoLastScheduledCreationTimestamp.Minute() - ActiveCronJob1NoLastScheduledNextScheduleTime := time.Time{} +func calculateNextSchedule25m(timestamp time.Time, timezone string) time.Time { + loc, _ := time.LoadLocation(timezone) + minute := timestamp.In(loc).Minute() switch { case minute < 25: - ActiveCronJob1NoLastScheduledNextScheduleTime = time.Date( - ActiveCronJob1NoLastScheduledCreationTimestamp.Year(), - ActiveCronJob1NoLastScheduledCreationTimestamp.Month(), - ActiveCronJob1NoLastScheduledCreationTimestamp.Day(), - ActiveCronJob1NoLastScheduledCreationTimestamp.Hour(), + return time.Date( + timestamp.Year(), + timestamp.Month(), + timestamp.Day(), + timestamp.Hour(), 25, - 0, 0, time.Local) + 0, 0, loc) default: - ActiveCronJob1NoLastScheduledNextScheduleTime = time.Date( - ActiveCronJob1NoLastScheduledNextScheduleTime.Year(), - ActiveCronJob1NoLastScheduledNextScheduleTime.Month(), - ActiveCronJob1NoLastScheduledNextScheduleTime.Day(), - ActiveCronJob1NoLastScheduledNextScheduleTime.Hour()+1, + return time.Date( + timestamp.Year(), + timestamp.Month(), + timestamp.Day(), + timestamp.Hour()+1, 25, - 0, 0, time.Local) + 0, 0, loc) } +} +func TestCronJobStore(t *testing.T) { + + ActiveRunningCronJob1NextScheduleTime := calculateNextSchedule6h(ActiveRunningCronJob1LastScheduleTime, "Local") + ActiveRunningCronJobWithTZ1NextScheduleTime := calculateNextSchedule6h(ActiveRunningCronJob1LastScheduleTime, TimeZone) + + ActiveCronJob1NoLastScheduledNextScheduleTime := calculateNextSchedule25m(ActiveCronJob1NoLastScheduledCreationTimestamp, "Local") + cases := []generateMetricsTestCase{ + { + AllowAnnotationsList: []string{ + "app.k8s.io/owner", + }, + Obj: &batchv1.CronJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ActiveRunningCronJobWithTZ1", + Namespace: "ns1", + Generation: 1, + ResourceVersion: "11111", + Labels: map[string]string{ + "app": "example-active-running-with-tz-1", + }, + Annotations: map[string]string{ + "app": "mysql-server", + "app.k8s.io/owner": "@foo", + }, + }, + Status: batchv1.CronJobStatus{ + Active: []v1.ObjectReference{{Name: "FakeJob1"}, {Name: "FakeJob2"}}, + LastScheduleTime: &metav1.Time{Time: ActiveRunningCronJob1LastScheduleTime}, + LastSuccessfulTime: nil, + }, + Spec: batchv1.CronJobSpec{ + StartingDeadlineSeconds: &StartingDeadlineSeconds300, + ConcurrencyPolicy: "Forbid", + Suspend: &SuspendFalse, + Schedule: "0 */6 * * *", + SuccessfulJobsHistoryLimit: &SuccessfulJobHistoryLimit3, + FailedJobsHistoryLimit: &FailedJobHistoryLimit1, + TimeZone: &TimeZone, + }, + }, + Want: ` + # HELP kube_cronjob_created [STABLE] Unix creation timestamp + # HELP kube_cronjob_info [STABLE] Info about cronjob. + # HELP kube_cronjob_annotations Kubernetes annotations converted to Prometheus labels. + # HELP kube_cronjob_labels [STABLE] Kubernetes labels converted to Prometheus labels. + # HELP kube_cronjob_next_schedule_time [STABLE] Next time the cronjob should be scheduled. The time after lastScheduleTime, or after the cron job's creation time if it's never been scheduled. Use this to determine if the job is delayed. + # HELP kube_cronjob_spec_failed_job_history_limit Failed job history limit tells the controller how many failed jobs should be preserved. + # HELP kube_cronjob_spec_starting_deadline_seconds [STABLE] Deadline in seconds for starting the job if it misses scheduled time for any reason. + # HELP kube_cronjob_spec_successful_job_history_limit Successful job history limit tells the controller how many completed jobs should be preserved. + # HELP kube_cronjob_spec_suspend [STABLE] Suspend flag tells the controller to suspend subsequent executions. + # HELP kube_cronjob_status_active [STABLE] Active holds pointers to currently running jobs. + # HELP kube_cronjob_metadata_resource_version [STABLE] Resource version representing a specific version of the cronjob. + # HELP kube_cronjob_status_last_schedule_time [STABLE] LastScheduleTime keeps information of when was the last time the job was successfully scheduled. + # TYPE kube_cronjob_created gauge + # TYPE kube_cronjob_info gauge + # TYPE kube_cronjob_annotations gauge + # TYPE kube_cronjob_labels gauge + # TYPE kube_cronjob_next_schedule_time gauge + # TYPE kube_cronjob_spec_failed_job_history_limit gauge + # TYPE kube_cronjob_spec_starting_deadline_seconds gauge + # TYPE kube_cronjob_spec_successful_job_history_limit gauge + # TYPE kube_cronjob_spec_suspend gauge + # TYPE kube_cronjob_status_active gauge + # TYPE kube_cronjob_metadata_resource_version gauge + # TYPE kube_cronjob_status_last_schedule_time gauge + kube_cronjob_info{concurrency_policy="Forbid",cronjob="ActiveRunningCronJobWithTZ1",namespace="ns1",schedule="0 */6 * * *",timezone="Asia/Shanghai"} 1 + kube_cronjob_annotations{annotation_app_k8s_io_owner="@foo",cronjob="ActiveRunningCronJobWithTZ1",namespace="ns1"} 1 + kube_cronjob_spec_failed_job_history_limit{cronjob="ActiveRunningCronJobWithTZ1",namespace="ns1"} 1 + kube_cronjob_spec_starting_deadline_seconds{cronjob="ActiveRunningCronJobWithTZ1",namespace="ns1"} 300 + kube_cronjob_spec_successful_job_history_limit{cronjob="ActiveRunningCronJobWithTZ1",namespace="ns1"} 3 + kube_cronjob_spec_suspend{cronjob="ActiveRunningCronJobWithTZ1",namespace="ns1"} 0 + kube_cronjob_status_active{cronjob="ActiveRunningCronJobWithTZ1",namespace="ns1"} 2 + kube_cronjob_metadata_resource_version{cronjob="ActiveRunningCronJobWithTZ1",namespace="ns1"} 11111 + kube_cronjob_status_last_schedule_time{cronjob="ActiveRunningCronJobWithTZ1",namespace="ns1"} 1.520742896e+09 +` + fmt.Sprintf("kube_cronjob_next_schedule_time{cronjob=\"ActiveRunningCronJobWithTZ1\",namespace=\"ns1\"} %ve+09\n", + float64(ActiveRunningCronJobWithTZ1NextScheduleTime.Unix())/math.Pow10(9)), + MetricNames: []string{ + "kube_cronjob_next_schedule_time", + "kube_cronjob_spec_starting_deadline_seconds", + "kube_cronjob_status_active", + "kube_cronjob_metadata_resource_version", + "kube_cronjob_spec_suspend", + "kube_cronjob_info", + "kube_cronjob_created", + "kube_cronjob_annotations", + "kube_cronjob_labels", + "kube_cronjob_status_last_schedule_time", + "kube_cronjob_spec_successful_job_history_limit", + "kube_cronjob_spec_failed_job_history_limit", + }, + }, { AllowAnnotationsList: []string{ "app.k8s.io/owner", @@ -235,7 +329,7 @@ func TestCronJobStore(t *testing.T) { # TYPE kube_cronjob_metadata_resource_version gauge # TYPE kube_cronjob_status_last_schedule_time gauge # TYPE kube_cronjob_status_last_successful_time gauge - kube_cronjob_info{concurrency_policy="Forbid",cronjob="SuspendedCronJob1",namespace="ns1",schedule="0 */3 * * *",timezone="Europe/Berlin"} 1 + kube_cronjob_info{concurrency_policy="Forbid",cronjob="SuspendedCronJob1",namespace="ns1",schedule="0 */3 * * *",timezone="Asia/Shanghai"} 1 kube_cronjob_spec_failed_job_history_limit{cronjob="SuspendedCronJob1",namespace="ns1"} 1 kube_cronjob_spec_starting_deadline_seconds{cronjob="SuspendedCronJob1",namespace="ns1"} 300 kube_cronjob_spec_successful_job_history_limit{cronjob="SuspendedCronJob1",namespace="ns1"} 3 @@ -353,7 +447,7 @@ func TestCronJobStore(t *testing.T) { # TYPE kube_cronjob_spec_successful_job_history_limit gauge # TYPE kube_cronjob_spec_suspend gauge # TYPE kube_cronjob_status_active gauge - # TYPE kube_cronjob_metadata_resource_version gauge + # TYPE kube_cronjob_metadata_resource_version gauge # TYPE kube_cronjob_status_last_successful_time gauge kube_cronjob_spec_starting_deadline_seconds{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 300 kube_cronjob_status_active{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 0 @@ -377,3 +471,37 @@ func TestCronJobStore(t *testing.T) { } } } + +func TestGetNextScheduledTime(t *testing.T) { + + testCases := []struct { + schedule string + lastScheduleTime metav1.Time + createdTime metav1.Time + timeZone string + expected time.Time + }{ + { + schedule: "0 */6 * * *", + lastScheduleTime: metav1.Time{Time: ActiveRunningCronJob1LastScheduleTime}, + createdTime: metav1.Time{Time: ActiveRunningCronJob1LastScheduleTime}, + timeZone: "UTC", + expected: ActiveRunningCronJob1LastScheduleTime.Add(time.Second*4 + time.Minute*25 + time.Hour), + }, + { + schedule: "0 */6 * * *", + lastScheduleTime: metav1.Time{Time: ActiveRunningCronJob1LastScheduleTime}, + createdTime: metav1.Time{Time: ActiveRunningCronJob1LastScheduleTime}, + timeZone: TimeZone, + expected: ActiveRunningCronJob1LastScheduleTime.Add(time.Second*4 + time.Minute*25 + time.Hour*5), + }, + } + + for _, test := range testCases { + actual, _ := getNextScheduledTime(test.schedule, &test.lastScheduleTime, test.createdTime, &test.timeZone) // #nosec G601 + if !actual.Equal(test.expected) { + t.Fatalf("%v: expected %v, actual %v", test.schedule, test.expected, actual) + } + } + +}