|
| 1 | +--- |
| 2 | +rule_files: |
| 3 | + - /tmp/rules.verify |
| 4 | + |
| 5 | +group_eval_order: |
| 6 | + - recordingRules.rules |
| 7 | + - alerts.rules |
| 8 | +# Information about this format can be found at: https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/ |
| 9 | +tests: |
| 10 | + # CDINotReady should fire when kubevirt_cdi_cr_ready == 0 for 5m |
| 11 | + - interval: 1m |
| 12 | + input_series: |
| 13 | + - series: 'kubevirt_cdi_cr_ready' |
| 14 | + values: "0 0 0 0 0 0 0 1 1" |
| 15 | + alert_rule_test: |
| 16 | + # must not trigger before the 5m for-window |
| 17 | + - eval_time: 4m |
| 18 | + alertname: CDINotReady |
| 19 | + exp_alerts: [] |
| 20 | + # must trigger after the for-window elapses with continuous zeros |
| 21 | + - eval_time: 6m |
| 22 | + alertname: CDINotReady |
| 23 | + exp_alerts: |
| 24 | + - exp_labels: |
| 25 | + severity: warning |
| 26 | + operator_health_impact: critical |
| 27 | + kubernetes_operator_part_of: kubevirt |
| 28 | + kubernetes_operator_component: containerized-data-importer |
| 29 | + exp_annotations: |
| 30 | + summary: CDI is not available to use |
| 31 | + runbook_url: https://kubevirt.io/monitoring/runbooks/CDINotReady |
| 32 | + # must not trigger when healthy (value 1) |
| 33 | + - eval_time: 8m |
| 34 | + alertname: CDINotReady |
| 35 | + exp_alerts: [] |
| 36 | + |
| 37 | + |
| 38 | + |
| 39 | + # CDIDataImportCronOutdated should fire when any cron is outdated (pending="false") for 15 minutes |
| 40 | + - interval: 1m |
| 41 | + input_series: |
| 42 | + - series: 'kubevirt_cdi_dataimportcron_outdated{pending="false", ns="user-ns", cron_name="cron-a"}' |
| 43 | + values: "1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1" |
| 44 | + alert_rule_test: |
| 45 | + # Must not trigger before the 15m for-window elapses |
| 46 | + - eval_time: 14m |
| 47 | + alertname: CDIDataImportCronOutdated |
| 48 | + exp_alerts: [] |
| 49 | + # Must trigger after the for-window elapses |
| 50 | + - eval_time: 16m |
| 51 | + alertname: CDIDataImportCronOutdated |
| 52 | + exp_alerts: |
| 53 | + - exp_labels: |
| 54 | + severity: info |
| 55 | + operator_health_impact: warning |
| 56 | + kubernetes_operator_part_of: kubevirt |
| 57 | + kubernetes_operator_component: containerized-data-importer |
| 58 | + ns: user-ns |
| 59 | + cron_name: cron-a |
| 60 | + exp_annotations: |
| 61 | + summary: DataImportCron (recurring polling of VM templates disk image sources, also known as golden images) PVCs are not being updated on the defined schedule |
| 62 | + runbook_url: https://kubevirt.io/monitoring/runbooks/CDIDataImportCronOutdated |
| 63 | + |
| 64 | + # CDIDataImportCronOutdated must NOT fire for pending="true" |
| 65 | + - interval: 1m |
| 66 | + input_series: |
| 67 | + - series: 'kubevirt_cdi_dataimportcron_outdated{pending="true", ns="user-ns", cron_name="cron-b"}' |
| 68 | + values: "1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1" |
| 69 | + alert_rule_test: |
| 70 | + - eval_time: 20m |
| 71 | + alertname: CDIDataImportCronOutdated |
| 72 | + exp_alerts: [] |
| 73 | + |
| 74 | + # CDIOperatorDown should fire when kubevirt_cdi_operator_up == 0 for 5m |
| 75 | + - interval: 1m |
| 76 | + input_series: |
| 77 | + - series: 'kubevirt_cdi_operator_up' |
| 78 | + values: "0 0 0 0 0 0 0" |
| 79 | + alert_rule_test: |
| 80 | + - eval_time: 6m |
| 81 | + alertname: CDIOperatorDown |
| 82 | + exp_alerts: |
| 83 | + - exp_labels: |
| 84 | + severity: warning |
| 85 | + operator_health_impact: critical |
| 86 | + kubernetes_operator_part_of: kubevirt |
| 87 | + kubernetes_operator_component: containerized-data-importer |
| 88 | + exp_annotations: |
| 89 | + summary: CDI operator is down |
| 90 | + runbook_url: https://kubevirt.io/monitoring/runbooks/CDIOperatorDown |
| 91 | + |
| 92 | + # CDIDataVolumeUnusualRestartCount should fire when any population restart metric > 0 for 5m |
| 93 | + - interval: 1m |
| 94 | + input_series: |
| 95 | + - series: 'kube_pod_container_status_restarts_total{pod="importer-1",container="importer"}' |
| 96 | + values: "4 4 4 4 4 4" |
| 97 | + alert_rule_test: |
| 98 | + - eval_time: 6m |
| 99 | + alertname: CDIDataVolumeUnusualRestartCount |
| 100 | + exp_alerts: |
| 101 | + - exp_labels: |
| 102 | + severity: warning |
| 103 | + operator_health_impact: none |
| 104 | + kubernetes_operator_part_of: kubevirt |
| 105 | + kubernetes_operator_component: containerized-data-importer |
| 106 | + exp_annotations: |
| 107 | + summary: Some CDI population workloads have an unusual restart count, meaning they are probably failing and need to be investigated |
| 108 | + runbook_url: https://kubevirt.io/monitoring/runbooks/CDIDataVolumeUnusualRestartCount |
| 109 | + |
| 110 | + # CDIStorageProfilesIncomplete should fire when any storageprofile is incomplete for 5m |
| 111 | + - interval: 1m |
| 112 | + input_series: |
| 113 | + - series: 'kubevirt_cdi_storageprofile_info{complete="false",storageclass="sc1",provisioner="prov"}' |
| 114 | + values: "1 1 1 1 1 1" |
| 115 | + alert_rule_test: |
| 116 | + - eval_time: 6m |
| 117 | + alertname: CDIStorageProfilesIncomplete |
| 118 | + exp_alerts: |
| 119 | + - exp_labels: |
| 120 | + severity: info |
| 121 | + operator_health_impact: none |
| 122 | + kubernetes_operator_part_of: kubevirt |
| 123 | + kubernetes_operator_component: containerized-data-importer |
| 124 | + storageclass: sc1 |
| 125 | + provisioner: prov |
| 126 | + exp_annotations: |
| 127 | + summary: Incomplete StorageProfile sc1, accessMode/volumeMode cannot be inferred by CDI for PVC population request |
| 128 | + runbook_url: https://kubevirt.io/monitoring/runbooks/CDIStorageProfilesIncomplete |
| 129 | + |
| 130 | + # CDINoDefaultStorageClass should fire when there is no default or virtdefault StorageClass and a DataVolume is pending for one (the missing default is simulated by providing no kubevirt_cdi_storageprofile_info series) |
| 131 | + - interval: 1m |
| 132 | + input_series: |
| 133 | + - series: 'kubevirt_cdi_datavolume_pending' |
| 134 | + values: "1 1 1 1 1 1" |
| 135 | + alert_rule_test: |
| 136 | + - eval_time: 6m |
| 137 | + alertname: CDINoDefaultStorageClass |
| 138 | + exp_alerts: |
| 139 | + - exp_labels: |
| 140 | + severity: warning |
| 141 | + operator_health_impact: none |
| 142 | + kubernetes_operator_part_of: kubevirt |
| 143 | + kubernetes_operator_component: containerized-data-importer |
| 144 | + exp_annotations: |
| 145 | + summary: No default StorageClass or virtualization StorageClass, and a DataVolume is pending for one |
| 146 | + runbook_url: https://kubevirt.io/monitoring/runbooks/CDINoDefaultStorageClass |
| 147 | + |
| 148 | + # CDIMultipleDefaultVirtStorageClasses should fire when more than one virtdefault=true exists for 5m |
| 149 | + - interval: 1m |
| 150 | + input_series: |
| 151 | + - series: 'kubevirt_cdi_storageprofile_info{virtdefault="true",storageclass="sc-a"}' |
| 152 | + values: "1 1 1 1 1 1" |
| 153 | + - series: 'kubevirt_cdi_storageprofile_info{virtdefault="true",storageclass="sc-b"}' |
| 154 | + values: "1 1 1 1 1 1" |
| 155 | + alert_rule_test: |
| 156 | + - eval_time: 6m |
| 157 | + alertname: CDIMultipleDefaultVirtStorageClasses |
| 158 | + exp_alerts: |
| 159 | + - exp_labels: |
| 160 | + severity: warning |
| 161 | + operator_health_impact: none |
| 162 | + kubernetes_operator_part_of: kubevirt |
| 163 | + kubernetes_operator_component: containerized-data-importer |
| 164 | + exp_annotations: |
| 165 | + summary: More than one default virtualization StorageClass detected |
| 166 | + runbook_url: https://kubevirt.io/monitoring/runbooks/CDIMultipleDefaultVirtStorageClasses |
| 167 | + |
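| 168 | + # CDIDefaultStorageClassDegraded should fire when the default/virtdefault StorageClass is degraded, i.e. has no smart clone or ReadWriteMany (simulated with a default="true" storage profile series that carries no other labels) |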
| 169 | + - interval: 1m |
| 170 | + input_series: |
| 171 | + - series: 'kubevirt_cdi_storageprofile_info{default="true"}' |
| 172 | + values: "1 1 1 1 1 1" |
| 173 | + alert_rule_test: |
| 174 | + - eval_time: 6m |
| 175 | + alertname: CDIDefaultStorageClassDegraded |
| 176 | + exp_alerts: |
| 177 | + - exp_labels: |
| 178 | + severity: warning |
| 179 | + operator_health_impact: none |
| 180 | + kubernetes_operator_part_of: kubevirt |
| 181 | + kubernetes_operator_component: containerized-data-importer |
| 182 | + exp_annotations: |
| 183 | + summary: Default storage class has no smart clone or ReadWriteMany |
| 184 | + runbook_url: https://kubevirt.io/monitoring/runbooks/CDIDefaultStorageClassDegraded |
0 commit comments