diff --git a/.github/workflows/crio.yml b/.github/workflows/crio.yml index 20f9ba2147..e892371f7a 100644 --- a/.github/workflows/crio.yml +++ b/.github/workflows/crio.yml @@ -56,8 +56,8 @@ jobs: - name: Configure CRI-O run: | sudo mkdir -p /etc/crio/crio.conf.d - printf '[crio.runtime]\nlog_level = "debug"\n[crio.image]\nshort_name_mode = "disabled"\n' | sudo tee /etc/crio/crio.conf.d/01-log-level.conf - + printf '[crio.runtime]\nlog_level = "debug"\n[crio.image]\nshort_name_mode = "disabled"\n' | sudo tee -a /etc/crio/crio.conf.d/01-base.conf + printf '[crio.stats]\nincluded_pod_metrics = [\n"disk",\n"diskIO",\n"network",\n"cpu",\n"hugetlb",\n"memory",\n"oom",\n"process",\n"spec",\n]\n' | sudo tee -a /etc/crio/crio.conf.d/01-base.conf - name: Configure CRI-O to use conmon-rs intead of the default conmon if: ${{matrix.monitor == 'conmon-rs'}} run: | diff --git a/pkg/validate/pod.go b/pkg/validate/pod.go index f7156b7075..b83795bb9b 100644 --- a/pkg/validate/pod.go +++ b/pkg/validate/pod.go @@ -20,9 +20,13 @@ import ( "context" "os" "path/filepath" + "strings" + "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "google.golang.org/grpc/codes" + grpcstatus "google.golang.org/grpc/status" internalapi "k8s.io/cri-api/pkg/apis" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" @@ -30,6 +34,52 @@ import ( "sigs.k8s.io/cri-tools/pkg/framework" ) +// expectedMetricDescriptorNames contains all expected metric descriptor names +// based on metrics returned by kubelet with CRI-O and cadvisor on the legacy cadvisor stats provider +// on kubernetes 1.35. 
+var expectedMetricDescriptorNames = []string{
+	"container_blkio_device_usage_total",
+	"container_cpu_system_seconds_total",
+	"container_cpu_usage_seconds_total",
+	"container_cpu_user_seconds_total",
+	"container_file_descriptors",
+	"container_fs_reads_bytes_total",
+	"container_fs_reads_total",
+	"container_fs_usage_bytes",
+	"container_fs_writes_bytes_total",
+	"container_fs_writes_total",
+	"container_last_seen",
+	"container_memory_cache",
+	"container_memory_failcnt",
+	"container_memory_failures_total",
+	"container_memory_mapped_file",
+	"container_memory_max_usage_bytes",
+	"container_memory_rss",
+	"container_memory_swap",
+	"container_memory_usage_bytes",
+	"container_memory_working_set_bytes",
+	"container_network_receive_bytes_total",
+	"container_network_receive_errors_total",
+	"container_network_receive_packets_dropped_total",
+	"container_network_receive_packets_total",
+	"container_network_transmit_bytes_total",
+	"container_network_transmit_errors_total",
+	"container_network_transmit_packets_dropped_total",
+	"container_network_transmit_packets_total",
+	"container_oom_events_total",
+	"container_processes",
+	"container_sockets",
+	"container_spec_cpu_period",
+	"container_spec_cpu_shares",
+	"container_spec_memory_limit_bytes",
+	"container_spec_memory_reservation_limit_bytes",
+	"container_spec_memory_swap_limit_bytes",
+	"container_start_time_seconds",
+	"container_threads",
+	"container_threads_max",
+	"container_ulimits_soft",
+}
+
 var _ = framework.KubeDescribe("PodSandbox", func() {
 	f := framework.NewDefaultCRIFramework()
 
@@ -80,6 +130,62 @@ var _ = framework.KubeDescribe("PodSandbox", func() {
 			podID = "" // no need to cleanup pod
 		})
 	})
+	Context("runtime should support metrics operations", func() {
+		var podID string
+		var podConfig *runtimeapi.PodSandboxConfig
+		// Skip the spec when the runtime answers Unimplemented; any other error fails it and is reported.
+		BeforeEach(func() {
+			_, err := rc.ListMetricDescriptors(context.TODO())
+			if grpcstatus.Code(err) == codes.Unimplemented {
+				Skip("CRI Metrics endpoints not supported by this runtime version")
+			}
+			Expect(err).NotTo(HaveOccurred(), "Expected CRI metric descriptors call to either be not supported, or not error")
+		})
+
+		AfterEach(func() {
+			if podID != "" {
+				By("stop PodSandbox")
+				Expect(rc.StopPodSandbox(context.TODO(), podID)).NotTo(HaveOccurred())
+				By("delete PodSandbox")
+				Expect(rc.RemovePodSandbox(context.TODO(), podID)).NotTo(HaveOccurred())
+				// podID is shared by every spec here; clear it so a spec that creates no sandbox skips cleanup.
+				podID = ""
+			}
+		})
+
+		It("runtime should support returning metrics descriptors [Conformance]", func() {
+			By("list metric descriptors")
+			descs := listMetricDescriptors(rc)
+
+			By("verify expected metric descriptors are present")
+			testMetricDescriptors(descs)
+		})
+
+		It("runtime should support listing pod sandbox metrics [Conformance]", func() {
+			By("create pod sandbox")
+			podID, podConfig = framework.CreatePodSandboxForContainer(rc)
+
+			By("create container in pod")
+			ic := f.CRIClient.CRIImageClient
+			containerID := framework.CreateDefaultContainer(rc, ic, podID, podConfig, "container-for-metrics-")
+
+			By("start container")
+			startContainer(rc, containerID)
+
+			// NOTE(review): presumably generates filesystem activity for the disk metrics — confirm; the shell exits with sync's status.
+			_, _, err := rc.ExecSync(
+				context.TODO(), containerID, []string{"/bin/sh", "-c", "for i in $(seq 1 10); do echo hi >> /var/lib/mydisktest/inode_test_file_$i; done; sync"},
+				time.Duration(defaultExecSyncTimeout)*time.Second,
+			)
+			Expect(err).ToNot(HaveOccurred())
+
+			By("list pod sandbox metrics")
+			metrics := listPodSandboxMetrics(rc)
+
+			By("verify pod metrics are present")
+			testPodSandboxMetrics(metrics, podID)
+		})
+	})
 })
 
 // podSandboxFound returns whether PodSandbox is found.
@@ -166,6 +272,17 @@ func listPodSandbox(c internalapi.RuntimeService, filter *runtimeapi.PodSandboxF
 	return pods
 }
 
+// listMetricDescriptors lists MetricDescriptors.
+func listMetricDescriptors(c internalapi.RuntimeService) []*runtimeapi.MetricDescriptor {
+	By("List MetricDescriptors.")
+	// The error, if any, is handed to framework.ExpectNoError with a descriptive message.
+	descriptors, listErr := c.ListMetricDescriptors(context.TODO())
+	framework.ExpectNoError(listErr, "failed to list MetricDescriptors status: %v", listErr)
+	framework.Logf("List MetricDescriptors succeed")
+
+	return descriptors
+}
+
 // createLogTempDir creates the log temp directory for podSandbox.
 func createLogTempDir(podSandboxName string) (hostPath, podLogPath string) {
 	hostPath, err := os.MkdirTemp("", "podLogTest")
@@ -196,3 +313,76 @@ func createPodSandboxWithLogDirectory(c internalapi.RuntimeService) (sandboxID s
 
 	return framework.RunPodSandbox(c, podConfig), podConfig, hostPath
 }
+
+// testMetricDescriptors asserts that every descriptor carries help text and
+// label keys, and that no name from expectedMetricDescriptorNames is missing.
+func testMetricDescriptors(descs []*runtimeapi.MetricDescriptor) {
+	seen := make(map[string]struct{}, len(descs))
+	for _, d := range descs {
+		name := d.GetName()
+		seen[name] = struct{}{}
+		Expect(d.GetHelp()).NotTo(BeEmpty(), "Metric descriptor %q should have help text", name)
+		Expect(d.GetLabelKeys()).NotTo(BeEmpty(), "Metric descriptor %q should have label keys", name)
+	}
+
+	absent := []string{}
+	for _, want := range expectedMetricDescriptorNames {
+		if _, ok := seen[want]; !ok {
+			absent = append(absent, want)
+		}
+	}
+
+	Expect(absent).To(BeEmpty(), "Expected %s metrics to be present and they were not", strings.Join(absent, " "))
+}
+
+// listPodSandboxMetrics lists PodSandboxMetrics.
+func listPodSandboxMetrics(c internalapi.RuntimeService) []*runtimeapi.PodSandboxMetrics {
+	By("List PodSandboxMetrics.")
+	// The error, if any, is handed to framework.ExpectNoError with a descriptive message.
+	podMetrics, listErr := c.ListPodSandboxMetrics(context.TODO())
+	framework.ExpectNoError(listErr, "failed to list PodSandboxMetrics: %v", listErr)
+	framework.Logf("List PodSandboxMetrics succeed")
+
+	return podMetrics
+}
+
+// testPodSandboxMetrics asserts that allMetrics holds an entry for podID and
+// that the entry's pod-level and container-level metrics together cover every
+// name in expectedMetricDescriptorNames. Both levels are searched as one set,
+// so a name counts as present no matter which level reports it.
+func testPodSandboxMetrics(allMetrics []*runtimeapi.PodSandboxMetrics, podID string) {
+	// Pick the first entry reported for the sandbox under test.
+	var target *runtimeapi.PodSandboxMetrics
+	for _, candidate := range allMetrics {
+		if candidate.GetPodSandboxId() == podID {
+			target = candidate
+
+			break
+		}
+	}
+
+	Expect(target).NotTo(BeNil(), "Metrics for pod %q should be present", podID)
+
+	// Collect the names of all pod-level metrics, then of every container's metrics.
+	seen := make(map[string]bool)
+	for _, m := range target.GetMetrics() {
+		seen[m.GetName()] = true
+	}
+
+	for _, cm := range target.GetContainerMetrics() {
+		for _, m := range cm.GetMetrics() {
+			seen[m.GetName()] = true
+		}
+	}
+
+	// Gather every expected name that was not observed, in declaration order.
+	absent := []string{}
+	for _, want := range expectedMetricDescriptorNames {
+		if !seen[want] {
+			absent = append(absent, want)
+		}
+	}
+
+	// On failure the message spells out the missing names.
+	Expect(absent).To(BeEmpty(), "Expected %s metrics to be present and they were not", strings.Join(absent, " "))
+}