Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/crio.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ jobs:
- name: Configure CRI-O
run: |
sudo mkdir -p /etc/crio/crio.conf.d
printf '[crio.runtime]\nlog_level = "debug"\n[crio.image]\nshort_name_mode = "disabled"\n' | sudo tee /etc/crio/crio.conf.d/01-log-level.conf

printf '[crio.runtime]\nlog_level = "debug"\n[crio.image]\nshort_name_mode = "disabled"\n' | sudo tee -a /etc/crio/crio.conf.d/01-base.conf
printf '[crio.stats]\nincluded_pod_metrics = [\n"disk",\n"diskIO",\n"network",\n"cpu",\n"hugetlb",\n"memory",\n"oom",\n"process",\n"spec",\n]\n' | sudo tee -a /etc/crio/crio.conf.d/01-base.conf
- name: Configure CRI-O to use conmon-rs instead of the default conmon
if: ${{matrix.monitor == 'conmon-rs'}}
run: |
Expand Down
190 changes: 190 additions & 0 deletions pkg/validate/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,66 @@ import (
"context"
"os"
"path/filepath"
"strings"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"google.golang.org/grpc/codes"
grpcstatus "google.golang.org/grpc/status"
internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"

"sigs.k8s.io/cri-tools/pkg/common"
"sigs.k8s.io/cri-tools/pkg/framework"
)

// expectedMetricDescriptorNames lists the metric names the CRI metrics checks
// in this file require a runtime to report. Both testMetricDescriptors and
// testPodSandboxMetrics iterate over it and fail when any name is absent.
//
// The set is based on the metrics returned by kubelet with CRI-O and cadvisor
// on the legacy cadvisor stats provider on Kubernetes 1.35. Entries are listed
// in alphabetical order.
var expectedMetricDescriptorNames = []string{
"container_blkio_device_usage_total",
"container_cpu_system_seconds_total",
"container_cpu_usage_seconds_total",
"container_cpu_user_seconds_total",
"container_file_descriptors",
"container_fs_reads_bytes_total",
"container_fs_reads_total",
"container_fs_usage_bytes",
"container_fs_writes_bytes_total",
"container_fs_writes_total",
"container_last_seen",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A few of these metrics are currently not in the implementation in containerd also.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you have a list handy of the ones you aren't yet supporting? is it because you don't plan on it or haven't gotten to it yet
I think for this KEP to go beta each should really support the full set (I say knowing CRI-O is missing some)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From containerd, the below metrics are not reported from metric descriptors

          "container_cpu_load_average_10s",
          "container_cpu_load_d_average_10s",
          "container_file_descriptors",
          "container_last_seen",
          "container_oom_events_total",
          "container_pressure_cpu_stalled_seconds_total",
          "container_pressure_cpu_waiting_seconds_total",
          "container_pressure_io_stalled_seconds_total",
          "container_pressure_io_waiting_seconds_total",
          "container_pressure_memory_stalled_seconds_total",
          "container_pressure_memory_waiting_seconds_total",
          "container_sockets",
          "container_spec_cpu_period",
          "container_spec_cpu_shares",
          "container_spec_memory_limit_bytes",
          "container_spec_memory_reservation_limit_bytes",
          "container_spec_memory_swap_limit_bytes",
          "container_start_time_seconds",
          "container_tasks_state",
          "container_threads",
          "container_ulimits_soft",

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are now added as part of #12426.

Checking why the CI fails on containerd main.

"container_memory_cache",
"container_memory_failcnt",
"container_memory_failures_total",
"container_memory_mapped_file",
"container_memory_max_usage_bytes",
"container_memory_rss",
"container_memory_swap",
"container_memory_usage_bytes",
"container_memory_working_set_bytes",
"container_network_receive_bytes_total",
"container_network_receive_errors_total",
"container_network_receive_packets_dropped_total",
"container_network_receive_packets_total",
"container_network_transmit_bytes_total",
"container_network_transmit_errors_total",
"container_network_transmit_packets_dropped_total",
"container_network_transmit_packets_total",
"container_oom_events_total",
"container_processes",
"container_sockets",
"container_spec_cpu_period",
"container_spec_cpu_shares",
"container_spec_memory_limit_bytes",
"container_spec_memory_reservation_limit_bytes",
"container_spec_memory_swap_limit_bytes",
"container_start_time_seconds",
"container_threads",
"container_threads_max",
"container_ulimits_soft",
}

var _ = framework.KubeDescribe("PodSandbox", func() {
f := framework.NewDefaultCRIFramework()

Expand Down Expand Up @@ -80,6 +130,62 @@ var _ = framework.KubeDescribe("PodSandbox", func() {
podID = "" // no need to cleanup pod
})
})
// Specs covering the CRI metrics endpoints (ListMetricDescriptors and
// ListPodSandboxMetrics). The whole Context is skipped when the runtime
// reports the endpoints as Unimplemented.
Context("runtime should support metrics operations", func() {
	var podID string
	var podConfig *runtimeapi.PodSandboxConfig

	BeforeEach(func() {
		// podID and podConfig are shared by every spec in this Context. Clear
		// them so AfterEach only cleans up a sandbox created by the spec that
		// is currently running, regardless of the order specs execute in.
		podID = ""
		podConfig = nil

		_, err := rc.ListMetricDescriptors(context.TODO())
		if err == nil {
			return
		}

		// Unimplemented is the only acceptable error: any other failure fails
		// the spec here, so reaching Skip implies the code was Unimplemented.
		s, ok := grpcstatus.FromError(err)
		Expect(ok && s.Code() == codes.Unimplemented).To(BeTrue(), "Expected CRI metric descriptors call to either be not supported, or not error, got: %v", err)
		Skip("CRI Metrics endpoints not supported by this runtime version")
	})

	AfterEach(func() {
		if podID == "" {
			return
		}

		By("stop PodSandbox")
		Expect(rc.StopPodSandbox(context.TODO(), podID)).NotTo(HaveOccurred())
		By("delete PodSandbox")
		Expect(rc.RemovePodSandbox(context.TODO(), podID)).NotTo(HaveOccurred())
	})

	It("runtime should support returning metrics descriptors [Conformance]", func() {
		By("list metric descriptors")
		descs := listMetricDescriptors(rc)

		By("verify expected metric descriptors are present")
		testMetricDescriptors(descs)
	})

	It("runtime should support listing pod sandbox metrics [Conformance]", func() {
		By("create pod sandbox")
		podID, podConfig = framework.CreatePodSandboxForContainer(rc)

		By("create container in pod")
		ic := f.CRIClient.CRIImageClient
		containerID := framework.CreateDefaultContainer(rc, ic, podID, podConfig, "container-for-metrics-")

		By("start container")
		startContainer(rc, containerID)

		// Write ten small files inside the container and sync them before
		// metrics are collected.
		// NOTE(review): the loop's exit status is discarded because `sync` runs
		// last, so this succeeds even if /var/lib/mydisktest does not exist in
		// the container image — confirm the directory is present.
		_, _, err := rc.ExecSync(
			context.TODO(), containerID, []string{"/bin/sh", "-c", "for i in $(seq 1 10); do echo hi >> /var/lib/mydisktest/inode_test_file_$i; done; sync"},
			time.Duration(defaultExecSyncTimeout)*time.Second,
		)
		Expect(err).ToNot(HaveOccurred())

		By("list pod sandbox metrics")
		metrics := listPodSandboxMetrics(rc)

		By("verify pod metrics are present")
		testPodSandboxMetrics(metrics, podID)
	})
})
})

// podSandboxFound returns whether PodSandbox is found.
Expand Down Expand Up @@ -166,6 +272,17 @@ func listPodSandbox(c internalapi.RuntimeService, filter *runtimeapi.PodSandboxF
return pods
}

// listMetricDescriptors calls ListMetricDescriptors on the runtime service c,
// asserts through framework.ExpectNoError that the call did not return an
// error, and returns the descriptors it received.
func listMetricDescriptors(c internalapi.RuntimeService) []*runtimeapi.MetricDescriptor {
	By("List MetricDescriptors.")

	descs, err := c.ListMetricDescriptors(context.TODO())
	// The message names the call that failed; this helper queries descriptors,
	// not a status.
	framework.ExpectNoError(err, "failed to list MetricDescriptors: %v", err)
	framework.Logf("List MetricDescriptors succeed")

	return descs
}

// createLogTempDir creates the log temp directory for podSandbox.
func createLogTempDir(podSandboxName string) (hostPath, podLogPath string) {
hostPath, err := os.MkdirTemp("", "podLogTest")
Expand Down Expand Up @@ -196,3 +313,76 @@ func createPodSandboxWithLogDirectory(c internalapi.RuntimeService) (sandboxID s

return framework.RunPodSandbox(c, podConfig), podConfig, hostPath
}

// testMetricDescriptors verifies that all expected metric descriptors are present.
func testMetricDescriptors(descs []*runtimeapi.MetricDescriptor) {
returnedDescriptors := make(map[string]*runtimeapi.MetricDescriptor)
for _, desc := range descs {
returnedDescriptors[desc.GetName()] = desc
Expect(desc.GetHelp()).NotTo(BeEmpty(), "Metric descriptor %q should have help text", desc.GetName())
Expect(desc.GetLabelKeys()).NotTo(BeEmpty(), "Metric descriptor %q should have label keys", desc.GetName())
}

missingMetrics := []string{}

for _, expectedName := range expectedMetricDescriptorNames {
_, found := returnedDescriptors[expectedName]
if !found {
missingMetrics = append(missingMetrics, expectedName)
}
}

Expect(missingMetrics).To(BeEmpty(), "Expected %s metrics to be present and they were not", strings.Join(missingMetrics, " "))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Expect(missingMetrics).To(BeEmpty(), "Expected %s metrics to be present and they were not", strings.Join(missingMetrics, " "))
Expect(missingMetrics).To(BeEmpty(), "Expected metrics missing: %s", strings.Join(missingMetrics, ", "))

}

// listPodSandboxMetrics queries the runtime service c through
// ListPodSandboxMetrics, hands any error to framework.ExpectNoError, and
// returns the metrics it received.
func listPodSandboxMetrics(c internalapi.RuntimeService) []*runtimeapi.PodSandboxMetrics {
	By("List PodSandboxMetrics.")

	ctx := context.TODO()
	sandboxMetrics, listErr := c.ListPodSandboxMetrics(ctx)
	framework.ExpectNoError(listErr, "failed to list PodSandboxMetrics: %v", listErr)

	framework.Logf("List PodSandboxMetrics succeed")

	return sandboxMetrics
}

// testPodSandboxMetrics verifies that metrics are present for the specified pod.
func testPodSandboxMetrics(allMetrics []*runtimeapi.PodSandboxMetrics, podID string) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are still taking in this function. Only the metrics descriptors test are needed right?

var podMetrics *runtimeapi.PodSandboxMetrics

for _, m := range allMetrics {
if m.GetPodSandboxId() == podID {
podMetrics = m

break
}
}

Expect(podMetrics).NotTo(BeNil(), "Metrics for pod %q should be present", podID)

metricNamesFound := make(map[string]bool)

for _, metric := range podMetrics.GetMetrics() {
if !metricNamesFound[metric.GetName()] {
metricNamesFound[metric.GetName()] = true
}
}

for _, containerMetric := range podMetrics.GetContainerMetrics() {
for _, metric := range containerMetric.GetMetrics() {
if !metricNamesFound[metric.GetName()] {
metricNamesFound[metric.GetName()] = true
}
}
}

missingMetrics := []string{}

for _, expectedName := range expectedMetricDescriptorNames {
if !metricNamesFound[expectedName] {
missingMetrics = append(missingMetrics, expectedName)
}
}

Expect(missingMetrics).To(BeEmpty(), "Expected %s metrics to be present and they were not", strings.Join(missingMetrics, " "))
}
Loading