From cdb10cd6455712d45388027c31567572d122c74c Mon Sep 17 00:00:00 2001 From: Pawel Palucki Date: Mon, 12 Feb 2024 17:57:06 +0100 Subject: [PATCH 1/8] Helm chart for pcm: old comments: sys/pci/mcfg mounts are unnessesary for indirect method fix old wrong defaults in README fix formatting possible fix for issue with resctrl remove hacks to handle /pcm/resctrl and unessesary out-of-date files update License to use the same as pcm itself update README, remove out-of-date info links do values formatting + links do values update README an values comments update README address jcfunk comments: interval and extra labels for PodMonitor + refactor readme fix typos readme: reminder about removing msr kernel module after rebasing: point to correct default pcm image from intel organization Refactoring: - explicit values file for privileged direct method, - hide (into docs directory) "unprivileged" direct method (and fixes), - remove unnessesary mounts (mcfg, /dev/cpu/dev/mem for privileged access), - add instructions to collection methods, - fixes (extra builder) for build local development image, - silent mode - move collection methods to the top fix values files for direct privileged method New: support for PERFMON capability, silent mode and some extra env debug variables VPA: v1 - first version of vertical pod autoscaler Grafana dashboard: instructions rename resctrlHostMount to resctrlMount fix dashboard rate interval pcm-sensor-server: add new metrics DRAM Local percantage Fix dockerbuild by using separate Dockerfile + build in dockerignore improve dockerfile.debug extra env PCM_NO_MAIN_EXCEPTION_HANDLER --- .dockerignore | 1 + .gitignore | 6 +- Dockerfile.debug | 6 + deployment/pcm/.helmignore | 26 ++ deployment/pcm/Chart.yaml | 9 + deployment/pcm/LICENSE | 30 ++ deployment/pcm/README.md | 385 ++++++++++++++++++ .../docs/direct-unprivileged-deployment.md | 67 +++ .../values-device-injector.yaml | 16 + .../values-direct-unprivileged.yaml | 19 + 
.../values-smarter-devices-cpu-mem.yaml | 9 + .../helm_chart_test_and_notes_TODO/NOTES.txt | 6 + .../_tests/test-connection.yaml | 15 + deployment/pcm/k8s-test.sh | 77 ++++ deployment/pcm/templates/_helpers.tpl | 79 ++++ deployment/pcm/templates/daemonset.yaml | 201 +++++++++ deployment/pcm/templates/podmonitor.yaml | 41 ++ .../pcm/templates/verticalpodautoscaler.yaml | 40 ++ deployment/pcm/values-direct-privileged.yaml | 16 + deployment/pcm/values-local-image.yaml | 4 + deployment/pcm/values-metal.yaml | 8 + deployment/pcm/values-vm.yaml | 6 + deployment/pcm/values.yaml | 166 ++++++++ src/cpucounters.cpp | 4 +- src/pcm-sensor-server.cpp | 8 + 25 files changed, 1242 insertions(+), 3 deletions(-) create mode 100644 Dockerfile.debug create mode 100644 deployment/pcm/.helmignore create mode 100644 deployment/pcm/Chart.yaml create mode 100644 deployment/pcm/LICENSE create mode 100644 deployment/pcm/README.md create mode 100644 deployment/pcm/docs/direct-unprivileged-deployment.md create mode 100644 deployment/pcm/docs/direct-unprivileged-examples/values-device-injector.yaml create mode 100644 deployment/pcm/docs/direct-unprivileged-examples/values-direct-unprivileged.yaml create mode 100644 deployment/pcm/docs/direct-unprivileged-examples/values-smarter-devices-cpu-mem.yaml create mode 100644 deployment/pcm/helm_chart_test_and_notes_TODO/NOTES.txt create mode 100644 deployment/pcm/helm_chart_test_and_notes_TODO/_tests/test-connection.yaml create mode 100644 deployment/pcm/k8s-test.sh create mode 100644 deployment/pcm/templates/_helpers.tpl create mode 100644 deployment/pcm/templates/daemonset.yaml create mode 100644 deployment/pcm/templates/podmonitor.yaml create mode 100644 deployment/pcm/templates/verticalpodautoscaler.yaml create mode 100644 deployment/pcm/values-direct-privileged.yaml create mode 100644 deployment/pcm/values-local-image.yaml create mode 100644 deployment/pcm/values-metal.yaml create mode 100644 deployment/pcm/values-vm.yaml create mode 100644 
deployment/pcm/values.yaml diff --git a/.dockerignore b/.dockerignore index 796b96d1..b8cad4f8 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1 +1,2 @@ /build +/deployment diff --git a/.gitignore b/.gitignore index fdf68e8c..2bd0d9df 100644 --- a/.gitignore +++ b/.gitignore @@ -32,4 +32,8 @@ latex/ .vs/ .idea/ build -src/simdjson \ No newline at end of file +src/simdjson +/deployment/pcm/smarter-device-manager/ +/deployment/pcm/nri/ +/deployment/pcm/kind-with-registry.sh +/deployment/pcm/autoscaler diff --git a/Dockerfile.debug b/Dockerfile.debug new file mode 100644 index 00000000..da966e97 --- /dev/null +++ b/Dockerfile.debug @@ -0,0 +1,6 @@ +FROM fedora:40@sha256:4e007f288dce23966216be81ef62ba05d139b9338f327c1d1c73b7167dd47312 as builder + +RUN dnf -y install gcc-c++ git findutils make cmake strace gdb util-linux +COPY . /tmp/pcm +RUN --mount=type=cache,target=/tmp/pcm/build cd /tmp/pcm/build && cmake -D CMAKE_BUILD_TYPE=Debug .. && cmake --build . -t pcm pcm-sensor-server pcm-tpmi -j && cp -v /tmp/pcm/build/bin/pcm* /bin/ +#ENV PCM_NO_PERF=1 diff --git a/deployment/pcm/.helmignore b/deployment/pcm/.helmignore new file mode 100644 index 00000000..5dfaad8e --- /dev/null +++ b/deployment/pcm/.helmignore @@ -0,0 +1,26 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ +smarter-device-manager/ +nri/ +autoscaler/ diff --git a/deployment/pcm/Chart.yaml b/deployment/pcm/Chart.yaml new file mode 100644 index 00000000..f200feaa --- /dev/null +++ b/deployment/pcm/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v2 +name: pcm +version: 0.1.0 +appVersion: "202403" +description: A PCM Helm chart for Kubernetes +home: https://github.com/intel/pcm +maintainers: + - name: Pawel Palucki + email: pawel.palucki@intel.com diff --git a/deployment/pcm/LICENSE b/deployment/pcm/LICENSE new file mode 100644 index 00000000..2d994393 --- /dev/null +++ b/deployment/pcm/LICENSE @@ -0,0 +1,30 @@ +BSD 3-Clause License + +Copyright (c) 2009-2024, Intel Corporation +Copyright (c) 2016-2020, opcm +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/deployment/pcm/README.md b/deployment/pcm/README.md new file mode 100644 index 00000000..338be95c --- /dev/null +++ b/deployment/pcm/README.md @@ -0,0 +1,385 @@ +-------------------------------------------------------------------------------- +Helm chart instructions +-------------------------------------------------------------------------------- + +### Features: + +- Configurable as non-privileged container (value: `privileged`, default: false) and privileged container, +- Support for bare-metal and VM host configurations (files: [values-metal.yaml](values-metal.yaml), [values-vm.yaml](values-vm.yaml)), +- Ability to deploy multiple releases alongside configured differently to handle different kinds of machines (bare-metal, VM) at the [same time](#heterogeneous-mixed-vmmetal-instances-cluster), +- Linux Watchdog handling (controlled with `PCM_KEEP_NMI_WATCHDOG`, `PCM_NO_AWS_WORKAROUND`, `nmiWatchdogMount` values). +- Deploy to own namespace with "helm install ... **-n pcm --create-namespace**". +- Silent mode (value: `silent`, default: false). +- Backward compatible with older Linux kernels (<5.8) - (value: cap_perfmon, default: false). 
+- VerticalPodAutoscaler (value: `verticalPodAutoscaler.enabled`, default: false) + +Here are the metrics collection methods available in this chart, with respect to the interfaces used and the access required: + +| Method | Used interfaces | default | Notes | instructions | +|-------------------------|----------------------| ------- | ------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------- | +| unprivileged "indirect" | perf, resctrl | v | recommended, missing metrics: energy metrics (TODO link to issues/PR or node_exporter/rapl_collector) | `helm install pcm .` | +| privileged "indirect" | perf, resctrl | | not recommended, insecure, no advantages over unprivileged, missing metrics: energy metrics | `helm install pcm . --set privileged=true` | +| privileged "direct" | msr | | not recommended, insecure and requires msr module preloaded on host | `helm install pcm . -f values-direct-privileged.yaml` | +| unprivileged "direct" | msr | | not recommended, requires msr module and access to /dev/cpu and /dev/mem (non-trivial, e.g. requires 3rd party plugins) | [link for detailed documentation](docs/direct-unprivileged-deployment.md) | + +For more information about direct/indirect collection methods please see [here](#metric-collection-methods-capabilities-vs-requirements) + +#### Integration features: + +- node-feature-discovery based nodeSelector and nodeAffinity (values: `nfd`, `nfdBaremetalAffinity`, `nfdRDTAffinity`), +- Examples for non-privileged mode using device plugin ("smarter-devices-manager") or using NRI device-injector plugin (TODO) (file: [values-smarter-devices-cpu-mem.yaml](docs/direct-unprivileged-examples/values-smarter-devices-cpu-mem.yaml) ), +- Integration with NRI balloons policy plugin (value: `nriBalloonsPolicyIntegration`), + +#### Debugging features: + +- Local image registry for development (file: [values-local-image.yaml](values-local-image.yaml) ), +- Deploy Prometheus operator' 
PodMonitor (value: `podMonitor`) + +### Getting started + +#### Indirect non-privileged method using Linux abstractions (perf/resctrl) - the default. + +```sh +helm install pcm . +``` + +#### Direct privileged method +``` +helm install pcm . -f values-direct-privileged.yaml +``` + +#### All opt-in features: Node-feature-discovery + Prometheus podMonitor + VerticalPodAutoscaler + +``` +helm install ... --set nfd=true --set podMonitor=true --set verticalPodAutoscaler.enabled=true +``` + +### Requirements + +- Full set of metrics (uncore/UPI, RDT, energy) requires bare-metal or .metal cloud instance. +- /sys/fs/resctrl has to be mounted on host OS (for default indirect deployment method) +- the pod must be allowed to run with privileged capabilities (SYS_ADMIN, SYS_RAWIO) in the given namespace; in other words, Pod Security Standards must allow running at the privileged level, for example with the following namespace labels: + +``` + pod-security.kubernetes.io/enforce: privileged + pod-security.kubernetes.io/enforce-version: latest + pod-security.kubernetes.io/audit: privileged + pod-security.kubernetes.io/audit-version: latest + pod-security.kubernetes.io/warn: privileged + pod-security.kubernetes.io/warn-version: latest +``` + +More information here: https://kubernetes.io/docs/tutorials/security/ns-level-pss/ . + +### Defaults + +- Indirect method uses Linux abstractions to access event counters (Linux Perf, resctrl) and runs the container in non-privileged mode. +- hostPort 9738 is exposed on host. (TODO: security review, consider TLS, together with Prometheus scraping !!). +- Prometheus podMonitor is disabled (enable it with --set podMonitor=true). + +### Validation on local kind cluster + +#### Requirements + +- kubectl/kind/helm/jq binaries available in PATH, +- docker service up and running. +- full set of metrics is available only on a bare-metal instance or a cloud .metal instance. 
+ +#### 1) (Optionally) mount resctrl filesystem (for RDT metrics) and unload "msr" kernel module for validation + +``` +mount -t resctrl resctrl /sys/fs/resctrl +``` + +To verify during validation that all metrics are available without msr, unload the "msr" kernel module and make sure perf_event_paranoid has its default value +``` +rmmod msr +echo 2 > /proc/sys/kernel/perf_event_paranoid +``` + +#### 2) Create kind based Kubernetes cluster + +``` +kind create cluster +``` + +**Note** to be able to collect and test RDT metrics through the resctrl filesystem, the kind cluster has to be created with additional mounts: +``` +nodes: +- role: control-plane + extraMounts: + - hostPath: /sys/fs/resctrl + containerPath: /sys/fs/resctrl +``` +e.g. create kind cluster with local registry with [this script](https://kind.sigs.k8s.io/docs/user/local-registry/) +and apply the patch to enable resctrl in the following way: + +``` +wget https://kind.sigs.k8s.io/examples/kind-with-registry.sh + +sed -i '/apiVersion: kind.x-k8s.io\/v1alpha4/a \ +nodes:\ +- role: control-plane\ + extraMounts:\ + - hostPath: /sys/fs/resctrl\ + containerPath: /sys/fs/resctrl\ +' kind-with-registry.sh +``` + +Then create the cluster using the patched script above: +``` +bash kind-with-registry.sh +``` + +Check that resctrl is available inside kind node: +``` +docker exec kind-control-plane ls /sys/fs/resctrl/info +# expected output: +# L3_MON +# MB +# ... +``` + + +and optionally that the local registry is running (to be used with locally built pcm images, more details [below](#development-with-local-images-and-testing)) +``` +docker ps | grep kind-registry +# expected output: +# e57529be23ea registry:2 "/entrypoint.sh /etc…" 3 weeks ago Up 3 weeks 127.0.0.1:5001->5000/tcp kind-registry +``` + +Export kind kubeconfig as default for further kubectl commands: +``` +kind export kubeconfig +kubectl get pods -A +``` + +#### 3) (Optionally) Deploy Node Feature Discovery (nfd) + +``` +# I.a. 
Using Kustomize: +kubectl apply -k https://github.com/kubernetes-sigs/node-feature-discovery/deployment/overlays/default?ref=v0.16.0-devel + +# I.b. or with Helm Chart: +helm repo add nfd https://kubernetes-sigs.github.io/node-feature-discovery/charts +helm repo update +helm install nfd/node-feature-discovery --namespace node-feature-discovery --create-namespace --generate-name + +# II. Check node "labels" with CPU features are added +kubectl get node kind-control-plane -o yaml | grep feature.node +``` + +#### 4) (Optionally) Deploy Prometheus operator + +``` +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm install prometheus prometheus-community/kube-prometheus-stack --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false +kubectl get sts prometheus-prometheus-kube-prometheus-prometheus +``` + +Note: `podMonitorSelectorNilUsesHelmValues` is disabled (set to false) so that the Prometheus operator is able to handle a PCM podMonitor deployed without extra `podMonitorLabels`; otherwise pcm needs to be deployed like this: +`helm install pcm . --set podMonitor=true --set podMonitorLabels.release=prometheus` (assuming Prometheus operator was deployed as "prometheus") + + +#### 5) (Optionally) Deploy metrics-server and vertical-pod-autoscaler + +Note this is irrelevant to pcm-sensor-server functionality, but useful to observe pcm pod CPU/memory usage: + +a) metrics-server + +``` +helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/ +helm repo update +helm upgrade --install --set args={--kubelet-insecure-tls} metrics-server metrics-server/metrics-server --namespace kube-system +``` + +b) vertical pod autoscaler + +``` +git clone https://github.com/kubernetes/autoscaler +./autoscaler/vertical-pod-autoscaler/hack/vpa-up.sh +``` + +#### 6) Deploy PCM helm chart + +``` +# a) Deploy to current namespace with defaults +helm install pcm . 
+ +# b) Alternatively deploy with NFD and/or with Prometheus enabled +helm install pcm . --set podMonitor=true +helm install pcm . --set nfd=true + +# c) Alternatively deploy into own "pcm" namespace +helm install pcm . --namespace pcm --create-namespace +``` + +#### 7) Check metrics are exported + +Run proxy in background: +``` +kubectl proxy & +``` + +Access PCM metrics directly: + +```sh +kubectl get daemonset pcm +kubectl get pods +podname=`kubectl get pod -l app.kubernetes.io/component=pcm-sensor-server -ojsonpath='{.items[0].metadata.name}'` + +curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/metrics +curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/metrics | grep L3_Cache_Misses # source: core +curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/metrics | grep DRAM_Writes # source: uncore +curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/metrics | grep 'Local_Memory_Bandwidth{socket="1",aggregate="socket",source="core"}' # source: RDT +curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/metrics | grep DRAM_Joules_Consumed # source: energy +``` + +... or through Prometheus UI/promtool (requires prometheus operator to be deployed and helm install with `--set podMonitor=true`): +``` +http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy/graph +promtool query range --step 1m http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy 'rate(DRAM_Writes{aggregate="system"}[5m])/1e9' +promtool query instant http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy 'avg by(__name__) ({job="pcm"})' +``` + +... 
or through Grafana with generated dashboard: + +``` + + +# 1) Download dashboard +curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/dashboard/prometheus -o pcm-dashboard.json + +# change default (too small) interval (from 4s to 2m, following Prometheus best practices of rate being four times larger than scraping 30s) +# References: +# https://grafana.com/blog/2020/09/28/new-in-grafana-7.2-__rate_interval-for-prometheus-rate-queries-that-just-work/ +# ($__rate_interval is 4 x scrape interval defined in datasource provisioned by prometheus operator, scrape interval is based on Prometheus object which defaults to 30s) +# - https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml#L1069 +# - https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml#L3381 +sed -i 's/4s/$__rate_interval/g' pcm-dashboard.json + +# 2) port forward with kubectl (--address=0.0.0.0) +kubectl port-forward -n default service/prometheus-grafana 8002:80 + +# 3) User: admin/prom-operator +# or get password kubectl get secret --namespace default prometheus-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo +http://127.0.0.1:8002 + +# 4) Go to Dashboards/New/Import and upload: + +pcm-dashboard.json + +``` + +### Deploy alternative options + +#### Direct (msr access) as privileged container +``` +helm install pcm . -f values-direct-privileged.yaml +``` + +#### Homogeneous bare metal instances cluster (full set of metrics) + +``` +helm install pcm . -f values-metal.yaml +``` + +#### Homogeneous VM instances cluster (limited set of metrics: core) + +``` +helm install pcm . -f values-vm.yaml +``` + +#### Heterogeneous (mixed VM/metal instances) cluster + +``` +helm install pcm-vm . -f values-vm.yaml +helm install pcm-metal . 
-f values-metal.yaml +``` + +#### Direct method as non-privileged container (not recommended) + +**Note** PCM requires access to /dev/cpu device in read-write mode (MSR access) but it is currently not possible to mount devices in Kubernetes pods/containers in vanilla Kubernetes for unprivileged containers. Read more about this limitation at https://github.com/kubernetes/kubernetes/issues/5607. + +To expose necessary devices to pcm-sensor-server, one can use: + +a) Kubernetes device plugin (using Kubernetes [CDI](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/) interface), +b) containerd plugin (using [NRI](https://github.com/containerd/nri/) interface). + +Examples can be found [here](docs/direct-unprivileged-deployment.md). + +#### Development (with local images) and testing + +1) Set up kind with a registry following these instructions: https://kind.sigs.k8s.io/docs/user/local-registry/ +``` +wget https://kind.sigs.k8s.io/examples/kind-with-registry.sh +bash kind-with-registry.sh +``` + +2) Build docker image and upload to local registry (from project root directory) +``` +docker build . -t localhost:5001/pcm-local +docker push localhost:5001/pcm-local + +# optionally create buildx based builder +mkdir -p ~/.docker/cli-plugins +curl -sL https://github.com/docker/buildx/releases/download/v0.14.0/buildx-v0.14.0.linux-amd64 -o ~/.docker/cli-plugins/docker-buildx +chmod +x ~/.docker/cli-plugins/docker-buildx +docker buildx create --driver docker-container --name mydocker --use --bootstrap + +# or with single line (from deployment/pcm/ directory) +# Build local image for tests/development +# The following Dockerfile contains the source code of pcm and some debugging utils (like gdb, strace) for further analysis +(cd ../.. && docker build . -f Dockerfile.debug -t localhost:5001/pcm-local && docker push localhost:5001/pcm-local) +``` + +3) When deploying pcm to the kind cluster, use the values file to switch to the local pcm-local image +``` +helm install pcm . 
-f values-local-image.yaml +``` + +4) Replace pcm-sensor-server with pcm or sleep to be able to run `gdb` or `strace` for example +``` +helm upgrade --install pcm . --set debugPcm=true +helm upgrade --install pcm . --set debugSleep=true +``` + +**TODO:** consider debug options to be removed before release for security reasons + +5) Check logs or interact with container directly: +``` +# exec into pcm container +kubectl exec -ti ds/pcm -- bash +# or check logs +kubectl logs ds/pcm +``` + +### Metric collection methods (capabilities vs requirements) + + + +| Metrics | Available on Hardware | Available through interface | Available through method | +| --------------------- | ----------------------------- | ---------------------------- | ------------------------ | +| core | bare-metal, VM (any) | msr or perf | any | +| uncore (UPI) | bare-metal, VM (all sockets) | msr or perf | any | +| RDT (MBW,L3OCCUP) | bare-metal, VM (all sockets) | msr or resctrl | any | +| energy, temp | bare-metal (only) | msr | direct | +| perf-topdown | | perf only | indirect | + + +| Interface | Requirements | Controlled by (env/helm value) | default helm | Used by source code | Notes | +|---------------|------------------------------------------------------------|---------------------------------|-----------------------|----------------------------------------------------------|-----------------------------------------------------| +| perf | sys_perf_open() perf_paranoid<=0/privileged/CAP_ADMIN | PCM_NO_PERF | use perf | programPerfEvent(), PerfVirtualControlRegister() | | +| perf-uncore | sys_perf_open() perf_paranoid<=0/privileged/CAP_ADMIN | PCM_USE_UNCORE_PERF | use perf for uncore | programPerfEvent(), PerfVirtualControlRegister() | | +| perf-topdown | /sys/bus/event_source/devices/cpu/events | sysMount | yes | cpucounters.cpp:perfSupportsTopDown() | TODO: conflicts with sys/fs/resctrl | +| RDT | uses "msr" or "resctrl" interface | PCM_NO_RDT | yes | 
cpucounters.cpp:isRDTDisabled()/QOSMetricAvailable() | | +| resctrl | RW: /sys/fs/resctrl | PCM_USE_RESCTRL | yes | resctrl.cpp | resctrlMount | +| watchdog | RO/RW: /proc/sys/kernel/nmi_watchdog | PCM_KEEP_NMI_WATCHDOG | yes (tries to disable)| src/cpucounters.cpp:disableNMIWatchdog() | | +| msr | RW: /dev/cpu/X/msr + privileged or CAP_ADMIN/CAP_RAWIO | PCM_NO_MSR | msr is disabled | msr.cpp:MsrHandle() | privileged or some method to access /dev/cpu | +| | RW: /dev/mem | ? | msr is disabled | cpucounters.cpp:initUncoreObjects, pci.cpp:PCIHandleM() | privileged or some method to access /dev/cpu | +| | RO/RW: /sys/module/msr/parameters | PCM_NO_MSR | msr is disabled | msr.cpp:MsrHandle() | sysMount | +| | RW: /proc/bus/pci | PCM_USE_UNCORE_PERF | msr is disabled | pci.cpp:PCIHandle() | pciMount | +| | RO: /sys/firmware/acpi/tables/MCFG | PCM_USE_UNCORE_PERF | msr is disabled | pci.cpp:PciHandle::openMcfgTable() | mcfgMount | +| | energy | | | cpucounters.cpp initEnergyMonitoring() | | + + diff --git a/deployment/pcm/docs/direct-unprivileged-deployment.md b/deployment/pcm/docs/direct-unprivileged-deployment.md new file mode 100644 index 00000000..fd760a17 --- /dev/null +++ b/deployment/pcm/docs/direct-unprivileged-deployment.md @@ -0,0 +1,67 @@ +-------------------------------------------------------------------------------- +Examples of deploying with direct MSR access as non-privileged container +-------------------------------------------------------------------------------- + +#### Direct method as non-privileged container (not recommended) + +##### a) Device injection using 3rd party device-plugin + +To run PCM as a non-privileged pod, we can use third-party device plugins, e.g.: + +- https://github.com/smarter-project/smarter-device-manager +- https://github.com/squat/generic-device-plugin +- https://github.com/everpeace/k8s-host-device-plugin + +**Warning** These plugins were NOT audited for security concerns, **use them at your own risk**. 
+ +Below is an example of how to pass /dev/cpu and /dev/mem using smarter-device-manager in a kind-based Kubernetes test cluster. + +``` +# Label node to deploy device plugin on that node +kubectl label node kind-control-plane smarter-device-manager=enabled + +# Install "smarter-device-manager" device plugin with only /dev/cpu and /dev/mem devices enabled: +git clone https://github.com/smarter-project/smarter-device-manager +helm install smarter-device-plugin --create-namespace --namespace smarter-device-plugin smarter-device-manager/charts/smarter-device-manager --set 'config[0].devicematch=^cpu$' --set 'config[0].nummaxdevices=1' --set 'config[1].devicematch=^mem$' --set 'config[1].nummaxdevices=1' + +# Check that cpu and mem devices are available - should return "1" +kubectl get node kind-control-plane -o json | jq .status.capacity + +# Install pcm helm chart in unprivileged mode with extraResources for cpu and memory devices. +helm install pcm . -f docs/direct-unprivileged-examples/values-direct-unprivileged.yaml -f docs/direct-unprivileged-examples/values-smarter-devices-cpu-mem.yaml +``` + +##### b) Device injection using NRI device-injector plugin + +**TODO**: **Warning** This is work in progress, because all /dev/cpu/XX/msr devices have to be specified manually, which is impractical in production (TO BE MOVED TO EXTERNAL FILE). 
+ +``` +git clone https://github.com/containerd/nri/ +(cd nri/plugins/device-injector/ && go build ) +docker cp kind-control-plane:/etc/containerd/config.toml config.toml + +cat >>config.toml < 0.09090909090909094 @[1707901856.957] +Clock_Unhalted_Ref => 1010026077.3913049 @[1707901856.957] +Clock_Unhalted_Thread => 1295730425.8695648 @[1707901856.957] +DRAM_Joules_Consumed => 0 @[1707901856.957] +DRAM_Reads => 3600814506.6666665 @[1707901856.957] +DRAM_Writes => 1974366592 @[1707901856.957] +Embedded_DRAM_Reads => 0 @[1707901856.957] +Embedded_DRAM_Writes => 0 @[1707901856.957] +Incoming_Data_Traffic_On_Link_0 => 689786624 @[1707901856.957] +Incoming_Data_Traffic_On_Link_1 => 689454432 @[1707901856.957] +Incoming_Data_Traffic_On_Link_2 => 0 @[1707901856.957] +Instructions_Retired_Any => 749013885.5739133 @[1707901856.957] +Invariant_TSC => 432975372048881700 @[1707901856.957] +L2_Cache_Hits => 3531524.973913045 @[1707901856.957] +L2_Cache_Misses => 2334387.130434784 @[1707901856.957] +L3_Cache_Hits => 1325323.1739130428 @[1707901856.957] +L3_Cache_Misses => 627863.4000000003 @[1707901856.957] +L3_Cache_Occupancy => 0 @[1707901856.957] +Local_Memory_Bandwidth => 0 @[1707901856.957] +Measurement_Interval_in_us => 14507400443881 @[1707901856.957] +Memory_Controller_IO_Requests => 0 @[1707901856.957] +Number_of_sockets => 2 @[1707901856.957] +OS_ID => 55.499999999999986 @[1707901856.957] +Outgoing_Data_And_Non_Data_Traffic_On_Link_0 => 1843333122.5 @[1707901856.957] +Outgoing_Data_And_Non_Data_Traffic_On_Link_1 => 1849219231.5 @[1707901856.957] +Outgoing_Data_And_Non_Data_Traffic_On_Link_2 => 0 @[1707901856.957] +Package_Joules_Consumed => 0 @[1707901856.957] +Persistent_Memory_Reads => 0 @[1707901856.957] +Persistent_Memory_Writes => 0 @[1707901856.957] +RawCStateResidency => 89486131.66409859 @[1707901856.957] +Remote_Memory_Bandwidth => 0 @[1707901856.957] +SMI_Count => 0 @[1707901856.957] +Thermal_Headroom => -2147483648 @[1707901856.957] 
+Utilization_Incoming_Data_Traffic_On_Link_0 => 0 @[1707901856.957] +Utilization_Incoming_Data_Traffic_On_Link_1 => 0 @[1707901856.957] +Utilization_Incoming_Data_Traffic_On_Link_2 => 0 @[1707901856.957] +Utilization_Outgoing_Data_And_Non_Data_Traffic_On_Link_0 => 0 @[1707901856.957] +Utilization_Outgoing_Data_And_Non_Data_Traffic_On_Link_1 => 0 @[1707901856.957] +Utilization_Outgoing_Data_And_Non_Data_Traffic_On_Link_2 => 0 @[1707901856.957] +``` diff --git a/deployment/pcm/templates/_helpers.tpl b/deployment/pcm/templates/_helpers.tpl new file mode 100644 index 00000000..fffa7025 --- /dev/null +++ b/deployment/pcm/templates/_helpers.tpl @@ -0,0 +1,79 @@ +{{/* Expand the name of the chart. */}} +{{- define "pcm.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. */}} +{{- define "pcm.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* Create chart name and version as used by the chart label. */}} +{{- define "pcm.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* Selector labels */}} +{{- define "pcm.selectorLabels" -}} +app.kubernetes.io/name: {{ include "pcm.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/component: pcm-sensor-server +{{- end }} + +{{/* Common labels */}} +{{- define "pcm.labels" -}} +helm.sh/chart: {{ include "pcm.chart" . 
}} +{{ include "pcm.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* SecurityContext privileged or capabilties */}} +{{- define "pcm.securityContext" -}} +securityContext: +{{- if .Values.privileged }} + privileged: true +{{- else -}} + {{/* TODO? + readOnlyRootFilesystem: false + runAsUser: 0 + runAsGroup: 0 + ## below two doesnt work on container level! + fsGroup: 0 + supplementalGroups: [0] + seccompProfile: + #type: RuntimeDefault + type: Unconfined + */}} + capabilities: + add: + - {{ if .Values.cap_perfmon }}PERFMON{{ else }}SYS_ADMIN{{ end }} + - SYS_RAWIO +{{- end }} +{{- end }} + + +{{/* Probes: liveness and readiness probe */}} +{{- define "pcm.probe" -}} +failureThreshold: 3 +httpGet: + path: / + port: 9738 + scheme: HTTP +periodSeconds: 10 +successThreshold: 1 +timeoutSeconds: 1 +{{- end }} diff --git a/deployment/pcm/templates/daemonset.yaml b/deployment/pcm/templates/daemonset.yaml new file mode 100644 index 00000000..6625fd15 --- /dev/null +++ b/deployment/pcm/templates/daemonset.yaml @@ -0,0 +1,201 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "pcm.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "pcm.labels" . | nindent 4 }} +spec: + selector: + matchLabels: + {{- include "pcm.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "pcm.labels" . | nindent 8 }} + annotations: + {{- with .Values.podAnnotations }}{{- toYaml . | nindent 8 }}{{- end }} + {{- if .Values.nriBalloonsPolicyIntegration }} + cpu.preserve.resource-policy.nri.io: "true" + {{- end }} + spec: + nodeSelector: + {{- with .Values.nodeSelector -}}{{- toYaml . 
| nindent 8 -}}{{- end -}} + {{- if .Values.nfd }} + feature.node.kubernetes.io/cpu-model.vendor_id: Intel + {{- if .Values.nfdRDTAffinity }} + feature.node.kubernetes.io/cpu-rdt.RDTCMT: "true" + feature.node.kubernetes.io/cpu-rdt.RDTL3CA: "true" + feature.node.kubernetes.io/cpu-rdt.RDTMBA: "true" + feature.node.kubernetes.io/cpu-rdt.RDTMBM: "true" + feature.node.kubernetes.io/cpu-rdt.RDTMON: "true" + {{- end }} + {{- if .Values.nfdBaremetalAffinity}} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR" + operator: DoesNotExist + {{- end }} + {{- end }} {{/* if nfd */}} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end -}} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + terminationGracePeriodSeconds: 0 + containers: + - name: pcm + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- include "pcm.securityContext" . 
| nindent 8 }} + {{- if .Values.silent }} + command: + - "/usr/local/bin/pcm-sensor-server" + - "-p" + - "9738" + - "-r" + - "-silent" + {{- end -}} + {{- if .Values.debugSleep }} + command: + - /usr/bin/sleep + - inf + {{- end -}} + {{- if .Values.debugPcm }} + command: + - /bin/bash + - -c + - "/usr/local/bin/pcm 2 -r -nc -nsys{{ if .Values.silent }} -silent{{ end }}" + {{- end -}} + {{- if .Values.resctrlInsideMount }} + # Hack (bare metal only): mount resctrl inside the container when the resctrl abstraction is wanted but resctrl is not mounted on the host; enabled by resctrlInsideMount. NOTE: this also emits "command", so do not combine it with silent/debugSleep/debugPcm + command: + - /bin/bash + - -c + - "dnf install -q -y util-linux-core; mount -t resctrl resctrl /sys/fs/resctrl; /usr/local/bin/pcm-sensor-server -p 9738 -r" + {{- end -}} + {{/* ALREADY DONE by securityContext on pod level + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 12 }} + */}} + resources: + requests: + {{ with .Values.cpuRequest }}cpu: {{.}}{{ end }} + {{ with .Values.memoryRequest }}memory: {{.}}{{ end }} + {{- with .Values.extraResources }} {{- toYaml .requests | nindent 12 }} {{- end }} + limits: + {{ with .Values.cpuLimit }}cpu: {{.}}{{ end }} + {{ with .Values.memoryLimit }}memory: {{.}}{{ end }} + {{- with .Values.extraResources }} {{- toYaml .limits | nindent 12 }} {{- end }} + env: + - name: PCM_NO_MSR + value: {{ .Values.PCM_NO_MSR | quote }} + - name: PCM_NO_PERF + value: {{ .Values.PCM_NO_PERF | quote }} + - name: PCM_USE_UNCORE_PERF + value: {{ .Values.PCM_USE_UNCORE_PERF | quote }} + - name: PCM_NO_RDT + value: {{ .Values.PCM_NO_RDT | quote }} + - name: PCM_USE_RESCTRL + value: {{ .Values.PCM_USE_RESCTRL | quote }} + - name: PCM_IGNORE_ARCH_PERFMON + value: {{ .Values.PCM_IGNORE_ARCH_PERFMON | quote }} + - name: PCM_KEEP_NMI_WATCHDOG + value: {{ .Values.PCM_KEEP_NMI_WATCHDOG | quote }} + - name: PCM_NO_AWS_WORKAROUND + value: {{ .Values.PCM_NO_AWS_WORKAROUND | quote }} + - name: PCM_NO_UNCORE_PMU_DISCOVERY + value: {{ .Values.PCM_NO_UNCORE_PMU_DISCOVERY | quote }} 
+ - name: PCM_PRINT_UNCORE_PMU_DISCOVERY + value: {{ .Values.PCM_PRINT_UNCORE_PMU_DISCOVERY | quote }} + - name: PCM_PRINT_TOPOLOGY + value: {{ .Values.PCM_PRINT_TOPOLOGY | quote }} + - name: PCM_NO_MAIN_EXCEPTION_HANDLER + value: {{ .Values.PCM_NO_MAIN_EXCEPTION_HANDLER | quote }} + {{- with .Values.probes }} + livenessProbe: + {{- include "pcm.probe" . | nindent 12 }} + readinessProbe: + {{- include "pcm.probe" . | nindent 12 }} + {{- end }} + {{- with .Values.hostPort }} + ports: + - containerPort: 9738 + hostPort: {{ . }} + name: pcm-metrics + protocol: TCP + {{- end }} + volumeMounts: + # {{- if .Values.privileged }} + # - mountPath: /pcm/dev/cpu + # name: dev-cpu + # readOnly: false + # - mountPath: /pcm/dev/mem + # name: dev-mem + # readOnly: false + # {{- end }} + {{- if .Values.pciMount }} + - mountPath: /pcm/proc/bus/pci + name: proc-pci + {{- end }} + {{- if .Values.sysMount }} + - mountPath: /pcm/sys + name: sysfs + readOnly: true + {{- end }} + {{- if .Values.nmiWatchdogMount }} + - mountPath: /pcm/proc/sys/kernel/nmi_watchdog + name: nmi-watchdog + readOnly: true # RW? 
# TODO + {{- end }} + {{- if .Values.resctrlMount }} + - mountPath: /sys/fs/resctrl + name: sysfs-resctrl + {{- end }} + # TODO: to be removed, already handled by /sysMount + # {{- if .Values.mcfgMount }} + # - mountPath: /pcm/sys/firmware/acpi/tables/MCFG + # name: sys-acpi + # readOnly: true + # {{- end }} + volumes: + # {{- if .Values.privileged }} + # - name: dev-cpu + # hostPath: + # path: /dev/cpu + # - name: dev-mem + # hostPath: + # path: /dev/mem + # {{- end}} + {{- if .Values.sysMount }} + - name: sysfs + hostPath: + path: /sys + {{- end}} + {{- if .Values.pciMount }} + - name: proc-pci + hostPath: + path: /proc/bus/pci + {{- end}} + {{- if .Values.nmiWatchdogMount }} + - name: nmi-watchdog + hostPath: + path: /proc/sys/kernel/nmi_watchdog + {{- end }} + # TODO: to be removed, already handled by /sysMount + # {{- if .Values.mcfgMount }} + # - name: sys-acpi + # hostPath: + # path: /sys/firmware/acpi/tables/MCFG + # {{- end }} + {{- if .Values.resctrlMount }} + - name: sysfs-resctrl + hostPath: + path: /sys/fs/resctrl + {{- end }} diff --git a/deployment/pcm/templates/podmonitor.yaml b/deployment/pcm/templates/podmonitor.yaml new file mode 100644 index 00000000..b9477e95 --- /dev/null +++ b/deployment/pcm/templates/podmonitor.yaml @@ -0,0 +1,41 @@ +{{- if .Values.podMonitor }} +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: {{ include "pcm.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "pcm.labels" . | nindent 4 }} + app.kubernetes.io/component: metrics + jobLabel: pcm + {{- with .Values.podMonitorLabels }} + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + attachMetadata: + node: true + jobLabel: jobLabel + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} + podMetricsEndpoints: + # requires hostPort to be set {{ required "A valid .Values.hostPort is required with PodMonitor enabled " .Values.hostPort }} + - enableHttp2: false + filterRunning: true + followRedirects: false + honorLabels: true + honorTimestamps: true + path: /metrics + port: pcm-metrics + interval: {{ .Values.podMonitorInterval | quote }} + relabelings: + - sourceLabels: + - __meta_kubernetes_pod_node_name + targetLabel: nodename + scheme: http + selector: + matchLabels: + app.kubernetes.io/component: pcm-sensor-server + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/name: {{ include "pcm.name" . }} +{{- end }} diff --git a/deployment/pcm/templates/verticalpodautoscaler.yaml b/deployment/pcm/templates/verticalpodautoscaler.yaml new file mode 100644 index 00000000..9d0941d9 --- /dev/null +++ b/deployment/pcm/templates/verticalpodautoscaler.yaml @@ -0,0 +1,40 @@ +{{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") (.Values.verticalPodAutoscaler.enabled) }} +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: {{ include "pcm.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "pcm.labels" . | nindent 4 }} +spec: + {{- with .Values.verticalPodAutoscaler.recommenders }} + recommenders: + {{- toYaml . | nindent 4 }} + {{- end }} + resourcePolicy: + containerPolicies: + - containerName: pcm + {{- with .Values.verticalPodAutoscaler.controlledResources }} + controlledResources: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.verticalPodAutoscaler.controlledValues }} + controlledValues: {{ . }} + {{- end }} + {{- with .Values.verticalPodAutoscaler.maxAllowed }} + maxAllowed: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.verticalPodAutoscaler.minAllowed }} + minAllowed: + {{- toYaml . 
| nindent 8 }} + {{- end }} + targetRef: + apiVersion: apps/v1 + kind: DaemonSet + name: {{ include "pcm.fullname" . }} + {{- with .Values.verticalPodAutoscaler.updatePolicy }} + updatePolicy: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/deployment/pcm/values-direct-privileged.yaml b/deployment/pcm/values-direct-privileged.yaml new file mode 100644 index 00000000..c307438d --- /dev/null +++ b/deployment/pcm/values-direct-privileged.yaml @@ -0,0 +1,16 @@ +#### Tuning for "direct" privileged access +privileged: true + +# Switch PCM to use msr access always +PCM_NO_MSR: 0 # use MSR +PCM_NO_PERF: 1 # do not use Linux perf +PCM_USE_UNCORE_PERF: 0 # also use MSR for uncore +PCM_NO_RDT: 0 # Enable RDT metrics ... +PCM_USE_RESCTRL: 0 # but using MSR (no resctrl filesystem) + +# with privileged container additional mounts aren't required +resctrlMount: false # with MSR resctrl mount is not needed +resctrlInsideMount: false +sysMount: false +pciMount: false +mcfgMount: false diff --git a/deployment/pcm/values-local-image.yaml b/deployment/pcm/values-local-image.yaml new file mode 100644 index 00000000..7d1c336d --- /dev/null +++ b/deployment/pcm/values-local-image.yaml @@ -0,0 +1,4 @@ +image: + repository: localhost:5001/pcm-local + tag: "latest" + pullPolicy: Always diff --git a/deployment/pcm/values-metal.yaml b/deployment/pcm/values-metal.yaml new file mode 100644 index 00000000..1ca73c1e --- /dev/null +++ b/deployment/pcm/values-metal.yaml @@ -0,0 +1,8 @@ +#### ================ Tuning for bare-metal instances ================ +# with node-feature-discovery node affinity for non hypervisor and RDT +nmiWatchdogMount: false +PCM_NO_AWS_WORKAROUND: 1 +PCM_KEEP_NMI_WATCHDOG: 0 +nfd: true +nfdBaremetalAffinity: true +nfdRDTAffinity: true diff --git a/deployment/pcm/values-vm.yaml b/deployment/pcm/values-vm.yaml new file mode 100644 index 00000000..e9a43327 --- /dev/null +++ b/deployment/pcm/values-vm.yaml @@ -0,0 +1,6 @@ +#### ================ Tuning 
for VM ================ +nmiWatchdogMount: true + +# Disable RDT because it is not available for VM instances +PCM_NO_RDT: 1 +resctrlMount: false diff --git a/deployment/pcm/values.yaml b/deployment/pcm/values.yaml new file mode 100644 index 00000000..b8eda605 --- /dev/null +++ b/deployment/pcm/values.yaml @@ -0,0 +1,166 @@ +### -------------- Naming ------------------- +# used in +# - common label: app.kubernetes.io/name otherwise "Chart name" +# - also in selectorLabels together with release.name +# defaults to "Chart.name" +nameOverride: "" +# Used as daemonset name (usually based on truncated "name + release name") +fullnameOverride: "" + +### -------------- Image options ------------ +image: + repository: ghcr.io/intel/pcm + pullPolicy: IfNotPresent + tag: "latest" # uses .Chart.AppVersion if empty +imagePullSecrets: {} + +### -------------- Security ------------------ +# Runs the container unprivileged by default, so the PERFMON (or SYS_ADMIN, see cap_perfmon) and SYS_RAWIO capabilities are added to the pod instead +privileged: false + +# Use new kernel 5.8+ PERFMON (least privileged) instead of generic SYS_ADMIN capability +# !Warning requires kernel 5.8+ +# more info here: https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html#perf-events-access-control +cap_perfmon: true + +# Run pcm in silent mode (additional -silent argument to pcm-sensor-server binary) +# Removes some debug output (like warnings about the inability to open some /sys... /proc... 
files) +silent: false + +### -------------- Required OS affinity ------- +# Should only running on linux +nodeSelector: + kubernetes.io/os: linux + +### -------------- Probes --------------------- +probes: false + +### ================ Metrics configuration ====================== + +### -------------- Metrics: Uncore ------------ +# Mounts section +# NOTE: only required for direct mode +# required for uncore metrics discovery and working only in baremetal, not available for VM +sysMount: false # mounts host /sys into container /pcm/sys/ +pciMount: false # mounts host /proc/bus/pci into container /pcm/proc/bus/pci/ + +# NOTE this is only required for direct unprivileged mode ?!?!?! +# TODO: to be removed!!!?!?!!?!? (already coverred sysMounts !!!!) +#mcfgMount: false # mounts hosts: /sys/firmware/acpi/tables/MCFG -> /pcm/sys/firmware/acpi/tables/MCFG + +### linux Perf (indirect) vs msr(direct) +# Lets try "indirect" as default +PCM_NO_MSR: 1 # do not use MSR +PCM_NO_PERF: 0 # use Linux Perf over MSR for core metrics +PCM_USE_UNCORE_PERF: 1 # use Linux Perf instead of MSR for uncore metrics (collection+detection) + +### -------------- Metrics: RDT --------------- +### RDT rdt/resctrl: +PCM_NO_RDT: 0 # 0 - try to collect RDT data, enables local/remote memory bandwidth + llc occupancy +PCM_USE_RESCTRL: 1 # use Linux Perf instead of MSR access (more reliable) +# required for indirect RDT access, not available for VM only in baremetal +# do not mount by default RDT can be also accessed through direct MSR programming +resctrlMount: true # mount from external host +resctrlInsideMount: false # TODO: mount inside with extra call to mount, requires image with mount installed - doesn't require + +### -------------- Other (NMI handling and/or on VM/AWS) +PCM_IGNORE_ARCH_PERFMON: 0 # After VM is detected through CPUID (hypervisor flag) - check arch_perfmon flag to be also enabled - fail if not avaiable (0 - do check, 1 - disable check) +# 0: Disabling NMI watchdog since it 
consumes one hw-PMU counter, requires nmiWatchdogMount to be true +# 1: don't disable NMI watchdog (reducing the core metrics set) - prefferd for production usage! +# but even with 0 automatic AWS workround applies! +PCM_KEEP_NMI_WATCHDOG: 0 +# workaround: after VM is detected: "INFO: Reducing the number of programmable counters to 3 to workaround the fixed cycle counter virtualization issue on AWS.\n";) +# 1: disables workaround and tries to use four programable counters (without workaround on VM will pcm-sensor-server will hang) +# Please do not disable (value=1) on VMs +PCM_NO_AWS_WORKAROUND: 0 + +# mounting watchdog is recommened when PCM_KEEP_NMI_WATCHDOG=0 or we expect AWS workaround to be applied +nmiWatchdogMount: true + +### -------------- Other (Debugging options for uncore pmu discovery) +PCM_NO_UNCORE_PMU_DISCOVERY: 0 # skip 1: this is not required for direct privileged access and with 0 ends with WARNING enumaration failed +PCM_PRINT_UNCORE_PMU_DISCOVERY: 1 # show: discovered pmu +PCM_PRINT_TOPOLOGY: 0 # show individual CPU topology for each core (plenty of lines) +PCM_NO_MAIN_EXCEPTION_HANDLER: 0 # show full call stack of error + +### =============================== Optional POD fields no related to PCM =============================== +# Pod level +podAnnotations: {} +podLabels: {} +# Container level +tolerations: [] +# Resources cpu/mem +cpuLimit: 100m +cpuRequest: 100m +memoryLimit: 512Mi +memoryRequest: 256Mi +# requests, limits level need to be specified here +extraResources: {} + +### =============================== Integrations with other projects ==================================== +# +### -------------- Prometheus operator -------------------- +# Expose run containerPort "pcm-sensor-server -p 9738" as hostPort, can be empty to disable hostPort +hostPort: 9738 +# Deploy PromtheusOperator PodMonitor (requires hostPort to be not empty) +podMonitor: false +# Extra PodMonitor labels to let Prometheus operator filter based on that +# e.g. 
default "kube-prometheus-stack" helm chart requires additional release:"{name of chart release}" label in podMonitor to be considered +# here is example how to check extra labels required to be added to PodMonitor +# 1) kubectl get prometheus -o jsonpath='{.items[].spec.podMonitorSelector.matchLabels}' # e.g. release: prometheus +# 2) helm install pcm . --set podMonitor=true --set podMonitorLabels.release=prometheus +podMonitorLabels: {} +# Default interval for Prometheus scrapping configuration +podMonitorInterval: 30s + + +### -------------- NRI balloons policy plugin ------------- +# PCM deployment to be intergrated with NRI balloons resource policy intergration +# if true, will add special annotation to allow pcm pod use all the core, regardless NRI balloons policy rules. +nriBalloonsPolicyIntegration: false + +### ------------- node-feature-discovery ----------------- +# when enabled specific set of labels will be used as node selector (Intel vendor, RDT availability, baremetal) +nfd: false +# if enabled daemonset nodeAffinity will require node without feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR flag (requires nfd=true) +nfdBaremetalAffinity: false +# if enabled, followin RDT labels will be required for scheduling (requires nfd=true) +# feature.node.kubernetes.io/cpu-rdt.RDTCMT=true +# feature.node.kubernetes.io/cpu-rdt.RDTL3CA=true +# feature.node.kubernetes.io/cpu-rdt.RDTMBA=true +# feature.node.kubernetes.io/cpu-rdt.RDTMBM=true +# feature.node.kubernetes.io/cpu-rdt.RDTMON=true +nfdRDTAffinity: false + + +### -------------- verticalPodAutoscaler ------------------ +# Enable vertical pod autoscaler support for pcm-sensor-server +verticalPodAutoscaler: + enabled: false + + # Recommender responsible for generating recommendation for the object. + # List should be empty (then the default recommender will generate the recommendation) + # or contain exactly one recommender. 
+ # recommenders: + # - name: custom-recommender-performance + + # List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory + controlledResources: [] + # Specifies which resource values should be controlled: RequestsOnly or RequestsAndLimits. + # controlledValues: RequestsAndLimits + + # Define the max allowed resources for the pod + maxAllowed: {} + # cpu: 200m + # memory: 100Mi + # Define the min allowed resources for the pod + minAllowed: {} + # cpu: 200m + # memory: 100Mi + + # updatePolicy: + # Specifies minimal number of replicas which need to be alive for VPA Updater to attempt pod eviction + # minReplicas: 1 + # Specifies whether recommended updates are applied when a Pod is started and whether recommended updates + # are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "Auto". + # updateMode: Auto diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index ef6bdc89..ec16202d 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -552,7 +552,7 @@ bool PCM::L3CacheOccupancyMetricAvailable() const bool PCM::CoreLocalMemoryBWMetricAvailable() const { - if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata + //if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata PCM_CPUID_INFO cpuinfo; if (!(QOSMetricAvailable() && L3QOSMetricAvailable())) return false; @@ -562,7 +562,7 @@ bool PCM::CoreLocalMemoryBWMetricAvailable() const bool PCM::CoreRemoteMemoryBWMetricAvailable() const { - if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata + //if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata PCM_CPUID_INFO cpuinfo; if (!(QOSMetricAvailable() && L3QOSMetricAvailable())) return false; diff --git a/src/pcm-sensor-server.cpp b/src/pcm-sensor-server.cpp index 72a89ec5..70e05dc6 100644 --- a/src/pcm-sensor-server.cpp +++ b/src/pcm-sensor-server.cpp @@ -427,6 +427,10 @@ class JSONPrinter : Visitor PCM* pcm = 
PCM::getInstance(); printCounter( "DRAM Writes", getBytesWrittenToMC ( before, after ) ); printCounter( "DRAM Reads", getBytesReadFromMC ( before, after ) ); + + if (pcm->localMemoryRequestRatioMetricAvailable()) + printCounter( "DRAM Local Percentage", getLocalMemoryRequestRatio( before, after ) ); + if(pcm->nearMemoryMetricsAvailable()){ printCounter( "NM HitRate", getNMHitRate ( before, after ) ); printCounter( "NM Hits", getNMHits ( before, after ) ); @@ -715,6 +719,10 @@ class PrometheusPrinter : Visitor addToHierarchy( "source=\"uncore\"" ); printCounter( "DRAM Writes", getBytesWrittenToMC ( before, after ) ); printCounter( "DRAM Reads", getBytesReadFromMC ( before, after ) ); + + if (pcm->localMemoryRequestRatioMetricAvailable()) + printCounter( "DRAM Local Percentage", getLocalMemoryRequestRatio( before, after ) ); + if(pcm->nearMemoryMetricsAvailable()){ printCounter( "NM Hits", getNMHits ( before, after ) ); printCounter( "NM Misses", getNMMisses ( before, after ) ); From d75b013f45d47366608a938f4550f051dabb7a64 Mon Sep 17 00:00:00 2001 From: Pawel Palucki Date: Thu, 6 Jun 2024 16:34:43 +0200 Subject: [PATCH 2/8] First version of linter + tests --- deployment/pcm/Chart.yaml | 2 +- deployment/pcm/Makefile | 2 ++ deployment/pcm/values-direct-privileged.yaml | 1 - deployment/pcm/values.yaml | 26 ++++++++++---------- 4 files changed, 16 insertions(+), 15 deletions(-) create mode 100644 deployment/pcm/Makefile diff --git a/deployment/pcm/Chart.yaml b/deployment/pcm/Chart.yaml index f200feaa..685e8b4d 100644 --- a/deployment/pcm/Chart.yaml +++ b/deployment/pcm/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: pcm version: 0.1.0 -appVersion: "202403" +appVersion: "202404" description: A PCM Helm chart for Kubernetes home: https://github.com/intel/pcm maintainers: diff --git a/deployment/pcm/Makefile b/deployment/pcm/Makefile new file mode 100644 index 00000000..6ae38b12 --- /dev/null +++ b/deployment/pcm/Makefile @@ -0,0 +1,2 @@ +chart-lint-report.txt: values.yaml 
templates + docker run -ti --rm -w /pcm -v `realpath $(PWD)/../..`:/pcm quay.io/helmpack/chart-testing ct lint --charts deployment/pcm --validate-maintainers=false | tee chart-lint-report.txt diff --git a/deployment/pcm/values-direct-privileged.yaml b/deployment/pcm/values-direct-privileged.yaml index c307438d..531224c6 100644 --- a/deployment/pcm/values-direct-privileged.yaml +++ b/deployment/pcm/values-direct-privileged.yaml @@ -13,4 +13,3 @@ resctrlMount: false # with MSR resctrl mount is not needed resctrlInsideMount: false sysMount: false pciMount: false -mcfgMount: false diff --git a/deployment/pcm/values.yaml b/deployment/pcm/values.yaml index b8eda605..18643f0c 100644 --- a/deployment/pcm/values.yaml +++ b/deployment/pcm/values.yaml @@ -1,9 +1,9 @@ ### -------------- Naming ------------------- -# used in +# Used in: # - common label: app.kubernetes.io/name otherwise "Chart name" # - also in selectorLabels together with release.name # defaults to "Chart.name" -nameOverride: "" +nameOverride: "" # Used as daemonset name (usually based on truncated "name + release name") fullnameOverride: "" @@ -19,7 +19,7 @@ imagePullSecrets: {} privileged: false # Use new kernel 5.8+ PERFMON (least privileged) instead of generic SYS_ADMIN capability -# !Warning requires kernel 5.8+ +# !Warning requires kernel 5.8+ # more info here: https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html#perf-events-access-control cap_perfmon: true @@ -40,13 +40,13 @@ probes: false ### -------------- Metrics: Uncore ------------ # Mounts section # NOTE: only required for direct mode -# required for uncore metrics discovery and working only in baremetal, not available for VM +# required for uncore metrics discovery and working only in baremetal, not available for VM sysMount: false # mounts host /sys into container /pcm/sys/ pciMount: false # mounts host /proc/bus/pci into container /pcm/proc/bus/pci/ # NOTE this is only required for direct unprivileged mode ?!?!?! 
-# TODO: to be removed!!!?!?!!?!? (already coverred sysMounts !!!!) -#mcfgMount: false # mounts hosts: /sys/firmware/acpi/tables/MCFG -> /pcm/sys/firmware/acpi/tables/MCFG +# TODO: to be removed!!!?!?!!?!? (already coverred sysMounts !!!!) yes or not +mcfgMount: false # mounts hosts: /sys/firmware/acpi/tables/MCFG -> /pcm/sys/firmware/acpi/tables/MCFG ### linux Perf (indirect) vs msr(direct) # Lets try "indirect" as default @@ -61,13 +61,13 @@ PCM_USE_RESCTRL: 1 # use Linux Perf instead of MSR access (more reli # required for indirect RDT access, not available for VM only in baremetal # do not mount by default RDT can be also accessed through direct MSR programming resctrlMount: true # mount from external host -resctrlInsideMount: false # TODO: mount inside with extra call to mount, requires image with mount installed - doesn't require +resctrlInsideMount: false # TODO: mount inside with extra call to mount, requires image with mount installed - doesn't require ### -------------- Other (NMI handling and/or on VM/AWS) PCM_IGNORE_ARCH_PERFMON: 0 # After VM is detected through CPUID (hypervisor flag) - check arch_perfmon flag to be also enabled - fail if not avaiable (0 - do check, 1 - disable check) # 0: Disabling NMI watchdog since it consumes one hw-PMU counter, requires nmiWatchdogMount to be true # 1: don't disable NMI watchdog (reducing the core metrics set) - prefferd for production usage! -# but even with 0 automatic AWS workround applies! +# but even with 0 automatic AWS workround applies! 
PCM_KEEP_NMI_WATCHDOG: 0 # workaround: after VM is detected: "INFO: Reducing the number of programmable counters to 3 to workaround the fixed cycle counter virtualization issue on AWS.\n";) # 1: disables workaround and tries to use four programable counters (without workaround on VM will pcm-sensor-server will hang) @@ -79,7 +79,7 @@ nmiWatchdogMount: true ### -------------- Other (Debugging options for uncore pmu discovery) PCM_NO_UNCORE_PMU_DISCOVERY: 0 # skip 1: this is not required for direct privileged access and with 0 ends with WARNING enumaration failed -PCM_PRINT_UNCORE_PMU_DISCOVERY: 1 # show: discovered pmu +PCM_PRINT_UNCORE_PMU_DISCOVERY: 1 # show: discovered pmu PCM_PRINT_TOPOLOGY: 0 # show individual CPU topology for each core (plenty of lines) PCM_NO_MAIN_EXCEPTION_HANDLER: 0 # show full call stack of error @@ -95,7 +95,7 @@ cpuRequest: 100m memoryLimit: 512Mi memoryRequest: 256Mi # requests, limits level need to be specified here -extraResources: {} +extraResources: {} ### =============================== Integrations with other projects ==================================== # @@ -116,12 +116,12 @@ podMonitorInterval: 30s ### -------------- NRI balloons policy plugin ------------- # PCM deployment to be intergrated with NRI balloons resource policy intergration -# if true, will add special annotation to allow pcm pod use all the core, regardless NRI balloons policy rules. +# if true, will add special annotation to allow pcm pod use all the core, regardless NRI balloons policy rules. 
nriBalloonsPolicyIntegration: false ### ------------- node-feature-discovery ----------------- -# when enabled specific set of labels will be used as node selector (Intel vendor, RDT availability, baremetal) -nfd: false +# when enabled specific set of labels will be used as node selector (Intel vendor, RDT availability, baremetal) +nfd: false # if enabled daemonset nodeAffinity will require node without feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR flag (requires nfd=true) nfdBaremetalAffinity: false # if enabled, followin RDT labels will be required for scheduling (requires nfd=true) From 92fbe0c0fb8b928993f4d9c815006092b04cb935 Mon Sep 17 00:00:00 2001 From: Pawel Palucki Date: Fri, 7 Jun 2024 12:57:15 +0200 Subject: [PATCH 3/8] README update + better Dockerfile.debug --- Dockerfile.debug | 3 ++- deployment/pcm/README.md | 18 +++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/Dockerfile.debug b/Dockerfile.debug index da966e97..8b3e04ed 100644 --- a/Dockerfile.debug +++ b/Dockerfile.debug @@ -2,5 +2,6 @@ FROM fedora:40@sha256:4e007f288dce23966216be81ef62ba05d139b9338f327c1d1c73b7167d RUN dnf -y install gcc-c++ git findutils make cmake strace gdb util-linux COPY . /tmp/pcm -RUN --mount=type=cache,target=/tmp/pcm/build cd /tmp/pcm/build && cmake -D CMAKE_BUILD_TYPE=Debug .. && cmake --build . -t pcm pcm-sensor-server pcm-tpmi -j && cp -v /tmp/pcm/build/bin/pcm* /bin/ +RUN --mount=type=cache,target=/tmp/pcm/build cd /tmp/pcm/build && cmake -D CMAKE_BUILD_TYPE=Debug .. && cmake --build . 
-t pcm pcm-sensor-server pcm-tpmi -j && mkdir -p /usr/local/bin && cp -v /tmp/pcm/build/bin/pcm* /usr/local/bin/ #ENV PCM_NO_PERF=1 +ENTRYPOINT [ "/usr/local/bin/pcm-sensor-server", "-p", "9738", "-r" ] diff --git a/deployment/pcm/README.md b/deployment/pcm/README.md index 338be95c..9605e897 100644 --- a/deployment/pcm/README.md +++ b/deployment/pcm/README.md @@ -95,6 +95,7 @@ For validation to verify that all metrics are available without msr, unload "msr ``` rmmod msr echo 2 > /proc/sys/kernel/perf_event_paranoid +cat /proc/sys/kernel/perf_event_paranoid # expected value 2 ``` #### 2) Create kind based Kubernetes cluster @@ -292,6 +293,7 @@ helm install pcm . -f values-vm.yaml #### Heterogeneous (mixed VM/metal instances) cluster +values-metal.yaml requires node-feature-discovery to be preinstallaed ``` helm install pcm-vm . -f values-vm.yaml helm install pcm-metal . -f values-metal.yaml @@ -316,20 +318,22 @@ wget https://kind.sigs.k8s.io/examples/kind-with-registry.sh bash kind-with-registry.sh ``` -2) Build docker image and upload to local registry (from project root directory) -``` -docker build . -t localhost:5001/pcm-local -docker push localhost:5001/pcm-local +2) Build docker image and upload to local registry +``` # optionally create buildx based builder mkdir ~/.docker/cli-plugins curl -sL https://github.com/docker/buildx/releases/download/v0.14.0/buildx-v0.14.0.linux-amd64 -o ~/.docker/cli-plugins/docker-buildx chmod +x ~/.docker/cli-plugins/docker-buildx docker buildx create --driver docker-container --name mydocker --use --bootstrap -# or with single line (from deployment/pcm/ directory) -# Build local image for tests/development -# Following Dockerfile contains source code of pcm and some debugging utils (like gdb,strace for further analysis) +# Build production image from **project root directory**: +docker build . 
-t localhost:5001/pcm-local +docker push localhost:5001/pcm-local + +# Build/push **debug** image with single line +# Debug Dockerfile contains source code of pcm and some debugging utils (like gdb,strace for further analysis) +# Run from deployment/pcm/ directory: (cd ../.. ; docker build . -f Dockerfile.debug -t localhost:5001/pcm-local && docker push localhost:5001/pcm-local) ``` From 513b7c93d9d761e2f2fe14a0400d8038332658c8 Mon Sep 17 00:00:00 2001 From: Pawel Palucki Date: Tue, 11 Jun 2024 12:21:42 -0100 Subject: [PATCH 4/8] Chart testing using helm test --- .gitignore | 1 + deployment/pcm/README.md | 15 ++++++++++++++ .../pcm/templates/_tests/test-connection.yaml | 20 +++++++++++++++++++ .../pcm/templates/_tests/test-service.yaml | 19 ++++++++++++++++++ src/CMakeLists.txt | 2 +- 5 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 deployment/pcm/templates/_tests/test-connection.yaml create mode 100644 deployment/pcm/templates/_tests/test-service.yaml diff --git a/.gitignore b/.gitignore index 2bd0d9df..01d6f0a2 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,4 @@ src/simdjson /deployment/pcm/nri/ /deployment/pcm/kind-with-registry.sh /deployment/pcm/autoscaler +/deployment/pcm/pcm-dashboard.json diff --git a/deployment/pcm/README.md b/deployment/pcm/README.md index 9605e897..6f577bea 100644 --- a/deployment/pcm/README.md +++ b/deployment/pcm/README.md @@ -358,6 +358,21 @@ kubectl exec -ti ds/pcm -- bash kubectl logs ds/pcm ``` +6) Helm testing + +``` +helm test pcm + +# in case of failing, see the logs of test connection pod +# NOTE: filter is used to ignore service (helm limitation, which tries to download logs from service), so it assumes service exists, because previous run failed +helm test pcm --logs --filter name=pcm-test-connection + +# or run test-connection-pod manually +kubectl run -ti --rm --image busybox pcm-test-connection-manual -- sh +kubectl run -ti --rm --image busybox pcm-test-connection-manual -- ping 
pcm-test-connection -t 1 -W 1 -w 1 -c 1 +kubectl run -ti --rm --image busybox pcm-test-connection-manual -- wget -S -T 15 pcm-test-connection:9739/metrics +``` + ### Metric collection methods (capabilities vs requirements) diff --git a/deployment/pcm/templates/_tests/test-connection.yaml b/deployment/pcm/templates/_tests/test-connection.yaml new file mode 100644 index 00000000..69f074b5 --- /dev/null +++ b/deployment/pcm/templates/_tests/test-connection.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "pcm.fullname" . }}-test-connection" + #name: pcm + # labels: + #{{/* {{- include "pcm.labels" . | nindent 4 }} */}} + annotations: + "helm.sh/hook": test + "helm.sh/hook-weight": "2" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + containers: + - name: wget + image: busybox + command: + - 'sh' + - '-c' + - 'sleep 5; ping {{ include "pcm.fullname" . }}-test-connection -t 1 -W 1 -w 1 -c 1 ; wget -T 15 -S {{ include "pcm.fullname" . }}-test-connection:9739/metrics -O - | grep Measurement_Interval_in_us' + restartPolicy: Never diff --git a/deployment/pcm/templates/_tests/test-service.yaml b/deployment/pcm/templates/_tests/test-service.yaml new file mode 100644 index 00000000..8416c3f5 --- /dev/null +++ b/deployment/pcm/templates/_tests/test-service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + name: "{{ include "pcm.fullname" . }}-test-connection" + labels: + {{- include "pcm.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + "helm.sh/hook-weight": "1" + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + type: NodePort + ports: + - port: 9739 + targetPort: pcm-metrics + protocol: TCP + name: pcm-metrics + selector: + {{- include "pcm.selectorLabels" . 
| nindent 4 }} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0af9ad1c..a211d7cd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,7 +3,7 @@ # All pcm-* executables -set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-lspci pcm-pcicfg pcm-mmio pcm-tpmi pcm-raw pcm-accel) +set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-lspci pcm-pcicfg pcm-mmio pcm-tpmi pcm-raw pcm-accel dashboardtest) file(GLOB COMMON_SOURCES pcm-accel-common.cpp msr.cpp cpucounters.cpp pci.cpp mmio.cpp tpmi.cpp pmt.cpp bw.cpp utils.cpp topology.cpp debug.cpp threadpool.cpp uncore_pmu_discovery.cpp) From 6f3d9eba5bf3c050f014d3a1a8e4c75568033455 Mon Sep 17 00:00:00 2001 From: Pawel Palucki Date: Tue, 11 Jun 2024 16:03:49 -0100 Subject: [PATCH 5/8] improve helm test - fix proper namespace --- deployment/pcm/README.md | 4 ++-- deployment/pcm/templates/_tests/test-connection.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deployment/pcm/README.md b/deployment/pcm/README.md index 6f577bea..5518e74f 100644 --- a/deployment/pcm/README.md +++ b/deployment/pcm/README.md @@ -369,8 +369,8 @@ helm test pcm --logs --filter name=pcm-test-connection # or run test-connection-pod manually kubectl run -ti --rm --image busybox pcm-test-connection-manual -- sh -kubectl run -ti --rm --image busybox pcm-test-connection-manual -- ping pcm-test-connection -t 1 -W 1 -w 1 -c 1 -kubectl run -ti --rm --image busybox pcm-test-connection-manual -- wget -S -T 15 pcm-test-connection:9739/metrics +kubectl run -ti --rm --image busybox pcm-test-connection-manual -- ping pcm-test-connection.default.svc.cluster.local -t 1 -W 1 -w 1 -c 1 +kubectl run -ti --rm --image busybox pcm-test-connection-manual -- wget -S -T 15 pcm-test-connection.default.svc.cluster.local:9739/metrics ``` ### Metric collection methods (capabilities vs requirements) diff --git 
a/deployment/pcm/templates/_tests/test-connection.yaml b/deployment/pcm/templates/_tests/test-connection.yaml index 69f074b5..517a7b7b 100644 --- a/deployment/pcm/templates/_tests/test-connection.yaml +++ b/deployment/pcm/templates/_tests/test-connection.yaml @@ -16,5 +16,5 @@ spec: command: - 'sh' - '-c' - - 'sleep 5; ping {{ include "pcm.fullname" . }}-test-connection -t 1 -W 1 -w 1 -c 1 ; wget -T 15 -S {{ include "pcm.fullname" . }}-test-connection:9739/metrics -O - | grep Measurement_Interval_in_us' + - 'sleep 5; ping {{ include "pcm.fullname" . }}-test-connection.{{ .Release.Namespace }}.svc.cluster.local -t 1 -W 1 -w 1 -c 1 ; wget -T 15 -S {{ include "pcm.fullname" . }}-test-connection.{{ .Release.Namespace }}.svc.cluster.localg:9739/metrics -O - | grep Measurement_Interval_in_us' restartPolicy: Never From a088ab0ced49740367bec0aea4f5e4f04db7e489 Mon Sep 17 00:00:00 2001 From: Pawel Palucki Date: Wed, 12 Jun 2024 13:15:06 -0100 Subject: [PATCH 6/8] Initial version of e2e for pcm/prometheus and VPA --- .gitignore | 1 + deployment/pcm/Makefile | 81 +++++++++++++++++++ .../_tests/test-connection.yaml | 15 ---- .../pcm/templates/_tests/test-connection.yaml | 2 +- 4 files changed, 83 insertions(+), 16 deletions(-) delete mode 100644 deployment/pcm/helm_chart_test_and_notes_TODO/_tests/test-connection.yaml diff --git a/.gitignore b/.gitignore index 01d6f0a2..e1845eb8 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,4 @@ src/simdjson /deployment/pcm/kind-with-registry.sh /deployment/pcm/autoscaler /deployment/pcm/pcm-dashboard.json +/deployment/pcm/kind* diff --git a/deployment/pcm/Makefile b/deployment/pcm/Makefile index 6ae38b12..c1c56816 100644 --- a/deployment/pcm/Makefile +++ b/deployment/pcm/Makefile @@ -1,2 +1,83 @@ +.PHONY=kind-cluster-clean + +# https://stackoverflow.com/questions/649246/is-it-possible-to-create-a-multi-line-string-variable-in-a-makefile +define KIND_EXTRA_MOUNTS +nodes: +- role: control-plane + extraMounts: + - hostPath: 
/sys/fs/resctrl + containerPath: /sys/fs/resctrl +endef + chart-lint-report.txt: values.yaml templates docker run -ti --rm -w /pcm -v `realpath $(PWD)/../..`:/pcm quay.io/helmpack/chart-testing ct lint --charts deployment/pcm --validate-maintainers=false | tee chart-lint-report.txt + +export KIND_EXTRA_MOUNTS +kind-with-registry.sh: + curl -sl https://kind.sigs.k8s.io/examples/kind-with-registry.sh -o kind-with-registry.sh.tmp + echo "$$KIND_EXTRA_MOUNTS" >kind_extra_mounts.txt + sed '/apiVersion: kind.x-k8s.io\/v1alpha4/r kind_extra_mounts.txt' kind-with-registry.sh.tmp >kind-with-registry.sh + chmod +x kind-with-registry.sh + +kind-cluster: kind-with-registry.sh + ./kind-with-registry.sh + kind export kubeconfig + touch kind-cluster + + +clean: + kind delete cluster + docker rm -f kind-registry + rm -fv kind_extra_mounts.txt + rm -fv kind-with-registry.sh + rm -fv kind-with-registry.sh.tmp + rm -fv kind-deploy-prometheus + + +# +# minimal +# +kind-deploy-pcm: + helm install pcm . + +kind-pcm-test: + helm test pcm + +e2e-small: kind-cluster kind-deploy-pcm kind-pcm-test + + +# +# prometheus (pod monitor test) +# +kind-deploy-prometheus: + helm repo add prometheus-community https://prometheus-community.github.io/helm-charts + helm upgrade --install prometheus prometheus-community/kube-prometheus-stack --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false + kubectl wait sts prometheus-prometheus-kube-prometheus-prometheus --for=jsonpath='{.status.replicas}'=1 + touch kind-deploy-prometheus + +kind-pcm-upgrade-prometheus: + helm upgrade --install pcm . 
--set podMonitor=true + +kind-pcm-prometheus-test: + kubectl proxy & sleep 5 && curl -sL http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy/api/v1/query?query=Measurement_Interval_in_us | grep Measurement_Interval_in_us && kill %1 + +e2e-prometheus: kind-cluster kind-deploy-prometheus kind-pcm-upgrade-prometheus kind-pcm-test kind-pcm-prometheus-test + +# +# VPA +# +autoscaler: + git clone --depth 1 --single-branch https://github.com/kubernetes/autoscaler + +kind-deploy-metrics-server: + helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/ + helm repo update + helm upgrade --install --set args={--kubelet-insecure-tls} metrics-server metrics-server/metrics-server --namespace kube-system + +kind-deploy-vpa: autoscaler + ./autoscaler/vertical-pod-autoscaler/hack/vpa-up.sh + +kind-pcm-vpa: + helm upgrade --install pcm . --set verticalPodAutoscaler.enabled=true + +e2e-vpa: kind-cluster kind-deploy-prometheus kind-deploy-vpa kind-pcm-test diff --git a/deployment/pcm/helm_chart_test_and_notes_TODO/_tests/test-connection.yaml b/deployment/pcm/helm_chart_test_and_notes_TODO/_tests/test-connection.yaml deleted file mode 100644 index d26943be..00000000 --- a/deployment/pcm/helm_chart_test_and_notes_TODO/_tests/test-connection.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: "{{ include "pcm.fullname" . }}-test-connection" - labels: - {{- include "pcm.labels" . | nindent 4 }} - annotations: - "helm.sh/hook": test -spec: - containers: - - name: wget - image: busybox - command: ['wget'] - args: ['{{ include "pcm.fullname" . 
}}:9738'] - restartPolicy: Never diff --git a/deployment/pcm/templates/_tests/test-connection.yaml b/deployment/pcm/templates/_tests/test-connection.yaml index 517a7b7b..3626676c 100644 --- a/deployment/pcm/templates/_tests/test-connection.yaml +++ b/deployment/pcm/templates/_tests/test-connection.yaml @@ -16,5 +16,5 @@ spec: command: - 'sh' - '-c' - - 'sleep 5; ping {{ include "pcm.fullname" . }}-test-connection.{{ .Release.Namespace }}.svc.cluster.local -t 1 -W 1 -w 1 -c 1 ; wget -T 15 -S {{ include "pcm.fullname" . }}-test-connection.{{ .Release.Namespace }}.svc.cluster.localg:9739/metrics -O - | grep Measurement_Interval_in_us' + - 'sleep 15; ping {{ include "pcm.fullname" . }}-test-connection.{{ .Release.Namespace }}.svc.cluster.local -t 1 -W 1 -w 1 -c 1 ; wget -T 15 -S {{ include "pcm.fullname" . }}-test-connection.{{ .Release.Namespace }}.svc.cluster.local:9739/metrics -O - | grep Measurement_Interval_in_us' restartPolicy: Never From cb36269e8cb15865a4395e6471b9ff763376b24c Mon Sep 17 00:00:00 2001 From: Pawel Palucki Date: Thu, 13 Jun 2024 10:54:10 -0100 Subject: [PATCH 7/8] fix with proper names and add NFD/metal case --- .gitignore | 4 +- deployment/pcm/Makefile | 84 +++++++++++++++++++++++++---------------- 2 files changed, 52 insertions(+), 36 deletions(-) diff --git a/.gitignore b/.gitignore index e1845eb8..beb7c8ba 100644 --- a/.gitignore +++ b/.gitignore @@ -35,7 +35,5 @@ build src/simdjson /deployment/pcm/smarter-device-manager/ /deployment/pcm/nri/ -/deployment/pcm/kind-with-registry.sh -/deployment/pcm/autoscaler /deployment/pcm/pcm-dashboard.json -/deployment/pcm/kind* +/deployment/pcm/_kind* diff --git a/deployment/pcm/Makefile b/deployment/pcm/Makefile index c1c56816..3f2f163b 100644 --- a/deployment/pcm/Makefile +++ b/deployment/pcm/Makefile @@ -1,5 +1,3 @@ -.PHONY=kind-cluster-clean - # https://stackoverflow.com/questions/649246/is-it-possible-to-create-a-multi-line-string-variable-in-a-makefile define KIND_EXTRA_MOUNTS nodes: @@ -13,71 
+11,91 @@ chart-lint-report.txt: values.yaml templates docker run -ti --rm -w /pcm -v `realpath $(PWD)/../..`:/pcm quay.io/helmpack/chart-testing ct lint --charts deployment/pcm --validate-maintainers=false | tee chart-lint-report.txt export KIND_EXTRA_MOUNTS -kind-with-registry.sh: - curl -sl https://kind.sigs.k8s.io/examples/kind-with-registry.sh -o kind-with-registry.sh.tmp - echo "$$KIND_EXTRA_MOUNTS" >kind_extra_mounts.txt - sed '/apiVersion: kind.x-k8s.io\/v1alpha4/r kind_extra_mounts.txt' kind-with-registry.sh.tmp >kind-with-registry.sh - chmod +x kind-with-registry.sh - -kind-cluster: kind-with-registry.sh - ./kind-with-registry.sh +_kind_with_registry.sh: + curl -sl https://kind.sigs.k8s.io/examples/kind-with-registry.sh -o _kind_with_registry.sh.tmp + echo "$$KIND_EXTRA_MOUNTS" >_kind_extra_mounts.txt + sed '/apiVersion: kind.x-k8s.io\/v1alpha4/r _kind_extra_mounts.txt' _kind_with_registry.sh.tmp >_kind_with_registry.sh + chmod +x _kind_with_registry.sh + +_kind_deploy_cluster: _kind_with_registry.sh + ./_kind_with_registry.sh kind export kubeconfig - touch kind-cluster + touch _kind_deploy_cluster clean: kind delete cluster docker rm -f kind-registry - rm -fv kind_extra_mounts.txt - rm -fv kind-with-registry.sh - rm -fv kind-with-registry.sh.tmp - rm -fv kind-deploy-prometheus + rm -fv _kind_with_registry.sh + rm -fv _kind_extra_mounts.txt + rm -fv _kind_with_registry.sh.tmp + rm -fv _kind_deploy_cluster + rm -fv _kind_deploy_prometheus + rm -fv _kind_deploy_pcm # -# minimal +# e2e-small: minimal E2e pcm pod only test # -kind-deploy-pcm: +_kind_deploy_pcm: helm install pcm . + touch _kind_deploy_pcm -kind-pcm-test: +kind_pcm_test: helm test pcm -e2e-small: kind-cluster kind-deploy-pcm kind-pcm-test +e2e-small: _kind_deploy_cluster _kind_deploy_pcm kind_pcm_test + +# +# e2e-small-metal-nfd: minimal E2e pcm pod only test but with direct approach +# +_kind_deploy_pcm_metal: + helm upgrade --install pcm . 
-f values-metal.yaml + touch _kind_deploy_pcm_metal + +_kind_deploy_nfd: + helm upgrade --install pcm . -f values-metal.yaml + touch _kind_deploy_pcm_metal + +kind_pcm_test: + helm test pcm +e2e-small-metal: _kind_deploy_cluster _kind_deploy_pcm_metal kind_pcm_test # -# prometheus (pod monitor test) +# e2e-prometheus: E2E test for podMonitor (pod monitor test) # -kind-deploy-prometheus: +_kind_deploy_prometheus: helm repo add prometheus-community https://prometheus-community.github.io/helm-charts helm upgrade --install prometheus prometheus-community/kube-prometheus-stack --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false kubectl wait sts prometheus-prometheus-kube-prometheus-prometheus --for=jsonpath='{.status.replicas}'=1 - touch kind-deploy-prometheus + touch _kind_deploy_prometheus -kind-pcm-upgrade-prometheus: +_kind_deploy_pcm-with-prometheus: helm upgrade --install pcm . --set podMonitor=true -kind-pcm-prometheus-test: +kind_pcm_prometheus_test: kubectl proxy & sleep 5 && curl -sL http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy/api/v1/query?query=Measurement_Interval_in_us | grep Measurement_Interval_in_us && kill %1 -e2e-prometheus: kind-cluster kind-deploy-prometheus kind-pcm-upgrade-prometheus kind-pcm-test kind-pcm-prometheus-test +e2e-prometheus: _kind_deploy_cluster _kind_deploy_prometheus _kind_deploy_pcm-with-prometheus kind_pcm_test kind_pcm_prometheus_test # -# VPA +# e2e-vpa: VPA E2E tests # -autoscaler: - git clone --depth 1 --single-branch https://github.com/kubernetes/autoscaler +_kind_autoscaler: + git clone --depth 1 --single-branch https://github.com/kubernetes/autoscaler _kind_autoscaler -kind-deploy-metrics-server: +_kind_deploy_metrics_server: helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/ helm repo update helm upgrade --install --set args={--kubelet-insecure-tls} metrics-server metrics-server/metrics-server --namespace 
kube-system + touch _kind_deploy_metrics_server -kind-deploy-vpa: autoscaler - ./autoscaler/vertical-pod-autoscaler/hack/vpa-up.sh +_kind_deploy_vpa: autoscaler + ./_kind_autoscaler/vertical-pod-autoscaler/hack/vpa-up.sh + touch _kind_deploy_vpa -kind-pcm-vpa: +_kind_deploy_pcm_with_vpa: helm upgrade --install pcm . --set verticalPodAutoscaler.enabled=true -e2e-vpa: kind-cluster kind-deploy-prometheus kind-deploy-vpa kind-pcm-test +e2e-vpa: _kind_deploy_cluster _kind_deploy_vpa _kind_deploy_pcm_with_vpa kind_pcm_test From 91b445ecc8c4540a848b59aba4d0bed1e824c1b5 Mon Sep 17 00:00:00 2001 From: Pawel Palucki Date: Tue, 18 Jun 2024 12:17:42 -0100 Subject: [PATCH 8/8] e2e tests: cont --- deployment/pcm/Makefile | 112 +++++++++++------- deployment/pcm/README.md | 33 +++++- ...alues-metal.yaml => values-metal-nfd.yaml} | 1 - deployment/pcm/values.yaml | 3 +- 4 files changed, 101 insertions(+), 48 deletions(-) rename deployment/pcm/{values-metal.yaml => values-metal-nfd.yaml} (92%) diff --git a/deployment/pcm/Makefile b/deployment/pcm/Makefile index 3f2f163b..3ddcd322 100644 --- a/deployment/pcm/Makefile +++ b/deployment/pcm/Makefile @@ -1,3 +1,10 @@ + +chart-lint-report.txt: values.yaml templates + docker run -ti --rm -w /pcm -v `realpath $(PWD)/../..`:/pcm quay.io/helmpack/chart-testing ct lint --charts deployment/pcm --validate-maintainers=false | tee chart-lint-report.txt + +# +# kind cluster targets +# # https://stackoverflow.com/questions/649246/is-it-possible-to-create-a-multi-line-string-variable-in-a-makefile define KIND_EXTRA_MOUNTS nodes: @@ -7,9 +14,6 @@ nodes: containerPath: /sys/fs/resctrl endef -chart-lint-report.txt: values.yaml templates - docker run -ti --rm -w /pcm -v `realpath $(PWD)/../..`:/pcm quay.io/helmpack/chart-testing ct lint --charts deployment/pcm --validate-maintainers=false | tee chart-lint-report.txt - export KIND_EXTRA_MOUNTS _kind_with_registry.sh: curl -sl https://kind.sigs.k8s.io/examples/kind-with-registry.sh -o 
_kind_with_registry.sh.tmp @@ -23,64 +27,73 @@ _kind_deploy_cluster: _kind_with_registry.sh touch _kind_deploy_cluster -clean: - kind delete cluster - docker rm -f kind-registry - rm -fv _kind_with_registry.sh - rm -fv _kind_extra_mounts.txt - rm -fv _kind_with_registry.sh.tmp - rm -fv _kind_deploy_cluster - rm -fv _kind_deploy_prometheus - rm -fv _kind_deploy_pcm - - # -# e2e-small: minimal E2e pcm pod only test +# 1) e2e-default: minimal E2e pcm pod only test # -_kind_deploy_pcm: +kind_deploy_pcm: helm install pcm . - touch _kind_deploy_pcm + kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1 kind_pcm_test: helm test pcm -e2e-small: _kind_deploy_cluster _kind_deploy_pcm kind_pcm_test +e2e-default: _kind_deploy_cluster kind_deploy_pcm kind_pcm_test # -# e2e-small-metal-nfd: minimal E2e pcm pod only test but with direct approach +# 2) e2e-default-local-image: minimal E2e pcm with local image build # -_kind_deploy_pcm_metal: - helm upgrade --install pcm . -f values-metal.yaml - touch _kind_deploy_pcm_metal +build_local_image: + (cd ../.. ; docker build . -t localhost:5001/pcm-local) + docker push localhost:5001/pcm-local -_kind_deploy_nfd: - helm upgrade --install pcm . -f values-metal.yaml - touch _kind_deploy_pcm_metal +kind_deploy_pcm_local_image: + helm upgrade --install --reset-values --wait pcm . 
-f values-local-image.yaml + kubectl wait daemonset pcm --for=jsonpath='{.spec.template.spec.containers[0].image'}=localhost:5001/pcm-local:latest + kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1 -kind_pcm_test: - helm test pcm - -e2e-small-metal: _kind_deploy_cluster _kind_deploy_pcm_metal kind_pcm_test +e2e-default-local-image: _kind_deploy_cluster build_local_image kind_deploy_pcm_local_image kind_pcm_test # -# e2e-prometheus: E2E test for podMonitor (pod monitor test) +# 3) e2e-prometheus: E2E test for podMonitor (pod monitor test) # _kind_deploy_prometheus: helm repo add prometheus-community https://prometheus-community.github.io/helm-charts - helm upgrade --install prometheus prometheus-community/kube-prometheus-stack --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false - kubectl wait sts prometheus-prometheus-kube-prometheus-prometheus --for=jsonpath='{.status.replicas}'=1 + helm upgrade --install --reset-values prometheus prometheus-community/kube-prometheus-stack --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false --wait + kubectl wait Prometheus prometheus-kube-prometheus-prometheus --for=jsonpath='{.status.availableReplicas}'=1 + #kubectl wait sts prometheus-prometheus-kube-prometheus-prometheus --for=jsonpath='{.status.replicas}'=1 touch _kind_deploy_prometheus -_kind_deploy_pcm-with-prometheus: - helm upgrade --install pcm . --set podMonitor=true +kind_deploy_pcm_with_prometheus: + helm upgrade --install --reset-values pcm . 
--set podMonitor=true + kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1 + +kind_pcm_test_prometheus: + kubectl proxy & sleep 10 && curl -sL http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy/api/v1/query?query=Measurement_Interval_in_us | grep Measurement_Interval_in_us && kill %1 -kind_pcm_prometheus_test: - kubectl proxy & sleep 5 && curl -sL http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy/api/v1/query?query=Measurement_Interval_in_us | grep Measurement_Interval_in_us && kill %1 +e2e-prometheus: _kind_deploy_cluster _kind_deploy_prometheus kind_deploy_pcm_with_prometheus kind_pcm_test kind_pcm_test_prometheus + +# +# 4) e2e-metal-nfd: e2e test that checks that, with node-feature-discovery installed and nfd values changed, PCM will only be installed on non-hypervised systems with Intel vendor and RDT available +# +_kind_deploy_nfd: + #kubectl apply -k https://github.com/kubernetes-sigs/node-feature-discovery/deployment/overlays/default?ref=v0.16.0-devel + helm repo add nfd https://kubernetes-sigs.github.io/node-feature-discovery/charts + helm upgrade --install --wait nfd nfd/node-feature-discovery --namespace node-feature-discovery --create-namespace + # please be patient NFD requires around 2 minutes to annotate the node ... + kubectl wait node --timeout=2m kind-control-plane --for=jsonpath='{.metadata.labels.feature\.node\.kubernetes\.io\/cpu-model\.vendor_id}'=Intel + +kind_deploy_pcm_with_metal_nfd: + helm upgrade --install --reset-values pcm .
-f values-metal-nfd.yaml + kubectl wait daemonset --timeout=2m pcm --for=jsonpath='{.status.numberReady}'=1 + +kind_pcm_test_nfd: + kubectl wait daemonset pcm --timeout=2m --for=jsonpath='{.spec.template.spec.nodeSelector.feature\.node\.kubernetes\.io\/cpu-model\.vendor_id}'=Intel + helm test pcm -e2e-prometheus: _kind_deploy_cluster _kind_deploy_prometheus _kind_deploy_pcm-with-prometheus kind_pcm_test kind_pcm_prometheus_test +e2e-metal-nfd: _kind_deploy_cluster _kind_deploy_nfd kind_deploy_pcm_with_metal_nfd kind_pcm_test # -# e2e-vpa: VPA E2E tests +# 5) e2e-vpa: VPA E2E tests # _kind_autoscaler: git clone --depth 1 --single-branch https://github.com/kubernetes/autoscaler _kind_autoscaler @@ -88,14 +101,29 @@ _kind_autoscaler: _kind_deploy_metrics_server: helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/ helm repo update - helm upgrade --install --set args={--kubelet-insecure-tls} metrics-server metrics-server/metrics-server --namespace kube-system + helm upgrade --install --reset-values --set args={--kubelet-insecure-tls} metrics-server metrics-server/metrics-server --namespace kube-system + kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1 touch _kind_deploy_metrics_server _kind_deploy_vpa: autoscaler ./_kind_autoscaler/vertical-pod-autoscaler/hack/vpa-up.sh touch _kind_deploy_vpa -_kind_deploy_pcm_with_vpa: - helm upgrade --install pcm . --set verticalPodAutoscaler.enabled=true +kind_deploy_pcm_with_vpa: + helm upgrade --install --reset-values pcm . 
--set verticalPodAutoscaler.enabled=true + kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1 + +e2e-vpa: _kind_deploy_cluster _kind_deploy_vpa kind_deploy_pcm_with_vpa kind_pcm_test + +# +# Cleanup +# -e2e-vpa: _kind_deploy_cluster _kind_deploy_vpa _kind_deploy_pcm_with_vpa kind_pcm_test +clean: + kind delete cluster + docker rm -f kind-registry + rm -fv _kind_with_registry.sh + rm -fv _kind_extra_mounts.txt + rm -fv _kind_with_registry.sh.tmp + rm -fv _kind_deploy_cluster + rm -fv _kind_deploy_prometheus diff --git a/deployment/pcm/README.md b/deployment/pcm/README.md index 5518e74f..510ad941 100644 --- a/deployment/pcm/README.md +++ b/deployment/pcm/README.md @@ -5,7 +5,7 @@ Helm chart instructions ### Features: - Configurable as non-privileged container (value: `privileged`, default: false) and privileged container, -- Support for bare-metal and VM host configurations (files: [values-metal.yaml](values-metal.yaml), [values-vm.yaml](values-vm.yaml)), +- Support for bare-metal and VM host configurations (files: [values-metal-nfd.yaml](values-metal-nfd.yaml), [values-vm.yaml](values-vm.yaml)), - Ability to deploy multiple releases alongside configured differently to handle different kinds of machines (bare-metal, VM) at the [same time](#heterogeneous-mixed-vmmetal-instances-cluster), - Linux Watchdog handling (controlled with `PCM_KEEP_NMI_WATCHDOG`, `PCM_NO_AWS_WORKAROUND`, `nmiWatchdogMount` values). - Deploy to own namespace with "helm install ... **-n pcm --create-namespace**". @@ -77,6 +77,22 @@ More information here: https://kubernetes.io/docs/tutorials/security/ns-level-ps - hostPort 9738 is exposed on host. (TODO: security review, consider TLS, together with Prometheus scrapping !!). - Prometheus podMonitor is disabled (enabled it with --set podMonitor=true). +### TLS + +TODO: +- requires pcm-sensor-server to be built with SSL support +- ERROR !!!! + +``` +mkdir build +cd build +cmake ..
-DCMAKE_CXX_FLAGS='-DUSE_SSL -lssl' +zypper install openssl-devel +make pcm-sensor-server -j +openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -sha256 -days 3650 -nodes -subj "/C=XX/ST=StateName/L=CityName/O=CompanyName/OU=CompanySectionName/CN=CommonNameOrHostname" +./bin/pcm-sensor-server -s -p 8443 --certificateFile cert.pem --privateKeyFile key.pem +``` + ### Validation on local kind cluster #### Requirements @@ -282,7 +298,7 @@ helm install pcm . -f values-direct-privileged.yaml #### Homogeneous bare metal instances cluster (full set of metrics) ``` -helm install pcm . -f values-metal.yaml +helm install pcm . -f values-metal-nfd.yaml ``` #### Homogenizer VM instances cluster (limited set of metrics core) @@ -293,10 +309,10 @@ helm install pcm . -f values-vm.yaml #### Heterogeneous (mixed VM/metal instances) cluster -values-metal.yaml requires node-feature-discovery to be preinstallaed +values-metal-nfd.yaml requires node-feature-discovery to be preinstalled ``` helm install pcm-vm . -f values-vm.yaml -helm install pcm-metal . -f values-metal.yaml +helm install pcm-metal .
-f values-metal-nfd.yaml ``` #### Direct method as non-privileged container (not recommended) @@ -402,3 +418,12 @@ kubectl run -ti --rm --image busybox pcm-test-connection-manual -- wget -S -T 15 | | energy | | | cpucounters.cpp initEnergyMonitoring() | | +### E2E tests + +The following end-to-end tests, based on a kind environment, are provided as make targets: + +- `e2e-default` - test PCM with default configuration (indirect) and checks connection by calling `helm test` +- `e2e-default-local-image` - same as above but builds and deploys PCM with local image +- `e2e-prometheus` - test PCM chart with deployed PodMonitor with Prometheus stack and queries Prometheus for collected data, +- `e2e-vpa` - deploy PCM with VerticalPodAutoscaler (requires metrics-server to be deployed alongside) +- `e2e-metal-nfd` - test PCM chart on metal scheduled by features exposed by node-feature-discovery (uses: values-metal-nfd.yaml), diff --git a/deployment/pcm/values-metal.yaml b/deployment/pcm/values-metal-nfd.yaml similarity index 92% rename from deployment/pcm/values-metal.yaml rename to deployment/pcm/values-metal-nfd.yaml index 1ca73c1e..80a85bc5 100644 --- a/deployment/pcm/values-metal.yaml +++ b/deployment/pcm/values-metal-nfd.yaml @@ -5,4 +5,3 @@ PCM_NO_AWS_WORKAROUND: 1 PCM_KEEP_NMI_WATCHDOG: 0 nfd: true nfdBaremetalAffinity: true -nfdRDTAffinity: true diff --git a/deployment/pcm/values.yaml b/deployment/pcm/values.yaml index 18643f0c..1bbb9607 100644 --- a/deployment/pcm/values.yaml +++ b/deployment/pcm/values.yaml @@ -124,7 +124,8 @@ nriBalloonsPolicyIntegration: false nfd: false # if enabled daemonset nodeAffinity will require node without feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR flag (requires nfd=true) nfdBaremetalAffinity: false -# if enabled, followin RDT labels will be required for scheduling (requires nfd=true) +# if enabled, following RDT labels will be required for scheduling (requires nfd=true) +# TODO: those labels are no longer available with default
node-feature-discovery deployment # feature.node.kubernetes.io/cpu-rdt.RDTCMT=true # feature.node.kubernetes.io/cpu-rdt.RDTL3CA=true # feature.node.kubernetes.io/cpu-rdt.RDTMBA=true