From cdb10cd6455712d45388027c31567572d122c74c Mon Sep 17 00:00:00 2001
From: Pawel Palucki <pawel.palucki@sap.com>
Date: Mon, 12 Feb 2024 17:57:06 +0100
Subject: [PATCH 1/8] Helm chart for pcm:

old comments:

sys/pci/mcfg mounts are unnessesary for indirect method
fix old wrong defaults in README
fix formatting
possible fix for issue with resctrl
remove hacks to handle /pcm/resctrl and unessesary out-of-date files
update License to use the same as pcm itself
update README, remove out-of-date info
links do values
formatting + links do values
update README an values comments
update README
address jcfunk comments: interval and extra labels for PodMonitor + refactor readme
fix typos
readme: reminder about removing msr kernel module
after rebasing: point to correct default pcm image from intel organization

Refactoring:

- explicit values file for privileged direct method,
- hide (into docs directory) "unprivileged" direct method (and fixes),
- remove unnessesary mounts (mcfg, /dev/cpu/dev/mem for privileged access),
- add instructions to collection methods,
- fixes (extra builder) for build local development image,
- silent mode
- move collection methods to the top

fix values files for direct privileged method

New: support for PERFMON capability, silent mode and some extra env
debug variables

VPA: v1 - first version of vertical pod autoscaler

Grafana dashboard: instructions

rename resctrlHostMount to resctrlMount

fix dashboard rate interval

pcm-sensor-server: add new metrics DRAM Local percantage

Fix dockerbuild by using separate Dockerfile + build in dockerignore

improve dockerfile.debug

extra env PCM_NO_MAIN_EXCEPTION_HANDLER
---
 .dockerignore                                 |   1 +
 .gitignore                                    |   6 +-
 Dockerfile.debug                              |   6 +
 deployment/pcm/.helmignore                    |  26 ++
 deployment/pcm/Chart.yaml                     |   9 +
 deployment/pcm/LICENSE                        |  30 ++
 deployment/pcm/README.md                      | 385 ++++++++++++++++++
 .../docs/direct-unprivileged-deployment.md    |  67 +++
 .../values-device-injector.yaml               |  16 +
 .../values-direct-unprivileged.yaml           |  19 +
 .../values-smarter-devices-cpu-mem.yaml       |   9 +
 .../helm_chart_test_and_notes_TODO/NOTES.txt  |   6 +
 .../_tests/test-connection.yaml               |  15 +
 deployment/pcm/k8s-test.sh                    |  77 ++++
 deployment/pcm/templates/_helpers.tpl         |  79 ++++
 deployment/pcm/templates/daemonset.yaml       | 201 +++++++++
 deployment/pcm/templates/podmonitor.yaml      |  41 ++
 .../pcm/templates/verticalpodautoscaler.yaml  |  40 ++
 deployment/pcm/values-direct-privileged.yaml  |  16 +
 deployment/pcm/values-local-image.yaml        |   4 +
 deployment/pcm/values-metal.yaml              |   8 +
 deployment/pcm/values-vm.yaml                 |   6 +
 deployment/pcm/values.yaml                    | 166 ++++++++
 src/cpucounters.cpp                           |   4 +-
 src/pcm-sensor-server.cpp                     |   8 +
 25 files changed, 1242 insertions(+), 3 deletions(-)
 create mode 100644 Dockerfile.debug
 create mode 100644 deployment/pcm/.helmignore
 create mode 100644 deployment/pcm/Chart.yaml
 create mode 100644 deployment/pcm/LICENSE
 create mode 100644 deployment/pcm/README.md
 create mode 100644 deployment/pcm/docs/direct-unprivileged-deployment.md
 create mode 100644 deployment/pcm/docs/direct-unprivileged-examples/values-device-injector.yaml
 create mode 100644 deployment/pcm/docs/direct-unprivileged-examples/values-direct-unprivileged.yaml
 create mode 100644 deployment/pcm/docs/direct-unprivileged-examples/values-smarter-devices-cpu-mem.yaml
 create mode 100644 deployment/pcm/helm_chart_test_and_notes_TODO/NOTES.txt
 create mode 100644 deployment/pcm/helm_chart_test_and_notes_TODO/_tests/test-connection.yaml
 create mode 100644 deployment/pcm/k8s-test.sh
 create mode 100644 deployment/pcm/templates/_helpers.tpl
 create mode 100644 deployment/pcm/templates/daemonset.yaml
 create mode 100644 deployment/pcm/templates/podmonitor.yaml
 create mode 100644 deployment/pcm/templates/verticalpodautoscaler.yaml
 create mode 100644 deployment/pcm/values-direct-privileged.yaml
 create mode 100644 deployment/pcm/values-local-image.yaml
 create mode 100644 deployment/pcm/values-metal.yaml
 create mode 100644 deployment/pcm/values-vm.yaml
 create mode 100644 deployment/pcm/values.yaml

diff --git a/.dockerignore b/.dockerignore
index 796b96d1..b8cad4f8 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1 +1,2 @@
 /build
+/deployment
diff --git a/.gitignore b/.gitignore
index fdf68e8c..2bd0d9df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,4 +32,8 @@ latex/
 .vs/
 .idea/
 build
-src/simdjson
\ No newline at end of file
+src/simdjson
+/deployment/pcm/smarter-device-manager/
+/deployment/pcm/nri/
+/deployment/pcm/kind-with-registry.sh
+/deployment/pcm/autoscaler
diff --git a/Dockerfile.debug b/Dockerfile.debug
new file mode 100644
index 00000000..da966e97
--- /dev/null
+++ b/Dockerfile.debug
@@ -0,0 +1,6 @@
+FROM fedora:40@sha256:4e007f288dce23966216be81ef62ba05d139b9338f327c1d1c73b7167dd47312 AS builder
+# Debug image (toolchain + gdb/strace). The build dir is a cache mount and is not stored in the image, hence the copy of the binaries to /bin.
+RUN dnf -y install gcc-c++ git findutils make cmake strace gdb util-linux && dnf clean all
+COPY . /tmp/pcm
+RUN --mount=type=cache,target=/tmp/pcm/build cd /tmp/pcm/build && cmake -D CMAKE_BUILD_TYPE=Debug .. && cmake --build . -t pcm pcm-sensor-server pcm-tpmi -j && cp -v /tmp/pcm/build/bin/pcm* /bin/
+#ENV PCM_NO_PERF=1
diff --git a/deployment/pcm/.helmignore b/deployment/pcm/.helmignore
new file mode 100644
index 00000000..5dfaad8e
--- /dev/null
+++ b/deployment/pcm/.helmignore
@@ -0,0 +1,26 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
+smarter-device-manager/
+nri/
+autoscaler/
diff --git a/deployment/pcm/Chart.yaml b/deployment/pcm/Chart.yaml
new file mode 100644
index 00000000..f200feaa
--- /dev/null
+++ b/deployment/pcm/Chart.yaml
@@ -0,0 +1,9 @@
+apiVersion: v2
+name: pcm
+version: 0.1.0
+appVersion: "202403"
+description: A PCM Helm chart for Kubernetes
+home: https://github.com/intel/pcm
+maintainers:
+  - name: Pawel Palucki
+    email: pawel.palucki@intel.com
diff --git a/deployment/pcm/LICENSE b/deployment/pcm/LICENSE
new file mode 100644
index 00000000..2d994393
--- /dev/null
+++ b/deployment/pcm/LICENSE
@@ -0,0 +1,30 @@
+BSD 3-Clause License
+
+Copyright (c) 2009-2024, Intel Corporation
+Copyright (c) 2016-2020, opcm
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/deployment/pcm/README.md b/deployment/pcm/README.md
new file mode 100644
index 00000000..338be95c
--- /dev/null
+++ b/deployment/pcm/README.md
@@ -0,0 +1,385 @@
+--------------------------------------------------------------------------------
+Helm chart instructions
+--------------------------------------------------------------------------------
+
+### Features:
+
+- Configurable as non-privileged container (value: `privileged`, default: false) and privileged container,
+- Support for bare-metal and VM host configurations (files: [values-metal.yaml](values-metal.yaml), [values-vm.yaml](values-vm.yaml)),
+- Ability to deploy multiple, differently configured releases side by side to handle different kinds of machines (bare-metal, VM) at the [same time](#heterogeneous-mixed-vmmetal-instances-cluster),
+- Linux Watchdog handling (controlled with `PCM_KEEP_NMI_WATCHDOG`, `PCM_NO_AWS_WORKAROUND`, `nmiWatchdogMount` values).
+- Deploy to own namespace with "helm install ... **-n pcm --create-namespace**".
+- Silent mode (value: `silent`, default: false).
+- Backward compatible with older Linux kernels (<5.8) - (value: cap_perfmon, default: false).
+- VerticalPodAutoscaler (value: `verticalPodAutoscaler.enabled`, default: false)
+
+The chart supports the following metric collection methods, which differ in the interfaces they use and the access they require:
+
+| Method                  | Used interfaces      | default | Notes                                                                                                   | instructions                                                               |
+|-------------------------|----------------------| ------- | ------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------- |
+| unprivileged "indirect" | perf, resctrl        |    v    | recommended, missing metrics: energy metrics  (TODO link to issues/PR or node_exporter/rapl_collector)  | `helm install pcm .`                                                       |
+| privileged "indirect"   | perf, resctrl        |         | not recommended, insecure (no advantages over unprivileged), missing metrics: energy metrics            | `helm install pcm . --set privileged=true`                                 |
+| privileged "direct"     | msr                  |         | not recommended, insecure and requires msr module preloaded on host                                     | `helm install pcm . -f values-direct-privileged.yaml`                      |
+| unprivileged "direct"   | msr                  |         | not recommended, requires msr module and access to /dev/cpu and /dev/mem (non-trivial, e.g. using 3rd-party plugins) | [link for detailed documentation](docs/direct-unprivileged-deployment.md)  |
+
+For more information about direct/indirect collection methods please see [here](#metric-collection-methods-capabilities-vs-requirements).
+
+#### Integration features:
+
+- node-feature-discovery based nodeSelector and nodeAffinity (values: `nfd`, `nfdBaremetalAffinity`, `nfdRDTAffinity`),
+- Examples for non-privileged mode using device plugin ("smarter-device-manager") or using NRI device-injector plugin (TODO) (file: [values-smarter-devices-cpu-mem.yaml](docs/direct-unprivileged-examples/values-smarter-devices-cpu-mem.yaml) ),
+- Integration with NRI balloons policy plugin (value: `nriBalloonsPolicyIntegration`),
+
+#### Debugging features:
+
+- Local image registry for development (file: [values-local-image.yaml](values-local-image.yaml) ),
+- Deploy Prometheus operator's PodMonitor (value: `podMonitor`)
+
+### Getting started
+
+#### Indirect non-privileged method using Linux abstractions (perf/resctrl) - default
+
+```sh
+helm install pcm . 
+```
+
+#### Direct privileged method
+```
+helm install pcm . -f values-direct-privileged.yaml
+```
+
+#### All opt-in features: Node-feature-discovery + Prometheus podMonitor + VerticalPodAutoscaler
+
+```
+helm install ... --set nfd=true --set podMonitor=true --set verticalPodAutoscaler.enabled=true
+```
+
+### Requirements
+
+- Full set of metrics (uncore/UPI, RDT, energy) requires bare-metal or .metal cloud instance.
+- /sys/fs/resctrl has to be mounted on host OS (for default indirect deployment method)
+- the pod must be allowed to run with privileged capabilities (SYS_ADMIN, SYS_RAWIO) in the given namespace; in other words, Pod Security Standards must allow the privileged level for that namespace, e.g.:
+
+```
+    pod-security.kubernetes.io/enforce: privileged
+    pod-security.kubernetes.io/enforce-version: latest
+    pod-security.kubernetes.io/audit: privileged
+    pod-security.kubernetes.io/audit-version: latest
+    pod-security.kubernetes.io/warn: privileged
+    pod-security.kubernetes.io/warn-version: latest
+```
+
+More information here: https://kubernetes.io/docs/tutorials/security/ns-level-pss/ .
+
+### Defaults
+
+- Indirect method uses Linux abstraction to access event counters (Linux Perf, resctrl) and run container in non-privileged mode.
+- hostPort 9738 is exposed on host. (TODO: security review, consider TLS, together with Prometheus scraping !!).
+- Prometheus podMonitor is disabled (enable it with --set podMonitor=true).
+
+### Validation on local kind cluster
+
+#### Requirements
+
+- kubectl/kind/helm/jq binaries available in PATH,
+- docker service up and running.
+- full set of metrics is available only on a bare-metal instance or a cloud .metal instance.
+
+#### 1) (Optionally) mount resctrl filesystem (for RDT metrics) and unload "msr" kernel module for validation
+
+```
+mount -t resctrl resctrl /sys/fs/resctrl
+```
+
+To verify that all metrics are available without msr, unload the "msr" kernel module and make sure perf_event_paranoid has its default value:
+```
+rmmod msr
+echo 2 > /proc/sys/kernel/perf_event_paranoid
+```
+
+#### 2) Create kind based Kubernetes cluster
+
+```
+kind create cluster
+```
+
+**Note** to be able to collect and test RDT metrics through the resctrl filesystem, the kind cluster has to be created with additional mounts:
+```
+nodes:
+- role: control-plane
+  extraMounts:
+  - hostPath: /sys/fs/resctrl
+    containerPath: /sys/fs/resctrl
+```
+e.g. create a kind cluster with a local registry using [this script](https://kind.sigs.k8s.io/docs/user/local-registry/)
+and apply the patch to enable resctrl in the following way:
+
+```
+wget https://kind.sigs.k8s.io/examples/kind-with-registry.sh
+
+sed -i '/apiVersion: kind.x-k8s.io\/v1alpha4/a \
+nodes:\
+- role: control-plane\
+  extraMounts:\
+  - hostPath: /sys/fs/resctrl\
+    containerPath: /sys/fs/resctrl\
+' kind-with-registry.sh
+```
+
+Then create cluster using above patched script:
+```
+bash kind-with-registry.sh
+```
+
+Check that resctrl is available inside kind node:
+```
+docker exec kind-control-plane ls /sys/fs/resctrl/info
+# expected output:
+# L3_MON
+# MB
+# ...
+```
+
+
+and, optionally, that the local registry is running (to be used with locally built pcm images, more details [below](#development-with-local-images-and-testing))
+```
+docker ps | grep kind-registry
+# expected output:
+# e57529be23ea   registry:2             "/entrypoint.sh /etc…"   3 weeks ago          Up 3 weeks          127.0.0.1:5001->5000/tcp    kind-registry
+```
+
+Export kind kubeconfig as default for further kubectl commands:
+```
+kind export kubeconfig
+kubectl get pods -A
+```
+
+#### 3) (Optionally) Deploy Node Feature Discovery (nfd)
+
+```
+# I.a. Using Kustomize:
+kubectl apply -k https://github.com/kubernetes-sigs/node-feature-discovery/deployment/overlays/default?ref=v0.16.0-devel
+
+# I.b. or with Helm Chart:
+helm repo add nfd https://kubernetes-sigs.github.io/node-feature-discovery/charts
+helm repo update
+helm install nfd/node-feature-discovery --namespace node-feature-discovery --create-namespace --generate-name
+
+# II. Check node "labels" with CPU features are added
+kubectl get node kind-control-plane -o yaml | grep feature.node
+```
+
+#### 4) (Optionally) Deploy Prometheus operator
+
+```
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+helm install prometheus prometheus-community/kube-prometheus-stack --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false
+kubectl get sts prometheus-prometheus-kube-prometheus-prometheus
+```
+
+Note: `podMonitorSelectorNilUsesHelmValues` is set to false so that the Prometheus operator picks up the PCM podMonitor even without extra `podMonitorLabels`; otherwise pcm needs to be deployed like this:
+`helm install pcm . --set podMonitor=true --set podMonitorLabels.release=prometheus` (assuming the Prometheus operator was deployed as release "prometheus").
+
+
+#### 5) (Optionally) Deploy metrics-server and vertical-pod-autoscaler
+
+Note: this is not required for pcm-sensor-server functionality, but it is useful for observing the pcm pod CPU/memory usage:
+
+a) metrics-server
+
+```
+helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/
+helm repo update
+helm upgrade --install --set args={--kubelet-insecure-tls} metrics-server metrics-server/metrics-server --namespace kube-system
+```
+
+b) vertical pod autoscaler
+
+```
+git clone https://github.com/kubernetes/autoscaler
+./autoscaler/vertical-pod-autoscaler/hack/vpa-up.sh
+```
+
+#### 6) Deploy PCM helm chart
+
+```
+# a) Deploy to current namespace with defaults
+helm install pcm . 
+
+# b) Alternatively deploy with NFD and/or with Prometheus enabled
+helm install pcm . --set podMonitor=true
+helm install pcm . --set nfd=true
+
+# c) Alternatively deploy into own "pcm" namespace 
+helm install pcm . --namespace pcm 
+```
+
+#### 7) Check metrics are exported
+
+Run proxy in background:
+```
+kubectl proxy &
+```
+
+Access PCM metrics directly:
+
+```sh
+kubectl get daemonset pcm
+kubectl get pods 
+podname=`kubectl get pod -l app.kubernetes.io/component=pcm-sensor-server -ojsonpath='{.items[0].metadata.name}'`
+
+curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/metrics
+curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/metrics | grep L3_Cache_Misses                                                         # source: core
+curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/metrics | grep DRAM_Writes                                                             # source: uncore
+curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/metrics | grep 'Local_Memory_Bandwidth{socket="1",aggregate="socket",source="core"}'   # source: RDT
+curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/metrics | grep DRAM_Joules_Consumed                                                    # source: energy
+```
+
+... or through Prometheus UI/promtool (requires the Prometheus operator to be deployed and helm install with `--set podMonitor=true`):
+```
+http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy/graph
+promtool query range --step 1m http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy 'rate(DRAM_Writes{aggregate="system"}[5m])/1e9'
+promtool query instant http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy 'avg by(__name__) ({job="pcm"})'
+```
+
+... or through Grafana with generated dashboard:
+
+```
+
+
+# 1) Download dashboard
+curl -Ls http://127.0.0.1:8001/api/v1/namespaces/default/pods/$podname/proxy/dashboard/prometheus -o pcm-dashboard.json
+
+# change the default (too small) rate interval from 4s to $__rate_interval (effectively 2m: Prometheus best practice is a rate window four times the 30s scrape interval)
+# References:
+# https://grafana.com/blog/2020/09/28/new-in-grafana-7.2-__rate_interval-for-prometheus-rate-queries-that-just-work/
+# ($__rate_interval is 4 x the scrape interval defined in the datasource provisioned by the Prometheus operator; the scrape interval comes from the Prometheus object and defaults to 30s)
+# - https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml#L1069
+# - https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml#L3381
+sed -i 's/4s/$__rate_interval/g' pcm-dashboard.json
+
+# 2) port forward with kubectl (--address=0.0.0.0)
+kubectl port-forward -n default service/prometheus-grafana 8002:80 
+
+# 3) User: admin/prom-operator
+# or get password kubectl get secret --namespace default prometheus-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
+http://127.0.0.1:8002
+
+# 4) Go to Dashboards/New/Import  and upload:
+
+pcm-dashboard.json
+
+```
+
+### Deploy alternative options
+
+#### Direct (msr access) as privileged container 
+```
+helm install pcm . -f values-direct-privileged.yaml
+```
+
+#### Homogeneous bare metal instances cluster (full set of metrics)
+
+```
+helm install pcm . -f values-metal.yaml
+```
+
+#### Homogeneous VM instances cluster (limited set of metrics: core)
+
+```
+helm install pcm . -f values-vm.yaml
+```
+
+#### Heterogeneous (mixed VM/metal instances) cluster 
+
+```
+helm install pcm-vm . -f values-vm.yaml
+helm install pcm-metal . -f values-metal.yaml
+```
+
+#### Direct method as non-privileged container (not recommended)
+
+**Note** PCM requires read-write access to the /dev/cpu devices (MSR access), but vanilla Kubernetes currently cannot mount devices into unprivileged pods/containers. More about this limitation: https://github.com/kubernetes/kubernetes/issues/5607 .
+
+To expose necessary devices to pcm-sensor-server, one can use:
+
+a) Kubernetes device plugin (using Kubernetes [CDI](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/) interface),
+b) containerd plugin (using [NRI](https://github.com/containerd/nri/) interface),
+
+Examples can be found [here](docs/direct-unprivileged-deployment.md).
+
+#### Development (with local images) and testing
+
+1) Setup kind with registry following this instruction: https://kind.sigs.k8s.io/docs/user/local-registry/
+```
+wget https://kind.sigs.k8s.io/examples/kind-with-registry.sh
+bash kind-with-registry.sh
+```
+
+2) Build docker image and upload to local registry (from project root directory)
+```
+docker build . -t localhost:5001/pcm-local 
+docker push localhost:5001/pcm-local
+
+# optionally create buildx based builder
+mkdir ~/.docker/cli-plugins
+curl -sL https://github.com/docker/buildx/releases/download/v0.14.0/buildx-v0.14.0.linux-amd64 -o ~/.docker/cli-plugins/docker-buildx
+chmod +x ~/.docker/cli-plugins/docker-buildx
+docker buildx create --driver docker-container --name mydocker --use --bootstrap
+
+# or with single line (from deployment/pcm/ directory)
+# Build local image for tests/development
+# Following Dockerfile contains source code of pcm and some debugging utils (like gdb,strace for further analysis)
+(cd ../.. ;  docker build . -f Dockerfile.debug -t localhost:5001/pcm-local && docker push localhost:5001/pcm-local)
+```
+
+3) When deploying pcm to the kind cluster, use this values file to switch to the local pcm-local image
+```
+helm install pcm . -f values-local-image.yaml
+```
+
+4) Replace pcm-sensor-server with pcm or sleep to be able to run `gdb` or `strace` for example
+```
+helm upgrade --install pcm . --set debugPcm=true
+helm upgrade --install pcm . --set debugSleep=true
+```
+
+**TODO:** consider debug options to be removed before release for security reasons
+
+5) Check logs or interact with container directly:
+```
+# exec into pcm container
+kubectl exec -ti ds/pcm -- bash
+# or check logs
+kubectl logs ds/pcm
+```
+
+### Metric collection methods (capabilities vs requirements)
+
+
+
+| Metrics               | Available on Hardware         | Available through interface  | Available through method |
+| --------------------- | ----------------------------- | ---------------------------- | ------------------------ |
+| core                  | bare-metal, VM (any)          | msr or perf                  | any                      |
+| uncore (UPI)          | bare-metal, VM (all sockets)  | msr or perf                  | any                      |
+| RDT (MBW,L3OCCUP)     | bare-metal, VM (all sockets)  | msr or resctrl               | any                      |
+| energy, temp          | bare-metal (only)             | msr                          | direct                   |
+| perf-topdown          |                               | perf only                    | indirect                 |
+
+
+| Interface     | Requirements                                               |  Controlled by (env/helm value) |  default helm         | Used by source code                                      | Notes                                               |
+|---------------|------------------------------------------------------------|---------------------------------|-----------------------|----------------------------------------------------------|-----------------------------------------------------|
+| perf          | sys_perf_open() perf_paranoid<=0/privileged/CAP_ADMIN      | PCM_NO_PERF                     | use perf              | programPerfEvent(), PerfVirtualControlRegister()         |                                                     |
+| perf-uncore   | sys_perf_open() perf_paranoid<=0/privileged/CAP_ADMIN      | PCM_USE_UNCORE_PERF             | use perf for uncore   | programPerfEvent(), PerfVirtualControlRegister()         |                                                     |
+| perf-topdown  | /sys/bus/event_source/devices/cpu/events                   | sysMount                        | yes                   | cpucounters.cpp:perfSupportsTopDown()                    | TODO: conflicts with sys/fs/resctrl                 |
+| RDT           | uses "msr" or "resctrl" interface                          | PCM_NO_RDT                      | yes                   | cpucounters.cpp:isRDTDisabled()/QOSMetricAvailable()     |                                                     |
+| resctrl       | RW: /sys/fs/resctrl                                        | PCM_USE_RESCTRL                 | yes                   | resctrl.cpp                                              | resctrlMount                                    |
+| watchdog      | RO/RW: /proc/sys/kernel/nmi_watchdog                       | PCM_KEEP_NMI_WATCHDOG           | yes (tries to disable)| src/cpucounters.cpp:disableNMIWatchdog()                 |                                                     |
+| msr           | RW: /dev/cpu/X/msr + privileged or CAP_ADMIN/CAP_RAWIO     | PCM_NO_MSR                      | msr is disabled       | msr.cpp:MsrHandle()                                      | privileged or some method to access /dev/cpu        |
+|               | RW: /dev/mem                                               | ?                               | msr is disabled       | cpucounters.cpp:initUncoreObjects, pci.cpp:PCIHandleM()  | privileged or some method to access /dev/cpu        |
+|               | RO/RW: /sys/module/msr/parameters                          | PCM_NO_MSR                      | msr is disabled       | msr.cpp:MsrHandle()                                      | sysMount                                            |
+|               | RW: /proc/bus/pci                                          | PCM_USE_UNCORE_PERF             | msr is disabled       | pci.cpp:PCIHandle()                                      | pciMount                                            |
+|               | RO: /sys/firmware/acpi/tables/MCFG                         | PCM_USE_UNCORE_PERF             | msr is disabled       | pci.cpp:PciHandle::openMcfgTable()                       | mcfgMount                                           |
+|               | energy                                                     |                                 |                       | cpucounters.cpp initEnergyMonitoring()                   |                                                     |
+
+
diff --git a/deployment/pcm/docs/direct-unprivileged-deployment.md b/deployment/pcm/docs/direct-unprivileged-deployment.md
new file mode 100644
index 00000000..fd760a17
--- /dev/null
+++ b/deployment/pcm/docs/direct-unprivileged-deployment.md
@@ -0,0 +1,67 @@
+--------------------------------------------------------------------------------
+Examples of deploying with direct MSR access as non-privileged container
+--------------------------------------------------------------------------------
+
+#### Direct method as non-privileged container (not recommended)
+
+##### a) Device injection using 3rd party device-plugin
+
+To run PCM as a non-privileged pod, we can use third-party device plugins, e.g.:
+
+- https://github.com/smarter-project/smarter-device-manager
+- https://github.com/squat/generic-device-plugin
+- https://github.com/everpeace/k8s-host-device-plugin
+
+**Warning** These plugins were NOT audited for security concerns, **use them at your own risk**.
+
+Below is an example of how to pass /dev/cpu and /dev/mem using smarter-device-manager in a kind based Kubernetes test cluster.
+
+```
+# Label node to deploy device plugin on that node
+kubectl label node kind-control-plane smarter-device-manager=enabled
+
+# Install "smarter-device-manager" device plugin with only /dev/cpu and /dev/mem devices enabled:
+git clone https://github.com/smarter-project/smarter-device-manager
+helm install smarter-device-plugin --create-namespace --namespace smarter-device-plugin smarter-device-manager/charts/smarter-device-manager --set 'config[0].devicematch=^cpu$' --set 'config[0].nummaxdevices=1' --set 'config[1].devicematch=^mem$' --set 'config[1].nummaxdevices=1'
+
+# Check that cpu and mem devices are available - should return "1"
+kubectl get node kind-control-plane -o json | jq .status.capacity
+
+# Install pcm helm chart in unprivileged mode with extraResources for cpu and memory devices.
+helm install pcm . -f docs/direct-unprivileged-examples/values-direct-unprivileged.yaml -f docs/direct-unprivileged-examples/values-smarter-devices-cpu-mem.yaml 
+```
+
+##### b) Device injection using NRI plugin device-injection 
+
+**TODO**: **Warning** This is work in progress, because all /dev/cpu/XX/msr devices have to be specified manually, which is impractical in production (TO BE MOVED TO EXTERNAL FILE).
+
+```
+git clone https://github.com/containerd/nri/
+(cd nri/plugins/device-injector/ && go build )
+docker cp kind-control-plane:/etc/containerd/config.toml config.toml
+
+cat >>config.toml <<EOF
+  [plugins."io.containerd.nri.v1.nri"]
+    # Disable NRI support in containerd.
+    disable = false
+    # Allow connections from externally launched NRI plugins.
+    disable_connections = false
+    # plugin_config_path is the directory to search for plugin-specific configuration.
+    plugin_config_path = "/etc/nri/conf.d"
+    # plugin_path is the directory to search for plugins to launch on startup.
+    plugin_path = "/opt/nri/plugins"
+    # plugin_registration_timeout is the timeout for a plugin to register after connection.
+    plugin_registration_timeout = "5s"
+    # plugin_requst_timeout is the timeout for a plugin to handle an event/request.
+    plugin_request_timeout = "2s"
+    # socket_path is the path of the NRI socket to create for plugins to connect to.
+    socket_path = "/var/run/nri/nri.sock"
+EOF
+
+docker cp config.toml kind-control-plane:/etc/containerd/config.toml 
+docker exec kind-control-plane systemctl restart containerd
+docker exec kind-control-plane systemd-run -u device-injector /device-injector -idx 10 -verbose
+docker exec kind-control-plane systemctl status device-injector
+
+helm install pcm . -f docs/direct-unprivileged-examples/values-direct-unprivileged.yaml -f docs/direct-unprivileged-examples/values-device-injector.yaml 
+```
diff --git a/deployment/pcm/docs/direct-unprivileged-examples/values-device-injector.yaml b/deployment/pcm/docs/direct-unprivileged-examples/values-device-injector.yaml
new file mode 100644
index 00000000..c012161d
--- /dev/null
+++ b/deployment/pcm/docs/direct-unprivileged-examples/values-device-injector.yaml
@@ -0,0 +1,16 @@
+# Requires the NRI device-injector plugin: https://github.com/containerd/nri/tree/main/plugins/device-injector
+# Only the msr devices of CPUs 0 and 1 (plus /dev/mem) are listed; every further CPU needs its own /dev/cpu/<N>/msr entry.
+podAnnotations:
+  devices.nri.io/container.pcm: |+
+    - path: /dev/cpu/0/msr
+      type: c
+      major: 202
+      minor: 0
+    - path: /dev/cpu/1/msr
+      type: c
+      major: 202
+      minor: 1
+    - path: /dev/mem
+      type: c
+      major: 1
+      minor: 1
diff --git a/deployment/pcm/docs/direct-unprivileged-examples/values-direct-unprivileged.yaml b/deployment/pcm/docs/direct-unprivileged-examples/values-direct-unprivileged.yaml
new file mode 100644
index 00000000..9d863840
--- /dev/null
+++ b/deployment/pcm/docs/direct-unprivileged-examples/values-direct-unprivileged.yaml
@@ -0,0 +1,19 @@
+# Warning: this file is to be used for direct unprivileged access, which requires a 3rd party plugin
+# e.g. device-injector NRI or smarter-devices-cpu-mem
+privileged: false
+
+# Switch to using MSR
+PCM_NO_MSR: 0               # use MSR
+PCM_NO_PERF: 1              # do not use Linux perf 
+PCM_USE_UNCORE_PERF: 0      # also use MSR for uncore
+PCM_NO_RDT: 0               # Collect RDT data
+PCM_USE_RESCTRL: 0          # using MSR (no resctrl)
+
+# RDT metrics will be collected via direct MSR programming, so the resctrl mounts are disabled
+resctrlMount: false     
+resctrlInsideMount: false
+
+# sys and pci mounts are required for uncore PMU devices discovery
+sysMount: true              # /pcm/sys is required
+pciMount: true              # /pcm/proc/bus/pci is required
+
diff --git a/deployment/pcm/docs/direct-unprivileged-examples/values-smarter-devices-cpu-mem.yaml b/deployment/pcm/docs/direct-unprivileged-examples/values-smarter-devices-cpu-mem.yaml
new file mode 100644
index 00000000..4808cf4f
--- /dev/null
+++ b/deployment/pcm/docs/direct-unprivileged-examples/values-smarter-devices-cpu-mem.yaml
@@ -0,0 +1,9 @@
+# Requires smarter-device-manager: https://github.com/smarter-project/smarter-device-manager
+# extraResources.requests/.limits are appended to the pcm container's resources by the DaemonSet template.
+extraResources:
+  requests:
+    smarter-devices/cpu: 1
+    smarter-devices/mem: 1
+  limits:
+    smarter-devices/cpu: 1
+    smarter-devices/mem: 1
diff --git a/deployment/pcm/helm_chart_test_and_notes_TODO/NOTES.txt b/deployment/pcm/helm_chart_test_and_notes_TODO/NOTES.txt
new file mode 100644
index 00000000..8cc22cef
--- /dev/null
+++ b/deployment/pcm/helm_chart_test_and_notes_TODO/NOTES.txt
@@ -0,0 +1,6 @@
+1. Get the application URL by running these commands:
+
+export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "pcm.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
+export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
+echo "Visit http://127.0.0.1:8080 to use your application"
+kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
diff --git a/deployment/pcm/helm_chart_test_and_notes_TODO/_tests/test-connection.yaml b/deployment/pcm/helm_chart_test_and_notes_TODO/_tests/test-connection.yaml
new file mode 100644
index 00000000..d26943be
--- /dev/null
+++ b/deployment/pcm/helm_chart_test_and_notes_TODO/_tests/test-connection.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "pcm.fullname" . }}-test-connection"
+  labels:
+    {{- include "pcm.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+spec:
+  containers:
+    - name: wget
+      image: busybox
+      command: ['wget']
+      args: ['{{ include "pcm.fullname" . }}:9738']
+  restartPolicy: Never
diff --git a/deployment/pcm/k8s-test.sh b/deployment/pcm/k8s-test.sh
new file mode 100644
index 00000000..01e67eb7
--- /dev/null
+++ b/deployment/pcm/k8s-test.sh
@@ -0,0 +1,77 @@
+#### Load the msr kernel module and confirm that it shows up in lsmod
+modprobe msr
+lsmod | grep -E '^msr'
+
+# Create cluster
+kind create cluster
+kind export kubeconfig
+
+# Deploy NodeFeatureDiscovery
+#kubectl apply -k https://github.com/kubernetes-sigs/node-feature-discovery/deployment/overlays/default?ref=v0.15.1
+kubectl apply -k https://github.com/kubernetes-sigs/node-feature-discovery/deployment/overlays/default?ref=v0.16.0-devel
+kubectl get node -o jsonpath='{.items[0].metadata.labels.feature\.node\.kubernetes\.io\/cpu\-model\.vendor_id}{"\n"}'
+kubectl get nodefeature kind-control-plane -n node-feature-discovery -o yaml
+kubectl get node kind-control-plane -o yaml
+
+helm repo add nfd https://kubernetes-sigs.github.io/node-feature-discovery/charts
+helm repo update
+helm install nfd/node-feature-discovery --namespace node-feature-discovery --create-namespace --generate-name
+
+# Deploy prometheus for PodMonitor
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+helm install prometheus prometheus-community/kube-prometheus-stack --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false
+kubectl get sts prometheus-prometheus-kube-prometheus-prometheus
+
+# Deploy PCM
+kubectl apply -f pcm-kubernetes.yaml
+
+# Verify PCM works as expected
+kubectl -n intel-pcm get daemonset
+kubectl -n intel-pcm get pods
+podname=`kubectl -n intel-pcm get pods -ojsonpath='{.items[0].metadata.name}'`
+kubectl proxy &
+curl -Ls http://127.0.0.1:8001/api/v1/namespaces/intel-pcm/pods/$podname/proxy/metrics | grep DRAM_Writes
+promtool query instant http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy 'avg by(__name__) ({job="pcm"})'
+
+# Metrics
+```
+CStateResidency => 0.09090909090909094 @[1707901856.957]
+Clock_Unhalted_Ref => 1010026077.3913049 @[1707901856.957]
+Clock_Unhalted_Thread => 1295730425.8695648 @[1707901856.957]
+DRAM_Joules_Consumed => 0 @[1707901856.957]
+DRAM_Reads => 3600814506.6666665 @[1707901856.957]
+DRAM_Writes => 1974366592 @[1707901856.957]
+Embedded_DRAM_Reads => 0 @[1707901856.957]
+Embedded_DRAM_Writes => 0 @[1707901856.957]
+Incoming_Data_Traffic_On_Link_0 => 689786624 @[1707901856.957]
+Incoming_Data_Traffic_On_Link_1 => 689454432 @[1707901856.957]
+Incoming_Data_Traffic_On_Link_2 => 0 @[1707901856.957]
+Instructions_Retired_Any => 749013885.5739133 @[1707901856.957]
+Invariant_TSC => 432975372048881700 @[1707901856.957]
+L2_Cache_Hits => 3531524.973913045 @[1707901856.957]
+L2_Cache_Misses => 2334387.130434784 @[1707901856.957]
+L3_Cache_Hits => 1325323.1739130428 @[1707901856.957]
+L3_Cache_Misses => 627863.4000000003 @[1707901856.957]
+L3_Cache_Occupancy => 0 @[1707901856.957]
+Local_Memory_Bandwidth => 0 @[1707901856.957]
+Measurement_Interval_in_us => 14507400443881 @[1707901856.957]
+Memory_Controller_IO_Requests => 0 @[1707901856.957]
+Number_of_sockets => 2 @[1707901856.957]
+OS_ID => 55.499999999999986 @[1707901856.957]
+Outgoing_Data_And_Non_Data_Traffic_On_Link_0 => 1843333122.5 @[1707901856.957]
+Outgoing_Data_And_Non_Data_Traffic_On_Link_1 => 1849219231.5 @[1707901856.957]
+Outgoing_Data_And_Non_Data_Traffic_On_Link_2 => 0 @[1707901856.957]
+Package_Joules_Consumed => 0 @[1707901856.957]
+Persistent_Memory_Reads => 0 @[1707901856.957]
+Persistent_Memory_Writes => 0 @[1707901856.957]
+RawCStateResidency => 89486131.66409859 @[1707901856.957]
+Remote_Memory_Bandwidth => 0 @[1707901856.957]
+SMI_Count => 0 @[1707901856.957]
+Thermal_Headroom => -2147483648 @[1707901856.957]
+Utilization_Incoming_Data_Traffic_On_Link_0 => 0 @[1707901856.957]
+Utilization_Incoming_Data_Traffic_On_Link_1 => 0 @[1707901856.957]
+Utilization_Incoming_Data_Traffic_On_Link_2 => 0 @[1707901856.957]
+Utilization_Outgoing_Data_And_Non_Data_Traffic_On_Link_0 => 0 @[1707901856.957]
+Utilization_Outgoing_Data_And_Non_Data_Traffic_On_Link_1 => 0 @[1707901856.957]
+Utilization_Outgoing_Data_And_Non_Data_Traffic_On_Link_2 => 0 @[1707901856.957]
+```
diff --git a/deployment/pcm/templates/_helpers.tpl b/deployment/pcm/templates/_helpers.tpl
new file mode 100644
index 00000000..fffa7025
--- /dev/null
+++ b/deployment/pcm/templates/_helpers.tpl
@@ -0,0 +1,79 @@
+{{/* Expand the name of the chart.  */}}
+{{- define "pcm.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/* Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.  */}}
+{{- define "pcm.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/* Create chart name and version as used by the chart label.  */}}
+{{- define "pcm.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/* Selector labels */}}
+{{- define "pcm.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "pcm.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/component: pcm-sensor-server
+{{- end }}
+
+{{/* Common labels */}}
+{{- define "pcm.labels" -}}
+helm.sh/chart: {{ include "pcm.chart" . }}
+{{ include "pcm.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/* Container securityContext: privileged when .Values.privileged is set; otherwise adds the PERFMON capability (SYS_ADMIN when .Values.cap_perfmon is false) plus SYS_RAWIO */}}
+{{- define "pcm.securityContext" -}}
+securityContext:
+{{- if .Values.privileged }}
+  privileged: true
+{{- else -}}
+  {{/* TODO?
+  readOnlyRootFilesystem: false
+  runAsUser: 0
+  runAsGroup: 0
+  ## below two doesnt work on container level!
+  fsGroup: 0
+  supplementalGroups: [0]
+  seccompProfile:
+    #type: RuntimeDefault
+    type: Unconfined
+  */}}
+  capabilities:
+    add:
+    - {{ if .Values.cap_perfmon }}PERFMON{{ else }}SYS_ADMIN{{ end }} 
+    - SYS_RAWIO
+{{- end }}
+{{- end }}
+
+
+{{/* Shared probe body (HTTP GET / on port 9738) that the DaemonSet includes for both its liveness and readiness probes */}}
+{{- define "pcm.probe" -}}
+failureThreshold: 3
+httpGet:
+  path: /
+  port: 9738
+  scheme: HTTP
+periodSeconds: 10
+successThreshold: 1
+timeoutSeconds: 1
+{{- end }}
diff --git a/deployment/pcm/templates/daemonset.yaml b/deployment/pcm/templates/daemonset.yaml
new file mode 100644
index 00000000..6625fd15
--- /dev/null
+++ b/deployment/pcm/templates/daemonset.yaml
@@ -0,0 +1,201 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: {{ include "pcm.fullname" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "pcm.labels" . | nindent 4 }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "pcm.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- include "pcm.labels" . | nindent 8 }}
+      annotations:
+      {{- with .Values.podAnnotations }}{{- toYaml . | nindent 8 }}{{- end }}
+      {{- if .Values.nriBalloonsPolicyIntegration }}
+        cpu.preserve.resource-policy.nri.io: "true"
+      {{- end }}
+    spec:
+      nodeSelector:
+        {{- with .Values.nodeSelector -}}{{- toYaml . | nindent 8 -}}{{- end -}}
+      {{- if .Values.nfd }}
+        feature.node.kubernetes.io/cpu-model.vendor_id: Intel
+        {{- if .Values.nfdRDTAffinity }}
+        feature.node.kubernetes.io/cpu-rdt.RDTCMT: "true"
+        feature.node.kubernetes.io/cpu-rdt.RDTL3CA: "true"
+        feature.node.kubernetes.io/cpu-rdt.RDTMBA: "true"
+        feature.node.kubernetes.io/cpu-rdt.RDTMBM: "true"
+        feature.node.kubernetes.io/cpu-rdt.RDTMON: "true"
+        {{- end }}
+      {{- if .Values.nfdBaremetalAffinity}}
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: "feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR"
+                operator: DoesNotExist
+      {{- end }}
+      {{- end }} {{/* if nfd */}}
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end -}}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      terminationGracePeriodSeconds: 0
+      containers:
+      - name: pcm
+        image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+        imagePullPolicy: {{ .Values.image.pullPolicy }}
+        {{- include "pcm.securityContext" . | nindent 8 }}
+        {{- if .Values.silent }}
+        command:
+        - "/usr/local/bin/pcm-sensor-server"
+        - "-p"
+        - "9738"
+        - "-r"
+        - "-silent"
+        {{- end -}}
+        {{- if .Values.debugSleep }}
+        command:
+        - /usr/bin/sleep
+        - inf
+        {{- end -}}
+        {{- if .Values.debugPcm }}
+        command:
+        - /bin/bash
+        - -c 
+        - "/usr/local/bin/pcm 2 -r -nc -nsys{{ if .Values.silent }} -silent{{ end }}"
+        {{- end -}}
+        {{- if .Values.resctrlInsideMount }}
+        # Hack (bare metal only): mount resctrl inside the container when the resctrl abstraction is wanted but resctrl is not mounted on the host. The silent/debugSleep/debugPcm options also render a "command:" key, so do not combine them with this one.
+        command:
+        - /bin/bash
+        - -c 
+        - "dnf install -q -y util-linux-core; mount -t resctrl resctrl /sys/fs/resctrl; /usr/local/bin/pcm-sensor-server -p 9738 -r"
+        {{- end -}}
+        {{/* ALREADY DONE by securityContext on pod level
+        securityContext:
+          {{- toYaml .Values.podSecurityContext | nindent 12 }}
+        */}} 
+        resources:
+          requests:
+            {{ with .Values.cpuRequest }}cpu: {{.}}{{ end }}
+            {{ with .Values.memoryRequest }}memory: {{.}}{{ end }}
+            {{- with .Values.extraResources }} {{- toYaml .requests | nindent 12 }} {{- end }}
+          limits:
+            {{ with .Values.cpuLimit }}cpu: {{.}}{{ end }}
+            {{ with .Values.memoryLimit }}memory: {{.}}{{ end }}
+            {{- with .Values.extraResources }} {{- toYaml .limits | nindent 12 }} {{- end }}
+        env:
+        - name: PCM_NO_MSR
+          value: {{ .Values.PCM_NO_MSR | quote }} 
+        - name: PCM_NO_PERF
+          value: {{ .Values.PCM_NO_PERF | quote }} 
+        - name: PCM_USE_UNCORE_PERF
+          value: {{ .Values.PCM_USE_UNCORE_PERF | quote }} 
+        - name: PCM_NO_RDT
+          value: {{ .Values.PCM_NO_RDT | quote }} 
+        - name: PCM_USE_RESCTRL
+          value: {{ .Values.PCM_USE_RESCTRL | quote }} 
+        - name: PCM_IGNORE_ARCH_PERFMON
+          value: {{ .Values.PCM_IGNORE_ARCH_PERFMON | quote }} 
+        - name: PCM_KEEP_NMI_WATCHDOG
+          value: {{ .Values.PCM_KEEP_NMI_WATCHDOG | quote }} 
+        - name: PCM_NO_AWS_WORKAROUND
+          value: {{ .Values.PCM_NO_AWS_WORKAROUND | quote }} 
+        - name: PCM_NO_UNCORE_PMU_DISCOVERY
+          value: {{ .Values.PCM_NO_UNCORE_PMU_DISCOVERY | quote }} 
+        - name: PCM_PRINT_UNCORE_PMU_DISCOVERY
+          value: {{ .Values.PCM_PRINT_UNCORE_PMU_DISCOVERY | quote }} 
+        - name: PCM_PRINT_TOPOLOGY
+          value: {{ .Values.PCM_PRINT_TOPOLOGY | quote }} 
+        - name: PCM_NO_MAIN_EXCEPTION_HANDLER
+          value: {{ .Values.PCM_NO_MAIN_EXCEPTION_HANDLER | quote }} 
+        {{- with .Values.probes }}
+        livenessProbe:
+          {{- include "pcm.probe" . | nindent 12 }}
+        readinessProbe:
+          {{- include "pcm.probe" . | nindent 12 }}
+        {{- end }}
+        {{- with .Values.hostPort }}
+        ports:
+        - containerPort: 9738
+          hostPort: {{ . }} 
+          name: pcm-metrics
+          protocol: TCP
+        {{- end }}
+        volumeMounts:
+        # {{- if .Values.privileged }}
+        # - mountPath: /pcm/dev/cpu
+        #   name: dev-cpu
+        #   readOnly: false
+        # - mountPath: /pcm/dev/mem
+        #   name: dev-mem
+        #   readOnly: false
+        # {{- end }}
+        {{- if .Values.pciMount }}
+        - mountPath: /pcm/proc/bus/pci
+          name: proc-pci
+        {{- end }}
+        {{- if .Values.sysMount }}
+        - mountPath: /pcm/sys
+          name: sysfs
+          readOnly: true
+        {{- end }}
+        {{- if .Values.nmiWatchdogMount }}
+        - mountPath: /pcm/proc/sys/kernel/nmi_watchdog
+          name: nmi-watchdog
+          readOnly: true  # RW? # TODO
+        {{- end }}
+        {{- if .Values.resctrlMount }}
+        - mountPath: /sys/fs/resctrl
+          name: sysfs-resctrl
+        {{- end }}
+        # TODO: to be removed, already handled by /sysMount
+        # {{- if .Values.mcfgMount }}
+        # - mountPath: /pcm/sys/firmware/acpi/tables/MCFG
+        #   name: sys-acpi
+        #   readOnly: true
+        # {{- end }}
+      volumes:
+      # {{- if .Values.privileged }}
+      # - name: dev-cpu
+      #   hostPath:
+      #     path: /dev/cpu
+      # - name: dev-mem
+      #   hostPath:
+      #     path: /dev/mem
+      # {{- end}}
+      {{- if .Values.sysMount }}
+      - name: sysfs
+        hostPath:
+          path: /sys
+      {{- end}}
+      {{- if .Values.pciMount }}
+      - name: proc-pci
+        hostPath:
+          path: /proc/bus/pci
+      {{- end}}
+      {{- if .Values.nmiWatchdogMount }}
+      - name: nmi-watchdog
+        hostPath:
+          path: /proc/sys/kernel/nmi_watchdog
+      {{- end }}
+      # TODO: to be removed, already handled by /sysMount
+      # {{- if .Values.mcfgMount }}
+      # - name: sys-acpi
+      #   hostPath:
+      #     path: /sys/firmware/acpi/tables/MCFG
+      # {{- end }}
+      {{- if .Values.resctrlMount }}
+      - name: sysfs-resctrl
+        hostPath:
+          path: /sys/fs/resctrl
+      {{- end }}
diff --git a/deployment/pcm/templates/podmonitor.yaml b/deployment/pcm/templates/podmonitor.yaml
new file mode 100644
index 00000000..b9477e95
--- /dev/null
+++ b/deployment/pcm/templates/podmonitor.yaml
@@ -0,0 +1,41 @@
+{{- if .Values.podMonitor }}{{/* PodMonitor for the Prometheus operator; rendered only when .Values.podMonitor is enabled */}}
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: {{ include "pcm.fullname" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "pcm.labels" . | nindent 4 }}
+    app.kubernetes.io/component: metrics  # NOTE(review): pcm.labels above already emits this key (via pcm.selectorLabels), so it is duplicated here
+    jobLabel: pcm
+    {{- with .Values.podMonitorLabels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+spec:
+  attachMetadata:
+    node: true
+  jobLabel: jobLabel
+  namespaceSelector:
+    matchNames:
+    - {{ .Release.Namespace }}
+  podMetricsEndpoints:
+  # hostPort is mandatory: the DaemonSet only declares the pcm-metrics port when hostPort is set {{ required "A valid .Values.hostPort is required with PodMonitor enabled " .Values.hostPort }}
+  - enableHttp2: false
+    filterRunning: true
+    followRedirects: false
+    honorLabels: true
+    honorTimestamps: true
+    path: /metrics
+    port: pcm-metrics
+    interval: {{ .Values.podMonitorInterval | quote }}
+    relabelings:
+    - sourceLabels:
+      - __meta_kubernetes_pod_node_name
+      targetLabel: nodename
+    scheme: http
+  selector:
+    matchLabels:
+      app.kubernetes.io/component: pcm-sensor-server
+      app.kubernetes.io/instance: {{ .Release.Name }}
+      app.kubernetes.io/name: {{ include "pcm.name" . }}  # same value as pcm.selectorLabels, so nameOverride keeps matching the pods
+{{- end }}
diff --git a/deployment/pcm/templates/verticalpodautoscaler.yaml b/deployment/pcm/templates/verticalpodautoscaler.yaml
new file mode 100644
index 00000000..9d0941d9
--- /dev/null
+++ b/deployment/pcm/templates/verticalpodautoscaler.yaml
@@ -0,0 +1,40 @@
+{{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") (.Values.verticalPodAutoscaler.enabled) }}
+apiVersion: autoscaling.k8s.io/v1
+kind: VerticalPodAutoscaler
+metadata:
+  name: {{ include "pcm.fullname" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "pcm.labels" . | nindent 4 }}
+spec:
+  {{- with .Values.verticalPodAutoscaler.recommenders }}
+  recommenders:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  resourcePolicy:
+    containerPolicies:
+    - containerName: pcm
+      {{- with .Values.verticalPodAutoscaler.controlledResources }}
+      controlledResources:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.verticalPodAutoscaler.controlledValues }}
+      controlledValues: {{ . }}
+      {{- end }}
+      {{- with .Values.verticalPodAutoscaler.maxAllowed }}
+      maxAllowed:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.verticalPodAutoscaler.minAllowed }}
+      minAllowed:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+  targetRef:
+    apiVersion: apps/v1
+    kind: DaemonSet
+    name: {{ include "pcm.fullname" . }}
+  {{- with .Values.verticalPodAutoscaler.updatePolicy }}
+  updatePolicy:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+{{- end }}
diff --git a/deployment/pcm/values-direct-privileged.yaml b/deployment/pcm/values-direct-privileged.yaml
new file mode 100644
index 00000000..c307438d
--- /dev/null
+++ b/deployment/pcm/values-direct-privileged.yaml
@@ -0,0 +1,16 @@
+#### Tuning for "direct" privileged access
+privileged: true
+
+# Switch PCM to use msr access always
+PCM_NO_MSR: 0               # use MSR
+PCM_NO_PERF: 1              # do not use Linux perf 
+PCM_USE_UNCORE_PERF: 0      # also use MSR for uncore
+PCM_NO_RDT: 0               # Enable RDT metrics ...
+PCM_USE_RESCTRL: 0          # but using MSR (no resctrl filesystem)
+
+# with a privileged container additional mounts aren't required
+resctrlMount: false     # with MSR resctrl mount is not needed
+resctrlInsideMount: false
+sysMount: false
+pciMount: false
+mcfgMount: false
diff --git a/deployment/pcm/values-local-image.yaml b/deployment/pcm/values-local-image.yaml
new file mode 100644
index 00000000..7d1c336d
--- /dev/null
+++ b/deployment/pcm/values-local-image.yaml
@@ -0,0 +1,4 @@
+image:
+  repository: localhost:5001/pcm-local
+  tag: "latest"
+  pullPolicy: Always
diff --git a/deployment/pcm/values-metal.yaml b/deployment/pcm/values-metal.yaml
new file mode 100644
index 00000000..1ca73c1e
--- /dev/null
+++ b/deployment/pcm/values-metal.yaml
@@ -0,0 +1,8 @@
+#### ================ Tuning for bare-metal instances ================
+# with node-feature-discovery node affinity for non hypervisor and RDT
+nmiWatchdogMount: false
+PCM_NO_AWS_WORKAROUND: 1
+PCM_KEEP_NMI_WATCHDOG: 0
+nfd: true  
+nfdBaremetalAffinity: true
+nfdRDTAffinity: true
diff --git a/deployment/pcm/values-vm.yaml b/deployment/pcm/values-vm.yaml
new file mode 100644
index 00000000..e9a43327
--- /dev/null
+++ b/deployment/pcm/values-vm.yaml
@@ -0,0 +1,6 @@
+#### ================ Tuning for VM ================
+nmiWatchdogMount: true
+
+# Disable RDT because it is not available on VM instances
+PCM_NO_RDT: 1                 
+resctrlMount: false
diff --git a/deployment/pcm/values.yaml b/deployment/pcm/values.yaml
new file mode 100644
index 00000000..b8eda605
--- /dev/null
+++ b/deployment/pcm/values.yaml
@@ -0,0 +1,166 @@
+### -------------- Naming -------------------
+# used in 
+# - common label: app.kubernetes.io/name otherwise "Chart name"
+# - also in selectorLabels together with release.name
+# defaults to "Chart.name"
+nameOverride: ""        
+# Used as daemonset name (usually based on truncated "name + release name")
+fullnameOverride: ""
+
+### -------------- Image options ------------
+image:
+  repository: ghcr.io/intel/pcm
+  pullPolicy: IfNotPresent
+  tag: "latest"               # uses .Chart.AppVersion if empty
+imagePullSecrets: []          # list (e.g. "- name: my-registry-secret"), rendered as-is under the pod's imagePullSecrets
+
+### -------------- Security ------------------
+# Runs the container unprivileged by default; the PERFMON (or SYS_ADMIN, see cap_perfmon) and SYS_RAWIO capabilities are then added instead
+privileged: false
+
+# Use new kernel 5.8+ PERFMON (least privileged) instead of generic SYS_ADMIN capability
+# !Warning requires kernel 5.8+ 
+# more info here: https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html#perf-events-access-control
+cap_perfmon: true
+
+# Run pcm in silent mode (additional -silent argument to pcm-sensor-server binary)
+# Removes some of the debug output (like warnings about the inability to open some /sys... /proc... files)
+silent: false
+
+### -------------- Required OS affinity -------
+# Should only run on Linux nodes
+nodeSelector:
+  kubernetes.io/os: linux
+
+### -------------- Probes ---------------------
+probes: false
+
+### ================ Metrics configuration ======================
+
+### -------------- Metrics: Uncore ------------
+# Mounts section
+# NOTE: only required for direct mode
+# required for uncore metrics discovery and working only in baremetal, not available for VM 
+sysMount: false         # mounts host /sys into container /pcm/sys/
+pciMount: false         # mounts host /proc/bus/pci into container /pcm/proc/bus/pci/
+
+# NOTE this is only required for direct unprivileged mode  ?!?!?!
+# TODO: to be removed!!!?!?!!?!? (already coverred sysMounts !!!!)
+#mcfgMount: false       # mounts hosts: /sys/firmware/acpi/tables/MCFG -> /pcm/sys/firmware/acpi/tables/MCFG
+
+### linux Perf (indirect) vs msr(direct)
+# Lets try "indirect" as default
+PCM_NO_MSR: 1                 # do not use MSR
+PCM_NO_PERF: 0                # use Linux Perf over MSR for core metrics
+PCM_USE_UNCORE_PERF: 1        # use Linux Perf instead of MSR for uncore metrics (collection+detection)
+
+### -------------- Metrics: RDT ---------------
+### RDT rdt/resctrl:
+PCM_NO_RDT: 0                 # 0 - try to collect RDT data, enables local/remote memory bandwidth + llc occupancy
+PCM_USE_RESCTRL: 1            # use the resctrl filesystem instead of direct MSR access (more reliable)
+# the resctrl mount is required for indirect RDT access; only available on bare metal, not on VMs
+# mounted by default; can be disabled when RDT is accessed through direct MSR programming (PCM_USE_RESCTRL: 0)
+resctrlMount: true        # mount /sys/fs/resctrl from the host
+resctrlInsideMount: false     # TODO: mount inside with extra call to mount, requires image with mount installed - doesn't require 
+
+### -------------- Other (NMI handling and/or on VM/AWS)
+PCM_IGNORE_ARCH_PERFMON: 0    # After VM is detected through CPUID (hypervisor flag) - check arch_perfmon flag to be also enabled - fail if not available (0 - do check, 1 - disable check)
+# 0: Disabling NMI watchdog since it consumes one hw-PMU counter, requires nmiWatchdogMount to be true
+# 1: don't disable NMI watchdog (reducing the core metrics set) - preferred for production usage!
+# but even with 0 the automatic AWS workaround applies!
+PCM_KEEP_NMI_WATCHDOG: 0
+# workaround: after VM is detected: "INFO: Reducing the number of programmable counters to 3 to workaround the fixed cycle counter virtualization issue on AWS.\n";)
+# 1: disables the workaround and tries to use four programmable counters (without the workaround pcm-sensor-server will hang on VMs)
+# Please do not disable (value=1) on VMs
+PCM_NO_AWS_WORKAROUND: 0
+
+# mounting watchdog is recommended when PCM_KEEP_NMI_WATCHDOG=0 or we expect AWS workaround to be applied
+nmiWatchdogMount: true
+
+### -------------- Other (Debugging options for uncore pmu discovery)
+PCM_NO_UNCORE_PMU_DISCOVERY: 0      #  1 = skip: discovery is not required for direct privileged access, and with 0 it ends with WARNING enumeration failed
+PCM_PRINT_UNCORE_PMU_DISCOVERY: 1   #  show: discovered pmu 
+PCM_PRINT_TOPOLOGY: 0               #  show individual CPU topology  for each core (plenty of lines)
+PCM_NO_MAIN_EXCEPTION_HANDLER: 0    #  show full call stack of error
+
+### =============================== Optional POD fields no related to PCM ===============================
+# Pod level
+podAnnotations: {}
+podLabels: {}
+# Container level
+tolerations: []
+# Resources cpu/mem
+cpuLimit: 100m
+cpuRequest: 100m
+memoryLimit: 512Mi
+memoryRequest: 256Mi
+# requests, limits level need to be specified here
+extraResources: {} 
+
+### =============================== Integrations with other projects ====================================
+#
+### -------------- Prometheus operator --------------------
+# Expose run containerPort "pcm-sensor-server -p 9738" as hostPort, can be empty to disable hostPort
+hostPort: 9738
+# Deploy PrometheusOperator PodMonitor (requires hostPort to be not empty)
+podMonitor: false
+# Extra PodMonitor labels to let Prometheus operator filter based on that
+# e.g. default "kube-prometheus-stack" helm chart requires additional release:"{name of chart release}" label in podMonitor to be considered
+# here is example how to check extra labels required to be added to PodMonitor
+# 1) kubectl get prometheus -o jsonpath='{.items[].spec.podMonitorSelector.matchLabels}' # e.g. release: prometheus
+# 2) helm install pcm . --set podMonitor=true --set podMonitorLabels.release=prometheus
+podMonitorLabels: {}
+# Default interval for Prometheus scraping configuration
+podMonitorInterval: 30s
+
+
+### -------------- NRI balloons policy plugin -------------
+# Integrates the PCM deployment with the NRI balloons resource policy
+# if true, will add a special annotation to allow the pcm pod to use all the cores, regardless of NRI balloons policy rules.
+nriBalloonsPolicyIntegration: false
+
+### -------------  node-feature-discovery -----------------
+# when enabled specific set of labels will be used as node selector (Intel vendor, RDT availability, baremetal) 
+nfd: false  
+# if enabled daemonset nodeAffinity will require node without feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR flag (requires nfd=true)
+nfdBaremetalAffinity: false
+# if enabled, the following RDT labels will be required for scheduling (requires nfd=true)
+# feature.node.kubernetes.io/cpu-rdt.RDTCMT=true
+# feature.node.kubernetes.io/cpu-rdt.RDTL3CA=true
+# feature.node.kubernetes.io/cpu-rdt.RDTMBA=true
+# feature.node.kubernetes.io/cpu-rdt.RDTMBM=true
+# feature.node.kubernetes.io/cpu-rdt.RDTMON=true
+nfdRDTAffinity: false
+
+
+### -------------- verticalPodAutoscaler ------------------
+# Enable vertical pod autoscaler support for pcm-sensor-server
+verticalPodAutoscaler:
+  enabled: false
+
+  # Recommender responsible for generating recommendation for the object.
+  # List should be empty (then the default recommender will generate the recommendation)
+  # or contain exactly one recommender.
+  # recommenders:
+  # - name: custom-recommender-performance
+
+  # List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory
+  controlledResources: []
+  # Specifies which resource values should be controlled: RequestsOnly or RequestsAndLimits.
+  # controlledValues: RequestsAndLimits
+
+  # Define the max allowed resources for the pod
+  maxAllowed: {}
+  # cpu: 200m
+  # memory: 100Mi
+  # Define the min allowed resources for the pod
+  minAllowed: {}
+  # cpu: 200m
+  # memory: 100Mi
+
+  # updatePolicy:
+    # Specifies minimal number of replicas which need to be alive for VPA Updater to attempt pod eviction
+    # minReplicas: 1
+    # Specifies whether recommended updates are applied when a Pod is started and whether recommended updates
+    # are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "Auto".
+    # updateMode: Auto
diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp
index ef6bdc89..ec16202d 100644
--- a/src/cpucounters.cpp
+++ b/src/cpucounters.cpp
@@ -552,7 +552,7 @@ bool PCM::L3CacheOccupancyMetricAvailable() const
 
 bool PCM::CoreLocalMemoryBWMetricAvailable() const
 {
-    if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata
+    //if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata
     PCM_CPUID_INFO cpuinfo;
     if (!(QOSMetricAvailable() && L3QOSMetricAvailable()))
             return false;
@@ -562,7 +562,7 @@ bool PCM::CoreLocalMemoryBWMetricAvailable() const
 
 bool PCM::CoreRemoteMemoryBWMetricAvailable() const
 {
-    if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata
+    //if (cpu_model == SKX && cpu_stepping < 5) return false; // SKZ4 errata
     PCM_CPUID_INFO cpuinfo;
     if (!(QOSMetricAvailable() && L3QOSMetricAvailable()))
         return false;
diff --git a/src/pcm-sensor-server.cpp b/src/pcm-sensor-server.cpp
index 72a89ec5..70e05dc6 100644
--- a/src/pcm-sensor-server.cpp
+++ b/src/pcm-sensor-server.cpp
@@ -427,6 +427,10 @@ class JSONPrinter : Visitor
         PCM* pcm = PCM::getInstance();
         printCounter( "DRAM Writes",                   getBytesWrittenToMC    ( before, after ) );
         printCounter( "DRAM Reads",                    getBytesReadFromMC     ( before, after ) );
+
+        if (pcm->localMemoryRequestRatioMetricAvailable())
+            printCounter( "DRAM Local Percentage",         getLocalMemoryRequestRatio( before, after ) );
+
         if(pcm->nearMemoryMetricsAvailable()){
             printCounter( "NM HitRate",                    getNMHitRate           ( before, after ) );
             printCounter( "NM Hits",                       getNMHits              ( before, after ) );
@@ -715,6 +719,10 @@ class PrometheusPrinter : Visitor
         addToHierarchy( "source=\"uncore\"" );
         printCounter( "DRAM Writes",                   getBytesWrittenToMC    ( before, after ) );
         printCounter( "DRAM Reads",                    getBytesReadFromMC     ( before, after ) );
+
+        if (pcm->localMemoryRequestRatioMetricAvailable())
+            printCounter( "DRAM Local Percentage",         getLocalMemoryRequestRatio( before, after ) );
+
         if(pcm->nearMemoryMetricsAvailable()){
             printCounter( "NM Hits",                       getNMHits              ( before, after ) );
             printCounter( "NM Misses",                     getNMMisses            ( before, after ) );

From d75b013f45d47366608a938f4550f051dabb7a64 Mon Sep 17 00:00:00 2001
From: Pawel Palucki <pawel.palucki@sap.com>
Date: Thu, 6 Jun 2024 16:34:43 +0200
Subject: [PATCH 2/8] First version of linter + tests

---
 deployment/pcm/Chart.yaml                    |  2 +-
 deployment/pcm/Makefile                      |  2 ++
 deployment/pcm/values-direct-privileged.yaml |  1 -
 deployment/pcm/values.yaml                   | 26 ++++++++++----------
 4 files changed, 16 insertions(+), 15 deletions(-)
 create mode 100644 deployment/pcm/Makefile

diff --git a/deployment/pcm/Chart.yaml b/deployment/pcm/Chart.yaml
index f200feaa..685e8b4d 100644
--- a/deployment/pcm/Chart.yaml
+++ b/deployment/pcm/Chart.yaml
@@ -1,7 +1,7 @@
 apiVersion: v2
 name: pcm
 version: 0.1.0
-appVersion: "202403"
+appVersion: "202404"
 description: A PCM Helm chart for Kubernetes
 home: https://github.com/intel/pcm
 maintainers:
diff --git a/deployment/pcm/Makefile b/deployment/pcm/Makefile
new file mode 100644
index 00000000..6ae38b12
--- /dev/null
+++ b/deployment/pcm/Makefile
@@ -0,0 +1,2 @@
+chart-lint-report.txt: values.yaml templates
+	docker run -ti --rm -w /pcm -v `realpath $(PWD)/../..`:/pcm quay.io/helmpack/chart-testing ct lint --charts deployment/pcm --validate-maintainers=false | tee chart-lint-report.txt
diff --git a/deployment/pcm/values-direct-privileged.yaml b/deployment/pcm/values-direct-privileged.yaml
index c307438d..531224c6 100644
--- a/deployment/pcm/values-direct-privileged.yaml
+++ b/deployment/pcm/values-direct-privileged.yaml
@@ -13,4 +13,3 @@ resctrlMount: false     # with MSR resctrl mount is not needed
 resctrlInsideMount: false
 sysMount: false
 pciMount: false
-mcfgMount: false
diff --git a/deployment/pcm/values.yaml b/deployment/pcm/values.yaml
index b8eda605..18643f0c 100644
--- a/deployment/pcm/values.yaml
+++ b/deployment/pcm/values.yaml
@@ -1,9 +1,9 @@
 ### -------------- Naming -------------------
-# used in 
+# Used in:
 # - common label: app.kubernetes.io/name otherwise "Chart name"
 # - also in selectorLabels together with release.name
 # defaults to "Chart.name"
-nameOverride: ""        
+nameOverride: ""
 # Used as daemonset name (usually based on truncated "name + release name")
 fullnameOverride: ""
 
@@ -19,7 +19,7 @@ imagePullSecrets: {}
 privileged: false
 
 # Use new kernel 5.8+ PERFMON (least privileged) instead of generic SYS_ADMIN capability
-# !Warning requires kernel 5.8+ 
+# !Warning requires kernel 5.8+
 # more info here: https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html#perf-events-access-control
 cap_perfmon: true
 
@@ -40,13 +40,13 @@ probes: false
 ### -------------- Metrics: Uncore ------------
 # Mounts section
 # NOTE: only required for direct mode
-# required for uncore metrics discovery and working only in baremetal, not available for VM 
+# required for uncore metrics discovery and working only in baremetal, not available for VM
 sysMount: false         # mounts host /sys into container /pcm/sys/
 pciMount: false         # mounts host /proc/bus/pci into container /pcm/proc/bus/pci/
 
 # NOTE this is only required for direct unprivileged mode  ?!?!?!
-# TODO: to be removed!!!?!?!!?!? (already coverred sysMounts !!!!)
-#mcfgMount: false       # mounts hosts: /sys/firmware/acpi/tables/MCFG -> /pcm/sys/firmware/acpi/tables/MCFG
+# TODO: to be removed!!!?!?!!?!? (already coverred sysMounts !!!!) yes or not
+mcfgMount: false       # mounts hosts: /sys/firmware/acpi/tables/MCFG -> /pcm/sys/firmware/acpi/tables/MCFG
 
 ### linux Perf (indirect) vs msr(direct)
 # Lets try "indirect" as default
@@ -61,13 +61,13 @@ PCM_USE_RESCTRL: 1            # use Linux Perf  instead of MSR access (more reli
 # required for indirect RDT access, not available for VM only in baremetal
 # do not mount by default RDT can be also accessed through direct MSR programming
 resctrlMount: true        # mount from external host
-resctrlInsideMount: false     # TODO: mount inside with extra call to mount, requires image with mount installed - doesn't require 
+resctrlInsideMount: false     # TODO: mount inside with extra call to mount, requires image with mount installed - doesn't require
 
 ### -------------- Other (NMI handling and/or on VM/AWS)
 PCM_IGNORE_ARCH_PERFMON: 0    # After VM is detected through CPUID (hypervisor flag) - check arch_perfmon flag to be also enabled - fail if not avaiable (0 - do check, 1 - disable check)
 # 0: Disabling NMI watchdog since it consumes one hw-PMU counter, requires nmiWatchdogMount to be true
 # 1: don't disable NMI watchdog (reducing the core metrics set) - prefferd for production usage!
-# but even with 0 automatic AWS workround applies! 
+# but even with 0 automatic AWS workround applies!
 PCM_KEEP_NMI_WATCHDOG: 0
 # workaround: after VM is detected: "INFO: Reducing the number of programmable counters to 3 to workaround the fixed cycle counter virtualization issue on AWS.\n";)
 # 1: disables workaround and tries to use four programable counters (without workaround on VM will pcm-sensor-server will hang)
@@ -79,7 +79,7 @@ nmiWatchdogMount: true
 
 ### -------------- Other (Debugging options for uncore pmu discovery)
 PCM_NO_UNCORE_PMU_DISCOVERY: 0      #  skip 1: this is not required for direct privileged access and with 0 ends with WARNING enumaration failed
-PCM_PRINT_UNCORE_PMU_DISCOVERY: 1   #  show: discovered pmu 
+PCM_PRINT_UNCORE_PMU_DISCOVERY: 1   #  show: discovered pmu
 PCM_PRINT_TOPOLOGY: 0               #  show individual CPU topology  for each core (plenty of lines)
 PCM_NO_MAIN_EXCEPTION_HANDLER: 0    #  show full call stack of error
 
@@ -95,7 +95,7 @@ cpuRequest: 100m
 memoryLimit: 512Mi
 memoryRequest: 256Mi
 # requests, limits level need to be specified here
-extraResources: {} 
+extraResources: {}
 
 ### =============================== Integrations with other projects ====================================
 #
@@ -116,12 +116,12 @@ podMonitorInterval: 30s
 
 ### -------------- NRI balloons policy plugin -------------
 # PCM deployment to be intergrated with NRI balloons resource policy intergration
-# if true, will add special annotation to allow pcm pod use all the core, regardless NRI balloons policy rules. 
+# if true, will add special annotation to allow pcm pod use all the core, regardless NRI balloons policy rules.
 nriBalloonsPolicyIntegration: false
 
 ### -------------  node-feature-discovery -----------------
-# when enabled specific set of labels will be used as node selector (Intel vendor, RDT availability, baremetal) 
-nfd: false  
+# when enabled specific set of labels will be used as node selector (Intel vendor, RDT availability, baremetal)
+nfd: false
 # if enabled daemonset nodeAffinity will require node without feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR flag (requires nfd=true)
 nfdBaremetalAffinity: false
 # if enabled, followin RDT labels will be required for scheduling (requires nfd=true)

From 92fbe0c0fb8b928993f4d9c815006092b04cb935 Mon Sep 17 00:00:00 2001
From: Pawel Palucki <pawel.palucki@sap.com>
Date: Fri, 7 Jun 2024 12:57:15 +0200
Subject: [PATCH 3/8] README update + better Dockerfile.debug

---
 Dockerfile.debug         |  3 ++-
 deployment/pcm/README.md | 18 +++++++++++-------
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/Dockerfile.debug b/Dockerfile.debug
index da966e97..8b3e04ed 100644
--- a/Dockerfile.debug
+++ b/Dockerfile.debug
@@ -2,5 +2,6 @@ FROM fedora:40@sha256:4e007f288dce23966216be81ef62ba05d139b9338f327c1d1c73b7167d
 
 RUN dnf -y install gcc-c++ git findutils make cmake strace gdb util-linux
 COPY . /tmp/pcm
-RUN --mount=type=cache,target=/tmp/pcm/build cd /tmp/pcm/build && cmake -D CMAKE_BUILD_TYPE=Debug .. && cmake --build . -t pcm pcm-sensor-server pcm-tpmi -j && cp -v /tmp/pcm/build/bin/pcm* /bin/
+RUN --mount=type=cache,target=/tmp/pcm/build cd /tmp/pcm/build && cmake -D CMAKE_BUILD_TYPE=Debug .. && cmake --build . -t pcm pcm-sensor-server pcm-tpmi -j && mkdir -p /usr/local/bin && cp -v /tmp/pcm/build/bin/pcm* /usr/local/bin/
 #ENV PCM_NO_PERF=1
+ENTRYPOINT [ "/usr/local/bin/pcm-sensor-server", "-p", "9738", "-r" ]
diff --git a/deployment/pcm/README.md b/deployment/pcm/README.md
index 338be95c..9605e897 100644
--- a/deployment/pcm/README.md
+++ b/deployment/pcm/README.md
@@ -95,6 +95,7 @@ For validation to verify that all metrics are available without msr, unload "msr
 ```
 rmmod msr
 echo 2 > /proc/sys/kernel/perf_event_paranoid
+cat /proc/sys/kernel/perf_event_paranoid  # expected value 2
 ```
 
 #### 2) Create kind based Kubernetes cluster
@@ -292,6 +293,7 @@ helm install pcm . -f values-vm.yaml
 
 #### Heterogeneous (mixed VM/metal instances) cluster 
 
+values-metal.yaml requires node-feature-discovery to be preinstallaed
 ```
 helm install pcm-vm . -f values-vm.yaml
 helm install pcm-metal . -f values-metal.yaml
@@ -316,20 +318,22 @@ wget https://kind.sigs.k8s.io/examples/kind-with-registry.sh
 bash kind-with-registry.sh
 ```
 
-2) Build docker image and upload to local registry (from project root directory)
-```
-docker build . -t localhost:5001/pcm-local 
-docker push localhost:5001/pcm-local
+2) Build docker image and upload to local registry 
 
+```
 # optionally create buildx based builder
 mkdir ~/.docker/cli-plugins
 curl -sL https://github.com/docker/buildx/releases/download/v0.14.0/buildx-v0.14.0.linux-amd64 -o ~/.docker/cli-plugins/docker-buildx
 chmod +x ~/.docker/cli-plugins/docker-buildx
 docker buildx create --driver docker-container --name mydocker --use --bootstrap
 
-# or with single line (from deployment/pcm/ directory)
-# Build local image for tests/development
-# Following Dockerfile contains source code of pcm and some debugging utils (like gdb,strace for further analysis)
+# Build production image from **project root directory**:
+docker build . -t localhost:5001/pcm-local 
+docker push localhost:5001/pcm-local
+
+# Build/push **debug** image with single line 
+# Debug Dockerfile contains source code of pcm and some debugging utils (like gdb,strace for further analysis)
+# Run from deployment/pcm/ directory:
 (cd ../.. ;  docker build . -f Dockerfile.debug -t localhost:5001/pcm-local && docker push localhost:5001/pcm-local)
 ```
 

From 513b7c93d9d761e2f2fe14a0400d8038332658c8 Mon Sep 17 00:00:00 2001
From: Pawel Palucki <pawel.palucki@sap.com>
Date: Tue, 11 Jun 2024 12:21:42 -0100
Subject: [PATCH 4/8] Chart testing using helm test

---
 .gitignore                                    |  1 +
 deployment/pcm/README.md                      | 15 ++++++++++++++
 .../pcm/templates/_tests/test-connection.yaml | 20 +++++++++++++++++++
 .../pcm/templates/_tests/test-service.yaml    | 19 ++++++++++++++++++
 src/CMakeLists.txt                            |  2 +-
 5 files changed, 56 insertions(+), 1 deletion(-)
 create mode 100644 deployment/pcm/templates/_tests/test-connection.yaml
 create mode 100644 deployment/pcm/templates/_tests/test-service.yaml

diff --git a/.gitignore b/.gitignore
index 2bd0d9df..01d6f0a2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,3 +37,4 @@ src/simdjson
 /deployment/pcm/nri/
 /deployment/pcm/kind-with-registry.sh
 /deployment/pcm/autoscaler
+/deployment/pcm/pcm-dashboard.json
diff --git a/deployment/pcm/README.md b/deployment/pcm/README.md
index 9605e897..6f577bea 100644
--- a/deployment/pcm/README.md
+++ b/deployment/pcm/README.md
@@ -358,6 +358,21 @@ kubectl exec -ti ds/pcm -- bash
 kubectl logs ds/pcm
 ```
 
+6) Helm testing
+
+```
+helm test pcm
+
+# in case of failing, see the logs of test connection pod 
+# NOTE: filter is used to ignore service (helm limitation, which tries to download logs from service), so it assumes service exists, because previous run failed
+helm test pcm --logs --filter name=pcm-test-connection
+
+# or run test-connection-pod manually
+kubectl run -ti --rm --image busybox pcm-test-connection-manual -- sh
+kubectl run -ti --rm --image busybox pcm-test-connection-manual -- ping pcm-test-connection -t 1 -W 1 -w 1 -c 1
+kubectl run -ti --rm --image busybox pcm-test-connection-manual -- wget -S -T 15 pcm-test-connection:9739/metrics
+```
+
 ### Metric collection methods (capabilities vs requirements)
 
 
diff --git a/deployment/pcm/templates/_tests/test-connection.yaml b/deployment/pcm/templates/_tests/test-connection.yaml
new file mode 100644
index 00000000..69f074b5
--- /dev/null
+++ b/deployment/pcm/templates/_tests/test-connection.yaml
@@ -0,0 +1,20 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "pcm.fullname" . }}-test-connection"
+  #name: pcm
+  # labels:
+  #{{/*   {{- include "pcm.labels" . | nindent 4 }} */}}
+  annotations:
+    "helm.sh/hook": test
+    "helm.sh/hook-weight": "2"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+spec:
+  containers:
+    - name: wget
+      image: busybox
+      command: 
+      - 'sh' 
+      - '-c'
+      - 'sleep 5; ping {{ include "pcm.fullname" . }}-test-connection -t 1 -W 1 -w 1 -c 1 ; wget -T 15 -S {{ include "pcm.fullname" . }}-test-connection:9739/metrics -O - | grep Measurement_Interval_in_us'
+  restartPolicy: Never
diff --git a/deployment/pcm/templates/_tests/test-service.yaml b/deployment/pcm/templates/_tests/test-service.yaml
new file mode 100644
index 00000000..8416c3f5
--- /dev/null
+++ b/deployment/pcm/templates/_tests/test-service.yaml
@@ -0,0 +1,19 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: "{{ include "pcm.fullname" . }}-test-connection"
+  labels:
+    {{- include "pcm.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    "helm.sh/hook-weight": "1"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+spec:
+  type: NodePort
+  ports:
+    - port: 9739
+      targetPort: pcm-metrics
+      protocol: TCP
+      name: pcm-metrics
+  selector:
+    {{- include "pcm.selectorLabels" . | nindent 4 }}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 0af9ad1c..a211d7cd 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,7 +3,7 @@
 
 
 # All pcm-* executables
-set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-lspci pcm-pcicfg pcm-mmio pcm-tpmi pcm-raw pcm-accel)
+set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-lspci pcm-pcicfg pcm-mmio pcm-tpmi pcm-raw pcm-accel dashboardtest)
 
 file(GLOB COMMON_SOURCES pcm-accel-common.cpp msr.cpp cpucounters.cpp pci.cpp mmio.cpp tpmi.cpp pmt.cpp bw.cpp utils.cpp topology.cpp debug.cpp threadpool.cpp uncore_pmu_discovery.cpp)
 

From 6f3d9eba5bf3c050f014d3a1a8e4c75568033455 Mon Sep 17 00:00:00 2001
From: Pawel Palucki <pawel.palucki@sap.com>
Date: Tue, 11 Jun 2024 16:03:49 -0100
Subject: [PATCH 5/8] improve helm test - fix proper namespace

---
 deployment/pcm/README.md                             | 4 ++--
 deployment/pcm/templates/_tests/test-connection.yaml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/deployment/pcm/README.md b/deployment/pcm/README.md
index 6f577bea..5518e74f 100644
--- a/deployment/pcm/README.md
+++ b/deployment/pcm/README.md
@@ -369,8 +369,8 @@ helm test pcm --logs --filter name=pcm-test-connection
 
 # or run test-connection-pod manually
 kubectl run -ti --rm --image busybox pcm-test-connection-manual -- sh
-kubectl run -ti --rm --image busybox pcm-test-connection-manual -- ping pcm-test-connection -t 1 -W 1 -w 1 -c 1
-kubectl run -ti --rm --image busybox pcm-test-connection-manual -- wget -S -T 15 pcm-test-connection:9739/metrics
+kubectl run -ti --rm --image busybox pcm-test-connection-manual -- ping pcm-test-connection.default.svc.cluster.local -t 1 -W 1 -w 1 -c 1
+kubectl run -ti --rm --image busybox pcm-test-connection-manual -- wget -S -T 15 pcm-test-connection.default.svc.cluster.local:9739/metrics
 ```
 
 ### Metric collection methods (capabilities vs requirements)
diff --git a/deployment/pcm/templates/_tests/test-connection.yaml b/deployment/pcm/templates/_tests/test-connection.yaml
index 69f074b5..517a7b7b 100644
--- a/deployment/pcm/templates/_tests/test-connection.yaml
+++ b/deployment/pcm/templates/_tests/test-connection.yaml
@@ -16,5 +16,5 @@ spec:
       command: 
       - 'sh' 
       - '-c'
-      - 'sleep 5; ping {{ include "pcm.fullname" . }}-test-connection -t 1 -W 1 -w 1 -c 1 ; wget -T 15 -S {{ include "pcm.fullname" . }}-test-connection:9739/metrics -O - | grep Measurement_Interval_in_us'
+      - 'sleep 5; ping {{ include "pcm.fullname" . }}-test-connection.{{ .Release.Namespace }}.svc.cluster.local -t 1 -W 1 -w 1 -c 1 ; wget -T 15 -S {{ include "pcm.fullname" . }}-test-connection.{{ .Release.Namespace }}.svc.cluster.localg:9739/metrics -O - | grep Measurement_Interval_in_us'
   restartPolicy: Never

From a088ab0ced49740367bec0aea4f5e4f04db7e489 Mon Sep 17 00:00:00 2001
From: Pawel Palucki <pawel.palucki@sap.com>
Date: Wed, 12 Jun 2024 13:15:06 -0100
Subject: [PATCH 6/8] Initial version of e2e for pcm/prometheus and VPA

---
 .gitignore                                    |  1 +
 deployment/pcm/Makefile                       | 81 +++++++++++++++++++
 .../_tests/test-connection.yaml               | 15 ----
 .../pcm/templates/_tests/test-connection.yaml |  2 +-
 4 files changed, 83 insertions(+), 16 deletions(-)
 delete mode 100644 deployment/pcm/helm_chart_test_and_notes_TODO/_tests/test-connection.yaml

diff --git a/.gitignore b/.gitignore
index 01d6f0a2..e1845eb8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,3 +38,4 @@ src/simdjson
 /deployment/pcm/kind-with-registry.sh
 /deployment/pcm/autoscaler
 /deployment/pcm/pcm-dashboard.json
+/deployment/pcm/kind*
diff --git a/deployment/pcm/Makefile b/deployment/pcm/Makefile
index 6ae38b12..c1c56816 100644
--- a/deployment/pcm/Makefile
+++ b/deployment/pcm/Makefile
@@ -1,2 +1,83 @@
+.PHONY=kind-cluster-clean
+
+# https://stackoverflow.com/questions/649246/is-it-possible-to-create-a-multi-line-string-variable-in-a-makefile
+define KIND_EXTRA_MOUNTS
+nodes:
+- role: control-plane
+  extraMounts:
+  - hostPath: /sys/fs/resctrl
+    containerPath: /sys/fs/resctrl
+endef
+
 chart-lint-report.txt: values.yaml templates
 	docker run -ti --rm -w /pcm -v `realpath $(PWD)/../..`:/pcm quay.io/helmpack/chart-testing ct lint --charts deployment/pcm --validate-maintainers=false | tee chart-lint-report.txt
+
+export KIND_EXTRA_MOUNTS
+kind-with-registry.sh:
+	curl -sl https://kind.sigs.k8s.io/examples/kind-with-registry.sh -o kind-with-registry.sh.tmp
+	echo "$$KIND_EXTRA_MOUNTS" >kind_extra_mounts.txt
+	sed '/apiVersion: kind.x-k8s.io\/v1alpha4/r kind_extra_mounts.txt' kind-with-registry.sh.tmp >kind-with-registry.sh
+	chmod +x kind-with-registry.sh
+
+kind-cluster: kind-with-registry.sh
+	./kind-with-registry.sh
+	kind export kubeconfig
+	touch kind-cluster
+
+
+clean: 
+	kind delete cluster
+	docker rm -f kind-registry
+	rm -fv kind_extra_mounts.txt
+	rm -fv kind-with-registry.sh
+	rm -fv kind-with-registry.sh.tmp
+	rm -fv kind-deploy-prometheus
+
+
+#
+# minimal
+#
+kind-deploy-pcm:
+	helm install pcm .
+
+kind-pcm-test:
+	helm test pcm
+
+e2e-small: kind-cluster kind-deploy-pcm kind-pcm-test
+
+
+#
+# prometheus (pod monitor test)
+#
+kind-deploy-prometheus:
+	helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+	helm upgrade --install prometheus prometheus-community/kube-prometheus-stack --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false
+	kubectl wait sts prometheus-prometheus-kube-prometheus-prometheus --for=jsonpath='{.status.replicas}'=1
+	touch kind-deploy-prometheus
+
+kind-pcm-upgrade-prometheus:
+	helm upgrade --install pcm . --set podMonitor=true
+
+kind-pcm-prometheus-test:
+	kubectl proxy & sleep 5 && curl -sL http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy/api/v1/query?query=Measurement_Interval_in_us | grep Measurement_Interval_in_us && kill %1
+
+e2e-prometheus: kind-cluster kind-deploy-prometheus kind-pcm-upgrade-prometheus kind-pcm-test kind-pcm-prometheus-test
+
+#
+# VPA
+#
+autoscaler:
+	git clone --depth 1 --single-branch https://github.com/kubernetes/autoscaler
+
+kind-deploy-metrics-server:
+	helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/
+	helm repo update
+	helm upgrade --install --set args={--kubelet-insecure-tls} metrics-server metrics-server/metrics-server --namespace kube-system
+
+kind-deploy-vpa: autoscaler
+	./autoscaler/vertical-pod-autoscaler/hack/vpa-up.sh
+
+kind-pcm-vpa:
+	helm upgrade --install pcm . --set verticalPodAutoscaler.enabled=true
+
+e2e-vpa: kind-cluster kind-deploy-prometheus kind-deploy-vpa kind-pcm-test  
diff --git a/deployment/pcm/helm_chart_test_and_notes_TODO/_tests/test-connection.yaml b/deployment/pcm/helm_chart_test_and_notes_TODO/_tests/test-connection.yaml
deleted file mode 100644
index d26943be..00000000
--- a/deployment/pcm/helm_chart_test_and_notes_TODO/_tests/test-connection.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
-  name: "{{ include "pcm.fullname" . }}-test-connection"
-  labels:
-    {{- include "pcm.labels" . | nindent 4 }}
-  annotations:
-    "helm.sh/hook": test
-spec:
-  containers:
-    - name: wget
-      image: busybox
-      command: ['wget']
-      args: ['{{ include "pcm.fullname" . }}:9738']
-  restartPolicy: Never
diff --git a/deployment/pcm/templates/_tests/test-connection.yaml b/deployment/pcm/templates/_tests/test-connection.yaml
index 517a7b7b..3626676c 100644
--- a/deployment/pcm/templates/_tests/test-connection.yaml
+++ b/deployment/pcm/templates/_tests/test-connection.yaml
@@ -16,5 +16,5 @@ spec:
       command: 
       - 'sh' 
       - '-c'
-      - 'sleep 5; ping {{ include "pcm.fullname" . }}-test-connection.{{ .Release.Namespace }}.svc.cluster.local -t 1 -W 1 -w 1 -c 1 ; wget -T 15 -S {{ include "pcm.fullname" . }}-test-connection.{{ .Release.Namespace }}.svc.cluster.localg:9739/metrics -O - | grep Measurement_Interval_in_us'
+      - 'sleep 15; ping {{ include "pcm.fullname" . }}-test-connection.{{ .Release.Namespace }}.svc.cluster.local -t 1 -W 1 -w 1 -c 1 ; wget -T 15 -S {{ include "pcm.fullname" . }}-test-connection.{{ .Release.Namespace }}.svc.cluster.local:9739/metrics -O - | grep Measurement_Interval_in_us'
   restartPolicy: Never

From cb36269e8cb15865a4395e6471b9ff763376b24c Mon Sep 17 00:00:00 2001
From: Pawel Palucki <pawel.palucki@sap.com>
Date: Thu, 13 Jun 2024 10:54:10 -0100
Subject: [PATCH 7/8] fix with proper names and add NFD/metal case

---
 .gitignore              |  4 +-
 deployment/pcm/Makefile | 84 +++++++++++++++++++++++++----------------
 2 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/.gitignore b/.gitignore
index e1845eb8..beb7c8ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,7 +35,5 @@ build
 src/simdjson
 /deployment/pcm/smarter-device-manager/
 /deployment/pcm/nri/
-/deployment/pcm/kind-with-registry.sh
-/deployment/pcm/autoscaler
 /deployment/pcm/pcm-dashboard.json
-/deployment/pcm/kind*
+/deployment/pcm/_kind*
diff --git a/deployment/pcm/Makefile b/deployment/pcm/Makefile
index c1c56816..3f2f163b 100644
--- a/deployment/pcm/Makefile
+++ b/deployment/pcm/Makefile
@@ -1,5 +1,3 @@
-.PHONY=kind-cluster-clean
-
 # https://stackoverflow.com/questions/649246/is-it-possible-to-create-a-multi-line-string-variable-in-a-makefile
 define KIND_EXTRA_MOUNTS
 nodes:
@@ -13,71 +11,91 @@ chart-lint-report.txt: values.yaml templates
 	docker run -ti --rm -w /pcm -v `realpath $(PWD)/../..`:/pcm quay.io/helmpack/chart-testing ct lint --charts deployment/pcm --validate-maintainers=false | tee chart-lint-report.txt
 
 export KIND_EXTRA_MOUNTS
-kind-with-registry.sh:
-	curl -sl https://kind.sigs.k8s.io/examples/kind-with-registry.sh -o kind-with-registry.sh.tmp
-	echo "$$KIND_EXTRA_MOUNTS" >kind_extra_mounts.txt
-	sed '/apiVersion: kind.x-k8s.io\/v1alpha4/r kind_extra_mounts.txt' kind-with-registry.sh.tmp >kind-with-registry.sh
-	chmod +x kind-with-registry.sh
-
-kind-cluster: kind-with-registry.sh
-	./kind-with-registry.sh
+_kind_with_registry.sh:
+	curl -sl https://kind.sigs.k8s.io/examples/kind-with-registry.sh -o _kind_with_registry.sh.tmp
+	echo "$$KIND_EXTRA_MOUNTS" >_kind_extra_mounts.txt
+	sed '/apiVersion: kind.x-k8s.io\/v1alpha4/r _kind_extra_mounts.txt' _kind_with_registry.sh.tmp >_kind_with_registry.sh
+	chmod +x _kind_with_registry.sh
+
+_kind_deploy_cluster: _kind_with_registry.sh
+	./_kind_with_registry.sh
 	kind export kubeconfig
-	touch kind-cluster
+	touch _kind_deploy_cluster
 
 
 clean: 
 	kind delete cluster
 	docker rm -f kind-registry
-	rm -fv kind_extra_mounts.txt
-	rm -fv kind-with-registry.sh
-	rm -fv kind-with-registry.sh.tmp
-	rm -fv kind-deploy-prometheus
+	rm -fv _kind_with_registry.sh
+	rm -fv _kind_extra_mounts.txt
+	rm -fv _kind_with_registry.sh.tmp
+	rm -fv _kind_deploy_cluster
+	rm -fv _kind_deploy_prometheus
+	rm -fv _kind_deploy_pcm
 
 
 #
-# minimal
+# e2e-small: minimal E2e pcm pod only test
 #
-kind-deploy-pcm:
+_kind_deploy_pcm:
 	helm install pcm .
+	touch _kind_deploy_pcm
 
-kind-pcm-test:
+kind_pcm_test:
 	helm test pcm
 
-e2e-small: kind-cluster kind-deploy-pcm kind-pcm-test
+e2e-small: _kind_deploy_cluster _kind_deploy_pcm kind_pcm_test
+
+#
+# e2e-small-metal-nfd: minimal E2e pcm pod only test but with direct approach
+#
+_kind_deploy_pcm_metal:
+	helm upgrade --install pcm . -f values-metal.yaml
+	touch _kind_deploy_pcm_metal
+
+_kind_deploy_nfd:
+	helm upgrade --install pcm . -f values-metal.yaml
+	touch _kind_deploy_pcm_metal
+
+kind_pcm_test:
+	helm test pcm
 
+e2e-small-metal: _kind_deploy_cluster _kind_deploy_pcm_metal kind_pcm_test
 
 #
-# prometheus (pod monitor test)
+# e2e-prometheus: E2E test for podMonitor (pod monitor test)
 #
-kind-deploy-prometheus:
+_kind_deploy_prometheus:
 	helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
 	helm upgrade --install prometheus prometheus-community/kube-prometheus-stack --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false
 	kubectl wait sts prometheus-prometheus-kube-prometheus-prometheus --for=jsonpath='{.status.replicas}'=1
-	touch kind-deploy-prometheus
+	touch _kind_deploy_prometheus
 
-kind-pcm-upgrade-prometheus:
+_kind_deploy_pcm-with-prometheus:
 	helm upgrade --install pcm . --set podMonitor=true
 
-kind-pcm-prometheus-test:
+kind_pcm_prometheus_test:
 	kubectl proxy & sleep 5 && curl -sL http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy/api/v1/query?query=Measurement_Interval_in_us | grep Measurement_Interval_in_us && kill %1
 
-e2e-prometheus: kind-cluster kind-deploy-prometheus kind-pcm-upgrade-prometheus kind-pcm-test kind-pcm-prometheus-test
+e2e-prometheus: _kind_deploy_cluster _kind_deploy_prometheus _kind_deploy_pcm-with-prometheus kind_pcm_test kind_pcm_prometheus_test
 
 #
-# VPA
+# e2e-vpa: VPA E2E tests
 #
-autoscaler:
-	git clone --depth 1 --single-branch https://github.com/kubernetes/autoscaler
+_kind_autoscaler:
+	git clone --depth 1 --single-branch https://github.com/kubernetes/autoscaler _kind_autoscaler
 
-kind-deploy-metrics-server:
+_kind_deploy_metrics_server:
 	helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/
 	helm repo update
 	helm upgrade --install --set args={--kubelet-insecure-tls} metrics-server metrics-server/metrics-server --namespace kube-system
+	touch _kind_deploy_metrics_server
 
-kind-deploy-vpa: autoscaler
-	./autoscaler/vertical-pod-autoscaler/hack/vpa-up.sh
+_kind_deploy_vpa: autoscaler
+	./_kind_autoscaler/vertical-pod-autoscaler/hack/vpa-up.sh
+	touch _kind_deploy_vpa
 
-kind-pcm-vpa:
+_kind_deploy_pcm_with_vpa:
 	helm upgrade --install pcm . --set verticalPodAutoscaler.enabled=true
 
-e2e-vpa: kind-cluster kind-deploy-prometheus kind-deploy-vpa kind-pcm-test  
+e2e-vpa: _kind_deploy_cluster _kind_deploy_vpa _kind_deploy_pcm_with_vpa kind_pcm_test  

From 91b445ecc8c4540a848b59aba4d0bed1e824c1b5 Mon Sep 17 00:00:00 2001
From: Pawel Palucki <pawel.palucki@sap.com>
Date: Tue, 18 Jun 2024 12:17:42 -0100
Subject: [PATCH 8/8] e2e tests: cont

---
 deployment/pcm/Makefile                       | 112 +++++++++++-------
 deployment/pcm/README.md                      |  33 +++++-
 ...alues-metal.yaml => values-metal-nfd.yaml} |   1 -
 deployment/pcm/values.yaml                    |   3 +-
 4 files changed, 101 insertions(+), 48 deletions(-)
 rename deployment/pcm/{values-metal.yaml => values-metal-nfd.yaml} (92%)

diff --git a/deployment/pcm/Makefile b/deployment/pcm/Makefile
index 3f2f163b..3ddcd322 100644
--- a/deployment/pcm/Makefile
+++ b/deployment/pcm/Makefile
@@ -1,3 +1,10 @@
+
+chart-lint-report.txt: values.yaml templates # Lint the chart with chart-testing (ct) in a container; output is also saved to this file. CURDIR (not PWD) keeps the repo mount correct under `make -C`.
+	docker run -ti --rm -w /pcm -v `realpath $(CURDIR)/../..`:/pcm quay.io/helmpack/chart-testing ct lint --charts deployment/pcm --validate-maintainers=false | tee chart-lint-report.txt
+
+#
+# kind cluster targets
+#
 # https://stackoverflow.com/questions/649246/is-it-possible-to-create-a-multi-line-string-variable-in-a-makefile
 define KIND_EXTRA_MOUNTS
 nodes:
@@ -7,9 +14,6 @@ nodes:
     containerPath: /sys/fs/resctrl
 endef
 
-chart-lint-report.txt: values.yaml templates
-	docker run -ti --rm -w /pcm -v `realpath $(PWD)/../..`:/pcm quay.io/helmpack/chart-testing ct lint --charts deployment/pcm --validate-maintainers=false | tee chart-lint-report.txt
-
 export KIND_EXTRA_MOUNTS
 _kind_with_registry.sh:
 	curl -sl https://kind.sigs.k8s.io/examples/kind-with-registry.sh -o _kind_with_registry.sh.tmp
@@ -23,64 +27,73 @@ _kind_deploy_cluster: _kind_with_registry.sh
 	touch _kind_deploy_cluster
 
 
-clean: 
-	kind delete cluster
-	docker rm -f kind-registry
-	rm -fv _kind_with_registry.sh
-	rm -fv _kind_extra_mounts.txt
-	rm -fv _kind_with_registry.sh.tmp
-	rm -fv _kind_deploy_cluster
-	rm -fv _kind_deploy_prometheus
-	rm -fv _kind_deploy_pcm
-
-
 #
-# e2e-small: minimal E2e pcm pod only test
+# 1) e2e-default: minimal E2e pcm pod only test
 #
-_kind_deploy_pcm:
+kind_deploy_pcm:
 	helm install pcm .
-	touch _kind_deploy_pcm
+	kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1
 
 kind_pcm_test:
 	helm test pcm
 
-e2e-small: _kind_deploy_cluster _kind_deploy_pcm kind_pcm_test
+e2e-default: _kind_deploy_cluster kind_deploy_pcm kind_pcm_test
 
 #
-# e2e-small-metal-nfd: minimal E2e pcm pod only test but with direct approach
+# 2) e2e-default-local-image: minimal E2e pcm with local image build
 #
-_kind_deploy_pcm_metal:
-	helm upgrade --install pcm . -f values-metal.yaml
-	touch _kind_deploy_pcm_metal
+build_local_image:
+	(cd ../.. ; docker build . -t localhost:5001/pcm-local)
+	docker push localhost:5001/pcm-local
 
-_kind_deploy_nfd:
-	helm upgrade --install pcm . -f values-metal.yaml
-	touch _kind_deploy_pcm_metal
+kind_deploy_pcm_local_image:
+	helm upgrade --install --reset-values --wait pcm . -f values-local-image.yaml
+	kubectl wait daemonset pcm --for=jsonpath='{.spec.template.spec.containers[0].image}'=localhost:5001/pcm-local:latest
+	kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1
 
-kind_pcm_test:
-	helm test pcm
-
-e2e-small-metal: _kind_deploy_cluster _kind_deploy_pcm_metal kind_pcm_test
+e2e-default-local-image: _kind_deploy_cluster build_local_image kind_deploy_pcm_local_image kind_pcm_test
 
 #
-# e2e-prometheus: E2E test for podMonitor (pod monitor test)
+# 3) e2e-prometheus: E2E test for podMonitor (pod monitor test)
 #
 _kind_deploy_prometheus:
 	helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
-	helm upgrade --install prometheus prometheus-community/kube-prometheus-stack --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false
-	kubectl wait sts prometheus-prometheus-kube-prometheus-prometheus --for=jsonpath='{.status.replicas}'=1
+	helm upgrade --install --reset-values prometheus prometheus-community/kube-prometheus-stack --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false --wait
+	kubectl wait Prometheus prometheus-kube-prometheus-prometheus --for=jsonpath='{.status.availableReplicas}'=1
+	#kubectl wait sts prometheus-prometheus-kube-prometheus-prometheus --for=jsonpath='{.status.replicas}'=1
 	touch _kind_deploy_prometheus
 
-_kind_deploy_pcm-with-prometheus:
-	helm upgrade --install pcm . --set podMonitor=true
+kind_deploy_pcm_with_prometheus:
+	helm upgrade --install --reset-values pcm . --set podMonitor=true
+	kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1
+
+kind_pcm_test_prometheus:
+	kubectl proxy & proxy_pid=$$!; sleep 10; curl -sL 'http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy/api/v1/query?query=Measurement_Interval_in_us' | grep Measurement_Interval_in_us; rc=$$?; kill $$proxy_pid; exit $$rc
 
-kind_pcm_prometheus_test:
-	kubectl proxy & sleep 5 && curl -sL http://127.0.0.1:8001/api/v1/namespaces/default/services/prometheus-kube-prometheus-prometheus:http-web/proxy/api/v1/query?query=Measurement_Interval_in_us | grep Measurement_Interval_in_us && kill %1
+e2e-prometheus: _kind_deploy_cluster _kind_deploy_prometheus kind_deploy_pcm_with_prometheus kind_pcm_test kind_pcm_test_prometheus
+
+#
+# 4) e2e-metal-nfd: e2e test that checks that, with node-feature-discovery installed and the nfd values enabled, PCM is only installed on non-hypervised systems with Intel vendor and RDT available
+#
+_kind_deploy_nfd:
+	#kubectl apply -k https://github.com/kubernetes-sigs/node-feature-discovery/deployment/overlays/default?ref=v0.16.0-devel
+	helm repo add nfd https://kubernetes-sigs.github.io/node-feature-discovery/charts
+	helm upgrade --install --wait nfd nfd/node-feature-discovery --namespace node-feature-discovery --create-namespace 
+	# please be patient NFD requires around 2 minutes to annotate the node ...
+	kubectl wait node --timeout=2m kind-control-plane --for=jsonpath='{.metadata.labels.feature\.node\.kubernetes\.io\/cpu-model\.vendor_id}'=Intel
+
+kind_deploy_pcm_with_metal_nfd:
+	helm upgrade --install --reset-values pcm . -f values-metal-nfd.yaml
+	kubectl wait daemonset --timeout=2m pcm --for=jsonpath='{.status.numberReady}'=1
+
+kind_pcm_test_nfd:
+	kubectl wait daemonset pcm --timeout=2m --for=jsonpath='{.spec.template.spec.nodeSelector.feature\.node\.kubernetes\.io\/cpu-model\.vendor_id}'=Intel
+	helm test pcm
 
-e2e-prometheus: _kind_deploy_cluster _kind_deploy_prometheus _kind_deploy_pcm-with-prometheus kind_pcm_test kind_pcm_prometheus_test
+e2e-metal-nfd: _kind_deploy_cluster _kind_deploy_nfd kind_deploy_pcm_with_metal_nfd kind_pcm_test_nfd
 
 #
-# e2e-vpa: VPA E2E tests
+# 5) e2e-vpa: VPA E2E tests
 #
 _kind_autoscaler:
 	git clone --depth 1 --single-branch https://github.com/kubernetes/autoscaler _kind_autoscaler
@@ -88,14 +101,29 @@ _kind_autoscaler:
 _kind_deploy_metrics_server:
 	helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/
 	helm repo update
-	helm upgrade --install --set args={--kubelet-insecure-tls} metrics-server metrics-server/metrics-server --namespace kube-system
+	helm upgrade --install --reset-values --set args={--kubelet-insecure-tls} metrics-server metrics-server/metrics-server --namespace kube-system
+	kubectl wait deployment metrics-server --namespace kube-system --for=condition=Available --timeout=2m
 	touch _kind_deploy_metrics_server
 
 _kind_deploy_vpa: autoscaler
 	./_kind_autoscaler/vertical-pod-autoscaler/hack/vpa-up.sh
 	touch _kind_deploy_vpa
 
-_kind_deploy_pcm_with_vpa:
-	helm upgrade --install pcm . --set verticalPodAutoscaler.enabled=true
+kind_deploy_pcm_with_vpa:
+	helm upgrade --install --reset-values pcm . --set verticalPodAutoscaler.enabled=true
+	kubectl wait daemonset pcm --for=jsonpath='{.status.numberReady}'=1
+
+e2e-vpa: _kind_deploy_cluster _kind_deploy_vpa kind_deploy_pcm_with_vpa kind_pcm_test  
+
+#
+# Cleanup
+#
 
-e2e-vpa: _kind_deploy_cluster _kind_deploy_vpa _kind_deploy_pcm_with_vpa kind_pcm_test  
+clean: 
+	kind delete cluster
+	docker rm -f kind-registry
+	rm -fv _kind_with_registry.sh
+	rm -fv _kind_extra_mounts.txt
+	rm -fv _kind_with_registry.sh.tmp
+	rm -fv _kind_deploy_cluster
+	rm -fv _kind_deploy_prometheus
diff --git a/deployment/pcm/README.md b/deployment/pcm/README.md
index 5518e74f..510ad941 100644
--- a/deployment/pcm/README.md
+++ b/deployment/pcm/README.md
@@ -5,7 +5,7 @@ Helm chart instructions
 ### Features:
 
 - Configurable as non-privileged container (value: `privileged`, default: false) and privileged container,
-- Support for bare-metal and VM host configurations (files: [values-metal.yaml](values-metal.yaml), [values-vm.yaml](values-vm.yaml)),
+- Support for bare-metal and VM host configurations (files: [values-metal-nfd.yaml](values-metal-nfd.yaml), [values-vm.yaml](values-vm.yaml)),
 - Ability to deploy multiple releases alongside configured differently to handle different kinds of machines (bare-metal, VM) at the [same time](#heterogeneous-mixed-vmmetal-instances-cluster),
 - Linux Watchdog handling (controlled with `PCM_KEEP_NMI_WATCHDOG`, `PCM_NO_AWS_WORKAROUND`, `nmiWatchdogMount` values).
 - Deploy to own namespace with "helm install ... **-n pcm --create-namespace**".
@@ -77,6 +77,22 @@ More information here: https://kubernetes.io/docs/tutorials/security/ns-level-ps
 - hostPort 9738 is exposed on host. (TODO: security review, consider TLS, together with Prometheus scrapping !!).
 - Prometheus podMonitor is disabled (enabled it with --set podMonitor=true).
 
+### TLS 
+
+TODO:
+- requires pcm-sensor-server to be built with SSL support
+- ERROR !!!!
+
+```
+mkdir build
+cd build
+cmake .. -DCMAKE_CXX_FLAGS='-DUSE_SSL -lssl'
+zypper install openssl-devel
+make pcm-sensor-server -j
+openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -sha256 -days 3650 -nodes -subj "/C=XX/ST=StateName/L=CityName/O=CompanyName/OU=CompanySectionName/CN=CommonNameOrHostname"
+./bin/pcm-sensor-server -s -p 8443 --certificateFile cert.pem --privateKeyFile key.pem
+```
+
 ### Validation on local kind cluster
 
 #### Requirements
@@ -282,7 +298,7 @@ helm install pcm . -f values-direct-privileged.yaml
 #### Homogeneous bare metal instances cluster (full set of metrics)
 
 ```
-helm install pcm . -f values-metal.yaml
+helm install pcm . -f values-metal-nfd.yaml
 ```
 
 #### Homogenizer VM instances cluster (limited set of metrics core)
@@ -293,10 +309,10 @@ helm install pcm . -f values-vm.yaml
 
 #### Heterogeneous (mixed VM/metal instances) cluster 
 
-values-metal.yaml requires node-feature-discovery to be preinstallaed
+values-metal-nfd.yaml requires node-feature-discovery to be preinstalled
 ```
 helm install pcm-vm . -f values-vm.yaml
-helm install pcm-metal . -f values-metal.yaml
+helm install pcm-metal . -f values-metal-nfd.yaml
 ```
 
 #### Direct method as non-privileged container (not recommended)
@@ -402,3 +418,12 @@ kubectl run -ti --rm --image busybox pcm-test-connection-manual -- wget -S -T 15
 |               | energy                                                     |                                 |                       | cpucounters.cpp initEnergyMonitoring()                   |                                                     |
 
 
+### E2E tests 
+
+The following end-to-end tests, based on a kind environment, are provided as make targets:
+
+- `e2e-default` - tests PCM with the default configuration (indirect) and checks the connection by calling `helm test`,
+- `e2e-default-local-image` - same as above, but builds and deploys PCM with a locally built image,
+- `e2e-prometheus` - tests the PCM chart with PodMonitor deployed alongside the Prometheus stack and queries Prometheus for collected data,
+- `e2e-vpa` - deploys PCM with VerticalPodAutoscaler (requires metrics-server to be deployed alongside),
+- `e2e-metal-nfd` - tests the PCM chart on bare metal, scheduled by features exposed by node-feature-discovery (uses: values-metal-nfd.yaml).
diff --git a/deployment/pcm/values-metal.yaml b/deployment/pcm/values-metal-nfd.yaml
similarity index 92%
rename from deployment/pcm/values-metal.yaml
rename to deployment/pcm/values-metal-nfd.yaml
index 1ca73c1e..80a85bc5 100644
--- a/deployment/pcm/values-metal.yaml
+++ b/deployment/pcm/values-metal-nfd.yaml
@@ -5,4 +5,3 @@ PCM_NO_AWS_WORKAROUND: 1
 PCM_KEEP_NMI_WATCHDOG: 0
 nfd: true  
 nfdBaremetalAffinity: true
-nfdRDTAffinity: true
diff --git a/deployment/pcm/values.yaml b/deployment/pcm/values.yaml
index 18643f0c..1bbb9607 100644
--- a/deployment/pcm/values.yaml
+++ b/deployment/pcm/values.yaml
@@ -124,7 +124,8 @@ nriBalloonsPolicyIntegration: false
 nfd: false
 # if enabled daemonset nodeAffinity will require node without feature.node.kubernetes.io/cpu-cpuid.HYPERVISOR flag (requires nfd=true)
 nfdBaremetalAffinity: false
-# if enabled, followin RDT labels will be required for scheduling (requires nfd=true)
+# if enabled, following RDT labels will be required for scheduling (requires nfd=true)
+# TODO: these labels are no longer available with the default node-feature-discovery deployment
 # feature.node.kubernetes.io/cpu-rdt.RDTCMT=true
 # feature.node.kubernetes.io/cpu-rdt.RDTL3CA=true
 # feature.node.kubernetes.io/cpu-rdt.RDTMBA=true