diff --git a/.github/scripts/e2e/e2e-validate.sh b/.github/scripts/e2e/e2e-validate.sh new file mode 100755 index 000000000..3c5a58373 --- /dev/null +++ b/.github/scripts/e2e/e2e-validate.sh @@ -0,0 +1,132 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ----------------------------------------------------------------------------- +# e2e-validate.sh — CI e2e Gateway smoke-test (chat + completion, 7 iterations) +# By default we only test completion curls unless specifed to run chat. +# ----------------------------------------------------------------------------- + +show_help() { + cat <&2; exit 1; fi + NAMESPACE="$2"; shift 2 ;; + -m|--model) + if [[ -z "$2" ]]; then echo "Error: $1 requires a value." >&2; exit 1; fi + CLI_MODEL_ID="$2"; shift 2 ;; + -c|--chatValidation) TEST_CHAT=true; shift ;; + -v|--verbose) VERBOSE=true; shift ;; + -h|--help) show_help ;; + *) echo "Unknown option: $1"; show_help ;; + esac +done + +if [[ "${VERBOSE}" == "true" ]]; then + set -x +fi + +# ── Create a unique pod suffix ──────────────────────────────────────────── +gen_id() { echo $(( RANDOM % 10000 + 1 )); } + +# ── Discover Gateway address ──────────────────────────────────────────────── +HOST="${GATEWAY_HOST:-$(kubectl get gateway -n "$NAMESPACE" \ + -o jsonpath='{.items[0].status.addresses[0].value}' 2>/dev/null || true)}" +if [[ -z "$HOST" ]]; then + echo "Error: could not discover a Gateway address in namespace '$NAMESPACE'." >&2 + exit 1 +fi +PORT=80 +SVC_HOST="${HOST}:${PORT}" + +# ── Determine MODEL_ID ────────────────────────────────────────────────────── +if [[ -n "$CLI_MODEL_ID" ]]; then + MODEL_ID="$CLI_MODEL_ID" +elif [[ -n "${MODEL_ID-}" ]]; then + MODEL_ID="$MODEL_ID" +else + echo "Error: Failed to find model id. Please specify one using the -m flag or the MODEL_ID environment variable." 
>&2 + exit 1 +fi + +echo "Namespace: $NAMESPACE" +echo "Inference Gateway: ${SVC_HOST}" +echo "Model ID: $MODEL_ID" +echo + +# ── Main test loop (7 iterations) ────────────────────────────────────────── +for i in {1..7}; do + echo "=== Iteration $i of 7 ===" + failed=false + + if [[ -n "$TEST_CHAT" ]]; then + + # POST /v1/chat/completions + echo "1) POST /v1/chat/completions at ${SVC_HOST}" + chat_payload='{ + "model":"'"$MODEL_ID"'", + "messages":[{"role":"user","content":"Hello! Who are you?"}] + }' + ID=$(gen_id) + + ret=0 + output=$(kubectl run --rm -i curl-"$ID" \ + --namespace "$NAMESPACE" \ + --image=curlimages/curl --restart=Never \ + --env "PAYLOAD=$chat_payload" -- \ + sh -c 'sleep 1; curl -sS -X POST "http://'${SVC_HOST}'/v1/chat/completions" \ + -H "accept: application/json" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD"') || ret=$? + echo "$output" + [[ $ret -ne 0 || "$output" != *'{'* ]] && { + echo "Error: POST /v1/chat/completions failed (exit $ret or no JSON)" >&2; failed=true; } + echo + fi + + # POST /v1/completions + echo "2) POST /v1/completions at ${SVC_HOST}" + payload='{"model":"'"$MODEL_ID"'","prompt":"You are a helpful AI assistant."}' + ID=$(gen_id) + + ret=0 + output=$(kubectl run --rm -i curl-"$ID" \ + --namespace "$NAMESPACE" \ + --image=curlimages/curl --restart=Never \ + --env "PAYLOAD=$payload" -- \ + sh -c 'sleep 1; curl -sS -X POST "http://'${SVC_HOST}'/v1/completions" \ + -H "accept: application/json" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD"') || ret=$? + echo "$output" + [[ $ret -ne 0 || "$output" != *'{'* ]] && { + echo "Error: POST /v1/completions failed (exit $ret or no JSON)" >&2; failed=true; } + echo + + if $failed; then + echo "Iteration $i encountered errors; exiting." >&2 + exit 1 + fi +done + +echo "✅ All 7 iterations succeeded." 
\ No newline at end of file diff --git a/.github/workflows/e2e-prefix-cache-aware-gke.yaml b/.github/workflows/e2e-prefix-cache-aware-gke.yaml new file mode 100644 index 000000000..f5db63580 --- /dev/null +++ b/.github/workflows/e2e-prefix-cache-aware-gke.yaml @@ -0,0 +1,279 @@ +name: GKE Prefix Cache Test + +on: + # Runs with a PR comment /run-gke-prefix-cache + issue_comment: + types: [created] + workflow_dispatch: + inputs: + pr_or_branch: + description: 'Pull-request number or branch name to test' + required: true + default: 'main' + type: string + +permissions: + contents: read + +jobs: + deploy_and_validate: + if: > + github.event_name == 'workflow_dispatch' || + ( + github.event_name == 'issue_comment' && + github.event.issue.pull_request && + github.event.issue.pull_request.base.ref == 'main' && + contains(github.event.comment.body, '/run-gke-prefix-cache') + && + ( + github.event.comment.author_association == 'OWNER' || + github.event.comment.author_association == 'MEMBER' || + github.event.comment.author_association == 'COLLABORATOR' + ) + ) + name: Test on ${{ matrix.accelerator.name }} + runs-on: ubuntu-latest + + strategy: + fail-fast: false + max-parallel: 1 + matrix: + accelerator: + - name: GPU + + env: + GCP_PROJECT_ID: llm-d-scale + GKE_CLUSTER_NAME: llm-d-e2e-us-east5 + GKE_CLUSTER_ZONE: us-east5 + NAMESPACE: igw-prefix-cache + GATEWAY: gke-l7-regional-external-managed + GATEWAY_TYPE: gke + PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || github.event.issue.number || github.event.number || 'actions' }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} + MODEL: meta-llama/Llama-3.1-8B-Instruct + GSA_EMAIL: ${{ secrets.GCS_WORKLOAD_SA }} + GCS_BUCKET: igw-e2e-benchmark-results + KSA_NAME: igw-e2e-benchmark-sa + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Determine if pr_or_branch is a PR number + id: check_pr + env: + PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch }} + shell: bash + run: | + 
echo "PR_OR_BRANCH=${PR_OR_BRANCH:-actions}" >> "$GITHUB_ENV" + if [[ "$PR_OR_BRANCH" =~ ^[0-9]+$ ]]; then + echo "is_pr=true" >> "$GITHUB_OUTPUT" + elif [[ "${{ github.event_name }}" = "pull_request" ]]; then + echo "PR_OR_BRANCH=${{ github.event.pull_request.number }}" >> $GITHUB_ENV + echo "is_pr=true" >> "$GITHUB_OUTPUT" + else + echo "is_pr=false" >> "$GITHUB_OUTPUT" + fi + + - name: Fetch and checkout PR + if: steps.check_pr.outputs.is_pr == 'true' + run: | + git fetch origin pull/"$PR_OR_BRANCH"/head:pr-"$PR_OR_BRANCH" + git checkout pr-"$PR_OR_BRANCH" + + - name: Checkout branch + if: steps.check_pr.outputs.is_pr == 'false' + run: git checkout "$PR_OR_BRANCH" + + - name: Authenticate to Google Cloud + id: auth + uses: google-github-actions/auth@b7593ed2efd1c1617e1b0254da33b86225adb2a5 + with: + credentials_json: ${{ secrets.GCP_SA_KEY }} + + - name: Set up gcloud CLI and kubectl + uses: google-github-actions/setup-gcloud@cb1e50a9932213ecece00a606661ae9ca44f3397 + with: + project_id: ${{ env.GCP_PROJECT_ID }} + install_components: 'kubectl,gke-gcloud-auth-plugin' + + - name: Get GKE credentials + run: | + gcloud container clusters get-credentials "${{ env.GKE_CLUSTER_NAME }}" --zone "${{ env.GKE_CLUSTER_ZONE }}" + + - name: Create namespace + run: | + kubectl create namespace "${NAMESPACE}" || echo "Namespace already exists" + + - name: Create hf-token secret + run: | + kubectl create secret generic hf-token \ + --from-literal="token=${{ secrets.HF_TOKEN }}" \ + --namespace "${NAMESPACE}" \ + --dry-run=client -o yaml | kubectl apply -f - + + - name: Create and Annotate KSA for Workload Identity + run: | + kubectl create serviceaccount $KSA_NAME --namespace "${NAMESPACE}" --dry-run=client -o yaml | kubectl apply -f - + kubectl annotate serviceaccount $KSA_NAME \ + iam.gke.io/gcp-service-account=$GSA_EMAIL \ + --overwrite \ + --namespace "${NAMESPACE}" + + - name: Deploy Model Server and CRDs + run: | + cd config/manifests/vllm + sed -i '/- --model/a\ - 
--enable-prefix-caching' gpu-deployment.yaml + echo "Deploying Model Server..." + kubectl apply -f gpu-deployment.yaml -n ${NAMESPACE} | tee ~/igw-prefix-cache-deployment.log + echo "Installing CRDs" + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.1.0/manifests.yaml + echo "---------------------------------------" >> ~/igw-prefix-cache-deployment.log + + - name: Deploy InferencePool and Endpoint Picker Extension + run: | + export IGW_CHART_VERSION=v1.1.0 + helm install vllm-llama3-8b-instruct \ + --namespace $NAMESPACE \ + --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ + --set provider.name=$GATEWAY_TYPE \ + --version $IGW_CHART_VERSION \ + oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool | tee -a ~/igw-prefix-cache-deployment.log + echo "---------------------------------------" >> ~/igw-prefix-cache-deployment.log + + - name: Wait for all pods to be ready + run: | + kubectl wait pod \ + --for=condition=Ready \ + --all \ + -n "${NAMESPACE}" \ + --timeout=25m + echo "✅ All pods are ready." + kubectl get pods -n "${NAMESPACE}" + + - name: Deploy Gateway + run: | + GATEWAY_NAME=inference-gateway + kubectl delete httproute llm-route -n ${NAMESPACE} --ignore-not-found + kubectl delete gateway ${GATEWAY_NAME} -n ${NAMESPACE} --ignore-not-found + echo "Deploying Gateway..." + kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.1.0/config/manifests/gateway/gke/gateway.yaml -n ${NAMESPACE} | tee -a ~/igw-prefix-cache-deployment.log + echo "Deploying HTTPRoute..." 
+ kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.1.0/config/manifests/gateway/gke/httproute.yaml -n ${NAMESPACE} | tee ~/igw-prefix-cache-deployment.log + echo "---------------------------------------" >> ~/igw-prefix-cache-deployment.log + + - name: Wait for gateway to be ready + run: | + GATEWAY_NAME=inference-gateway + kubectl wait gateway/${GATEWAY_NAME} \ + --for=condition=Programmed=True \ + -n "${NAMESPACE}" \ + --timeout=500s + echo "✅ Gateway is ready." + kubectl get gateway -n "${NAMESPACE}" + + - name: Show deployment status + run: | + echo "=== Deployments ===" + kubectl get deployments -n "${NAMESPACE}" + echo "" + echo "=== Pods ===" + kubectl get pods -n "${NAMESPACE}" + echo "" + echo "=== Services ===" + kubectl get svc -n "${NAMESPACE}" + echo "" + echo "=== Helm releases ===" + helm list -n "${NAMESPACE}" || true + echo "" + echo "=== Inference Pools ===" + kubectl get inferencepools -n "${NAMESPACE}" || true + echo "" + echo "=== HTTPRoutes ===" + kubectl get httproutes -n "${NAMESPACE}" -o yaml || true + echo "" + echo "=== Gateway ===" + kubectl get Gateway -n "${NAMESPACE}" || true + echo "" + + - name: Verify installation and run validation test + run: | + cd .github/scripts/e2e + ./e2e-validate.sh -n "${NAMESPACE}" -v -m ${MODEL} + + - name: Run benchmarking test + run: | + TIMESTAMP=$(date +"%Y-%m-%d-%H-%M-%S") + cd benchmarking/prefix-cache-aware + host="${GATEWAY_HOST:-$(kubectl get gateway -n "$NAMESPACE" \ + -o jsonpath='{.items[0].status.addresses[0].value}' 2>/dev/null || true)}" + if [[ -z "$host" ]]; then + echo "Error: could not discover a Gateway address in namespace '$NAMESPACE'." 
>&2 + exit 1 + fi + port=80 + svc_host="${host}:${port}" + helm install prefix-cache-benchmark ../inference-perf/ -f high-cache-values.yaml \ + --namespace "${NAMESPACE}" \ + --create-namespace \ + --set hfToken="${HF_TOKEN}" \ + --set "config.server.base_url=http://${svc_host}" \ + --set "job.serviceAccountName=$KSA_NAME" \ + --set "job.image.tag=v0.2.0" \ + --set "config.storage.google_cloud_storage.bucket_name=${GCS_BUCKET}" \ + --set "config.storage.google_cloud_storage.path=${NAMESPACE}/${TIMESTAMP}" \ + --set-string 'job.resources.limits.nvidia\.com/gpu=1' + + - name: Wait for benchmarking job to finish + run: | + job_name=prefix-cache-benchmark-inference-perf-job + TIMEOUT_DURATION="7200s" + if ! kubectl wait --for=condition=complete job/"$job_name" -n "$NAMESPACE" --timeout="$TIMEOUT_DURATION"; then + echo "Error: Benchmark job $job_name did not complete successfully within $TIMEOUT_DURATION." >&2 + echo "--- Job Description ---" >&2 + kubectl describe job "$job_name" -n "$NAMESPACE" >&2 + echo "--- Pod Logs (Last 50 lines) ---" >&2 + kubectl logs -l job-name="$job_name" -n "$NAMESPACE" --all-containers=true --tail 50 >&2 + exit 1 + fi + echo "✅ Benchmarking Job Completed." + + - name: Collect and upload Kubernetes pod logs + if: always() + run: | + mkdir -p pod-logs-inference-prefix-cache + cd pod-logs-inference-prefix-cache + echo "Fetching ${NAMESPACE} pods log..." + kubectl get pods -n "${NAMESPACE}" --no-headers -o custom-columns=":metadata.name" \ + | xargs -I{} sh -c 'kubectl logs --all-containers=true -n "${NAMESPACE}" {} > "{}.log" 2>&1' + echo "Fetching ${NAMESPACE} pods descriptions..." + kubectl get pods -n "${NAMESPACE}" --no-headers -o custom-columns=":metadata.name" \ + | xargs -I{} sh -c 'kubectl describe pod -n "${NAMESPACE}" {} > "{}-describe.log" 2>&1' + mv ~/igw-prefix-cache-deployment.log . || true + mv ~/install-deps.log . 
|| true + + - name: Upload pod logs as artifact + uses: actions/upload-artifact@v4 + if: always() + with: + name: igw-pod-logs-inference-prefix-cache-${{ matrix.accelerator.name }} + path: pod-logs-inference-prefix-cache + + - name: Send Google Chat notification on failure + if: failure() + uses: SimonScholz/google-chat-action@3b3519e5102dba8aa5046fd711c4b553586409bb + with: + webhookUrl: ${{ secrets.GOOGLE_CHAT_WEBHOOK }} + jobStatus: ${{ job.status }} + title: '${{ github.workflow }} - ${{ matrix.accelerator.name }}' + + - name: Cleanup deployment + if: always() + run: | + GATEWAY_NAME=inference-gateway + helm uninstall vllm-llama3-8b-instruct -n ${NAMESPACE} --ignore-not-found + helm uninstall prefix-cache-benchmark -n ${NAMESPACE} --ignore-not-found + kubectl delete httproute llm-route -n ${NAMESPACE} --ignore-not-found + kubectl delete gateway ${GATEWAY_NAME} -n ${NAMESPACE} --ignore-not-found \ No newline at end of file diff --git a/benchmarking/benchmark-values.yaml b/benchmarking/benchmark-values.yaml index 0a55355d8..774f412f5 100644 --- a/benchmarking/benchmark-values.yaml +++ b/benchmarking/benchmark-values.yaml @@ -1,9 +1,9 @@ job: image: repository: quay.io/inference-perf/inference-perf - tag: "latest" # Defaults to .Chart.AppVersion - serviceAccountName: "" + tag: "" # Defaults to .Chart.AppVersion nodeSelector: {} + serviceAccountName: "" # Example resources: # resources: # requests: @@ -18,19 +18,27 @@ logLevel: INFO # A GCS bucket path that points to the dataset file. # The file will be copied from this path to the local file system -# at /dataset/gcs-dataset.json for use during the run. -# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/gcs-dataset.json. +# at /gcsDataset/gcs-dataset.json for use during the run. +# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /gcsDataset/gcs-dataset.json. 
+# Format: bucket-name/folder/to/dataset/file gcsPath: "" -# A S3 bucket path that points to the dataset file. +# An S3 bucket path that points to the dataset file. # The file will be copied from this path to the local file system -# at /dataset/s3-dataset.json for use during the run. -# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/s3-dataset.json. +# at /s3Dataset/s3-dataset.json for use during the run. +# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /s3Dataset/s3-dataset.json. +# Format: bucket-name/folder/to/dataset/file s3Path: "" -# hfToken optionally creates a secret with the specified token. -# Can be set using helm install --set hftoken= -hfToken: "" +# Optional Token configuration for Hugging Face authentication. +# hfSecret: Configures a pre-existing Kubernetes Secret. +# hfToken: Creates a new kubernetes secret with the specified token. +# If both specified, 'hfSecret' takes precedence over 'hfToken'. +token: + hfSecret: + name: "" # The name of the existing Secret (e.g., 'my-hf-secret'). + key: "" # The key within the Secret that holds the token value (e.g., 'token' or 'hf-token'). 
+ hfToken: "" config: load: diff --git a/benchmarking/inference-perf/Chart.yaml b/benchmarking/inference-perf/Chart.yaml index 0295e06ef..93070f490 100644 --- a/benchmarking/inference-perf/Chart.yaml +++ b/benchmarking/inference-perf/Chart.yaml @@ -3,4 +3,4 @@ name: inference-perf description: A Helm chart for running inference-perf benchmarking tool type: application version: 0.2.0 -appVersion: "0.2.0" +appVersion: "v0.2.0" diff --git a/benchmarking/inference-perf/README.md b/benchmarking/inference-perf/README.md index 54fdd3edd..946fe5db8 100644 --- a/benchmarking/inference-perf/README.md +++ b/benchmarking/inference-perf/README.md @@ -19,11 +19,22 @@ Make sure you have the following tools installed and configured: Before deployment, navigate to the **`deploy/inference-perf`** directory and edit the **`values.yaml`** file to customize your deployment and the benchmark parameters. -#### Optional Parameters +#### Optional Token Parameters +Hugging Face token can be provided either by providing a value (`hfToken`) or by referencing an existing Kubernetes Secret (`hfSecret.Name` and `hfSecret.Key`). + +> If both `hfToken` and the `hfSecret` parameters are provided, the chart logic is configured to prioritize the `hfSecret` reference. | Key | Description | Default | | :--- | :--- | :--- | | `hfToken` | Hugging Face API token. If provided, a Kubernetes `Secret` named `hf-token-secret` will be created for authentication. | `""` | +| `hfSecret.name` | The name of a pre-existing Kubernetes Secret that contains a Hugging Face API token. | `""` | +| `hfSecret.key` | The key within the pre-existing Kubernetes Secret that holds the token value. | `""` | +--- + +#### Optional Job Parameters + +| Key | Description | Default | +| :--- | :--- | :--- | | `serviceAccountName` | Standard Kubernetes `serviceAccountName`. If not provided, default service account is used. 
| `""` | | `nodeSelector` | Standard Kubernetes `nodeSelector` map to constrain pod placement to nodes with matching labels. | `{}` | | `resources` | Standard Kubernetes resource requests and limits for the main `inference-perf` container. | `{}` | @@ -54,7 +65,29 @@ The identity executing the workload (e.g., the associated Kubernetes Service Acc | Key | Description | Default | | :--- | :--- | :--- | -| `gcsPath` | A GCS URI pointing to the dataset file (e.g., `gs://my-bucket/dataset.json`). The file will be automatically copied to the running pod during initialization. | `""` | +| `gcsPath` | A GCS bucket name pointing to the dataset file (e.g., `/dataset.json`). The file will be automatically copied to the running pod during initialization. The file will be copied to `gcsDataset/dataset.json` | `""` | + +--- + +#### AWS Specific Parameters + +This section details the necessary configuration and permissions for using an S3 path to manage your dataset, typical for deployments on AWS EKS. + +##### Required IAM Permissions + +The identity executing the workload (e.g., the associated Kubernetes Service Account, often configured via IRSA - IAM Roles for Service Accounts) must possess an associated AWS IAM Policy that grants the following S3 Actions on the target S3 bucket for data transfer: + +* **S3 Read/Download (Object Access)** + * Action: `s3:GetObject` (Required to download the input dataset from S3). + * Action: `s3:ListBucket` (Often required to check for the file's existence and list bucket contents). + +* **S3 Write/Upload (Object Creation)** + * Action: `s3:PutObject` (Required to upload benchmark results back to S3). + + +| Key | Description | Default | +| :--- | :--- | :--- | +| `s3Path` | An S3 bucket name pointing to the dataset file (e.g., `/dataset.json`). The file will be automatically copied to the running pod during initialization. 
The file will be copied to `s3Dataset/dataset.json` | `""` | --- @@ -80,6 +113,6 @@ Use the **`helm install`** command from the **`deploy/inference-perf`** director ### 4. Cleanup To remove the benchmark deployment. - ```bash +```bash helm uninstall test - ``` \ No newline at end of file +``` \ No newline at end of file diff --git a/benchmarking/inference-perf/templates/job.yaml b/benchmarking/inference-perf/templates/job.yaml index b581537db..936176591 100644 --- a/benchmarking/inference-perf/templates/job.yaml +++ b/benchmarking/inference-perf/templates/job.yaml @@ -23,19 +23,19 @@ spec: initContainers: - name: fetch-gcs-dataset image: google/cloud-sdk:latest - command: ["sh", "-c", "gsutil cp {{ .Values.gcsPath }} /dataset/gcs-dataset.json"] + command: ["sh", "-c", "gsutil cp gs://{{ .Values.gcsPath }} /gcsDataset/gcs-dataset.json"] volumeMounts: - - name: dataset-volume - mountPath: /dataset + - name: gcs-dataset-volume + mountPath: /gcsDataset {{- end }} {{- if .Values.s3Path}} initContainers: - name: fetch-s3-dataset image: google/cloud-sdk:latest - command: ["sh", "-c", "aws s3 cp s3://{{ .Values.s3Path }} /dataset/s3-dataset.json"] + command: ["sh", "-c", "aws s3 cp s3://{{ .Values.s3Path }} /s3Dataset/s3-dataset.json"] volumeMounts: - - name: dataset-volume - mountPath: /dataset + - name: s3-dataset-volume + mountPath: /s3Dataset {{- end }} containers: - name: inference-perf-container @@ -47,20 +47,42 @@ spec: - "--log-level" - {{ .Values.logLevel }} env: - {{- if .Values.hfToken }} +{{- if and .Values.token.hfSecret.name .Values.token.hfSecret.key }} + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.token.hfSecret.name }} + key: {{ .Values.token.hfSecret.key }} +{{- else if .Values.token.hfToken }} - name: HF_TOKEN valueFrom: secretKeyRef: name: {{ include "inference-perf.hfSecret" . }} key: {{ include "inference-perf.hfKey" . 
}} - {{- end }} +{{- end }} volumeMounts: - name: config-volume mountPath: {{ include "inference-perf.configMount" . }} readOnly: true +{{- if .Values.gcsPath}} + - name: gcs-dataset-volume + mountPath: /gcsDataset +{{- end }} +{{- if .Values.s3Path}} + - name: s3-dataset-volume + mountPath: /s3Dataset +{{- end }} resources: {{- toYaml .Values.job.resources | nindent 12 }} volumes: - name: config-volume configMap: name: {{ include "inference-perf.fullname" . }}-config +{{- if .Values.gcsPath}} + - name: gcs-dataset-volume + emptyDir: {} +{{- end }} +{{- if .Values.s3Path}} + - name: s3-dataset-volume + emptyDir: {} +{{- end }} diff --git a/benchmarking/prefix-cache-aware/high-cache-values.yaml b/benchmarking/prefix-cache-aware/high-cache-values.yaml index da8494088..95fb8d721 100644 --- a/benchmarking/prefix-cache-aware/high-cache-values.yaml +++ b/benchmarking/prefix-cache-aware/high-cache-values.yaml @@ -2,9 +2,9 @@ job: image: repository: quay.io/inference-perf/inference-perf - tag: "0.2.0" # Defaults to .Chart.AppVersion - serviceAccountName: "" + tag: "" # Defaults to .Chart.AppVersion nodeSelector: {} + serviceAccountName: "" # Example resources: # resources: # requests: @@ -19,19 +19,27 @@ logLevel: INFO # A GCS bucket path that points to the dataset file. # The file will be copied from this path to the local file system -# at /dataset/dataset.json for use during the run. -# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/dataset.json. +# at /gcsDataset/gcs-dataset.json for use during the run. +# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /gcsDataset/gcs-dataset.json. +# Format: bucket-name/folder/to/dataset/file gcsPath: "" # An S3 bucket path that points to the dataset file. # The file will be copied from this path to the local file system -# at /dataset/s3-dataset.json for use during the run. 
-# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/s3-dataset.json. +# at /s3Dataset/s3-dataset.json for use during the run. +# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /s3Dataset/s3-dataset.json. +# Format: bucket-name/folder/to/dataset/file s3Path: "" -# hfToken optionally creates a secret with the specified token. -# Can be set using helm install --set hftoken= -hfToken: "" +# Optional Token configuration for Hugging Face authentication. +# hfSecret: Configures a pre-existing Kubernetes Secret. +# hfToken: Creates a new kubernetes secret with the specified token. +# If both specified, 'hfSecret' takes precedence over 'hfToken'. +token: + hfSecret: + name: "" # The name of the existing Secret (e.g., 'my-hf-secret'). + key: "" # The key within the Secret that holds the token value (e.g., 'token' or 'hf-token'). + hfToken: "" config: load: @@ -40,20 +48,10 @@ config: stages: - rate: 100 duration: 30 - - rate: 200 - duration: 30 - rate: 300 duration: 30 - - rate: 400 - duration: 30 - rate: 500 duration: 30 - - rate: 600 - duration: 30 - - rate: 700 - duration: 30 - - rate: 800 - duration: 30 worker_max_concurrency: 1000 api: type: completion diff --git a/benchmarking/prefix-cache-aware/low-cache-values.yaml b/benchmarking/prefix-cache-aware/low-cache-values.yaml index ac44735cd..fdd2a3269 100644 --- a/benchmarking/prefix-cache-aware/low-cache-values.yaml +++ b/benchmarking/prefix-cache-aware/low-cache-values.yaml @@ -2,9 +2,9 @@ job: image: repository: quay.io/inference-perf/inference-perf - tag: "0.2.0" # Defaults to .Chart.AppVersion - serviceAccountName: "" + tag: "" # Defaults to .Chart.AppVersion nodeSelector: {} + serviceAccountName: "" # Example resources: # resources: # requests: @@ -19,19 +19,27 @@ logLevel: INFO # A GCS bucket path that points to the dataset file. 
# The file will be copied from this path to the local file system -# at /dataset/dataset.json for use during the run. -# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/dataset.json. +# at /gcsDataset/gcs-dataset.json for use during the run. +# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /gcsDataset/gcs-dataset.json. +# Format: bucket-name/folder/to/dataset/file gcsPath: "" # An S3 bucket path that points to the dataset file. # The file will be copied from this path to the local file system -# at /dataset/s3-dataset.json for use during the run. -# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/s3-dataset.json. +# at /s3Dataset/s3-dataset.json for use during the run. +# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /s3Dataset/s3-dataset.json. +# Format: bucket-name/folder/to/dataset/file s3Path: "" -# hfToken optionally creates a secret with the specified token. -# Can be set using helm install --set hftoken= -hfToken: "" +# Optional Token configuration for Hugging Face authentication. +# hfSecret: Configures a pre-existing Kubernetes Secret. +# hfToken: Creates a new kubernetes secret with the specified token. +# If both specified, 'hfSecret' takes precedence over 'hfToken'. +token: + hfSecret: + name: "" # The name of the existing Secret (e.g., 'my-hf-secret'). + key: "" # The key within the Secret that holds the token value (e.g., 'token' or 'hf-token'). 
+ hfToken: "" config: load: @@ -40,20 +48,10 @@ config: stages: - rate: 100 duration: 30 - - rate: 200 - duration: 30 - rate: 300 duration: 30 - - rate: 400 - duration: 30 - rate: 500 duration: 30 - - rate: 600 - duration: 30 - - rate: 700 - duration: 30 - - rate: 800 - duration: 30 worker_max_concurrency: 1000 api: type: completion diff --git a/benchmarking/single-workload/decode-heavy-values.yaml b/benchmarking/single-workload/decode-heavy-values.yaml index a5811e021..03ee4e91b 100644 --- a/benchmarking/single-workload/decode-heavy-values.yaml +++ b/benchmarking/single-workload/decode-heavy-values.yaml @@ -2,9 +2,9 @@ job: image: repository: quay.io/inference-perf/inference-perf - tag: "0.2.0" # Defaults to .Chart.AppVersion - serviceAccountName: "" + tag: "" # Defaults to .Chart.AppVersion nodeSelector: {} + serviceAccountName: "" # Example resources: # resources: # requests: @@ -19,19 +19,27 @@ logLevel: INFO # A GCS bucket path that points to the dataset file. # The file will be copied from this path to the local file system -# at /dataset/dataset.json for use during the run. -# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/dataset.json. +# at /gcsDataset/gcs-dataset.json for use during the run. +# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /gcsDataset/gcs-dataset.json. +# Format: bucket-name/folder/to/dataset/file gcsPath: "" # An S3 bucket path that points to the dataset file. # The file will be copied from this path to the local file system -# at /dataset/s3-dataset.json for use during the run. -# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/s3-dataset.json. +# at /s3Dataset/s3-dataset.json for use during the run. +# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /s3Dataset/s3-dataset.json. 
+# Format: bucket-name/folder/to/dataset/file s3Path: "" -# hfToken optionally creates a secret with the specified token. -# Can be set using helm install --set hftoken= -hfToken: "" +# Optional Token configuration for Hugging Face authentication. +# hfSecret: Configures a pre-existing Kubernetes Secret. +# hfToken: Creates a new kubernetes secret with the specified token. +# If both specified, 'hfSecret' takes precedence over 'hfToken'. +token: + hfSecret: + name: "" # The name of the existing Secret (e.g., 'my-hf-secret'). + key: "" # The key within the Secret that holds the token value (e.g., 'token' or 'hf-token'). + hfToken: "" config: load: diff --git a/benchmarking/single-workload/prefill-heavy-values.yaml b/benchmarking/single-workload/prefill-heavy-values.yaml index 4ab447090..979286d9d 100644 --- a/benchmarking/single-workload/prefill-heavy-values.yaml +++ b/benchmarking/single-workload/prefill-heavy-values.yaml @@ -2,9 +2,9 @@ job: image: repository: quay.io/inference-perf/inference-perf - tag: "0.2.0" # Defaults to .Chart.AppVersion - serviceAccountName: "" + tag: "" # Defaults to .Chart.AppVersion nodeSelector: {} + serviceAccountName: "" # Example resources: # resources: # requests: @@ -19,19 +19,27 @@ logLevel: INFO # A GCS bucket path that points to the dataset file. # The file will be copied from this path to the local file system -# at /dataset/dataset.json for use during the run. -# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/dataset.json. +# at /gcsDataset/gcs-dataset.json for use during the run. +# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /gcsDataset/gcs-dataset.json. +# Format: bucket-name/folder/to/dataset/file gcsPath: "" # An S3 bucket path that points to the dataset file. # The file will be copied from this path to the local file system -# at /dataset/s3-dataset.json for use during the run. 
-# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/s3-dataset.json. +# at /s3Dataset/s3-dataset.json for use during the run. +# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /s3Dataset/s3-dataset.json. +# Format: bucket-name/folder/to/dataset/file s3Path: "" -# hfToken optionally creates a secret with the specified token. -# Can be set using helm install --set hftoken= -hfToken: "" +# Optional Token configuration for Hugging Face authentication. +# hfSecret: Configures a pre-existing Kubernetes Secret. +# hfToken: Creates a new kubernetes secret with the specified token. +# If both specified, 'hfSecret' takes precedence over 'hfToken'. +token: + hfSecret: + name: "" # The name of the existing Secret (e.g., 'my-hf-secret'). + key: "" # The key within the Secret that holds the token value (e.g., 'token' or 'hf-token'). + hfToken: "" config: load: diff --git a/site-src/performance/benchmark/advanced-configs/decode-heavy-workload.md b/site-src/performance/benchmark/advanced-configs/decode-heavy-workload.md index adec5bb50..323084b7c 100644 --- a/site-src/performance/benchmark/advanced-configs/decode-heavy-workload.md +++ b/site-src/performance/benchmark/advanced-configs/decode-heavy-workload.md @@ -50,18 +50,32 @@ This configuration is optimized for scenarios where a high cache hit rate is exp ```bash export IP='' export PORT='' + + # HUGGINGFACE PARAMETERS + # Option A: Pass Token Directly export HF_TOKEN='' + # Option B: Use Existing Kubernetes Secret + export HF_SECRET_NAME='' + export HF_SECRET_KEY='' + helm install decode-heavy ../inference-perf -f decode-heavy-values.yaml \ - --set hfToken=${HF_TOKEN} \ --set "config.server.base_url=http://${IP}:${PORT}" \ --set "config.data.path=/dataset/gcs-dataset.json" \ --set "gcsPath=" + # HUGGINGFACE OPTION A + # ------------------------------------------------ + --set token.hfToken=${HF_TOKEN} \ + # HUGGINGFACE OPTION B + # --set 
token.hfSecret.name=${HF_SECRET_NAME} \ + # --set token.hfSecret.key=${HF_SECRET_KEY} \ + # ------------------------------------------------ ``` **Parameters to customize:** * `decode-heavy`: A unique name for this deployment. - * `hfTokenSecret.name`: The name of your Kubernetes Secret containing the Hugging Face token (default: `hf-token`). - * `hfTokenSecret.key`: The key in your Kubernetes Secret pointing to the Hugging Face token (default: `token`). + * `token.hfToken`: Your Hugging Face token. Inference Perf chart will create a new Kubernetes Secret containing this token. + * `token.hfSecret.name`: The name of your Kubernetes Secret containing the Hugging Face token (default: `hf-token`). + * `token.hfSecret.key`: The key in your Kubernetes Secret pointing to the Hugging Face token (default: `token`). * `config.server.base_url`: The base URL (IP and port) of your inference server for the high-cache scenario. * `gcsPath`: The path to the downloaded dataset file hosted on your gcs bucket. @@ -70,18 +84,32 @@ This configuration is optimized for scenarios where a high cache hit rate is exp ```bash export IP='' export PORT='' + + # HUGGINGFACE PARAMETERS + # Option A: Pass Token Directly export HF_TOKEN='' + # Option B: Use Existing Kubernetes Secret + export HF_SECRET_NAME='' + export HF_SECRET_KEY='' + helm install decode-heavy ../inference-perf -f decode-heavy-values.yaml \ - --set hfToken=${HF_TOKEN} \ --set "config.server.base_url=http://${IP}:${PORT}" \ --set "config.data.path=/dataset/s3-dataset.json" \ --set "s3Path=" + # HUGGINGFACE OPTION A + # ------------------------------------------------ + --set token.hfToken=${HF_TOKEN} \ + # HUGGINGFACE OPTION B + # --set token.hfSecret.name=${HF_SECRET_NAME} \ + # --set token.hfSecret.key=${HF_SECRET_KEY} \ + # ------------------------------------------------ ``` **Parameters to customize:** * `decode-heavy`: A unique name for this deployment. 
- * `hfTokenSecret.name`: The name of your Kubernetes Secret containing the Hugging Face token (default: `hf-token`). - * `hfTokenSecret.key`: The key in your Kubernetes Secret pointing to the Hugging Face token (default: `token`). + * `token.hfToken`: Your Hugging Face token. Inference Perf chart will create a new Kubernetes Secret containing this token. + * `token.hfSecret.name`: The name of your Kubernetes Secret containing the Hugging Face token (default: `hf-token`). + * `token.hfSecret.key`: The key in your Kubernetes Secret pointing to the Hugging Face token (default: `token`). * `config.server.base_url`: The base URL (IP and port) of your inference server for the high-cache scenario. * `s3Path`: The path to the downloaded dataset file hosted on your s3 bucket. diff --git a/site-src/performance/benchmark/advanced-configs/prefill-heavy-workload.md b/site-src/performance/benchmark/advanced-configs/prefill-heavy-workload.md index d31f99fee..99ada4b2d 100644 --- a/site-src/performance/benchmark/advanced-configs/prefill-heavy-workload.md +++ b/site-src/performance/benchmark/advanced-configs/prefill-heavy-workload.md @@ -50,18 +50,33 @@ This configuration is optimized for scenarios where a high cache hit rate is exp ```bash export IP='' export PORT='' + + # HUGGINGFACE PARAMETERS + # Option A: Pass Token Directly export HF_TOKEN='' + # Option B: Use Existing Kubernetes Secret + export HF_SECRET_NAME='' + export HF_SECRET_KEY='' + helm install prefill-heavy ../inference-perf -f prefill-heavy-values.yaml \ - --set hfToken=${HF_TOKEN} \ --set "config.server.base_url=http://${IP}:${PORT}" \ --set "config.data.path=/dataset/gcs-dataset.json" \ --set "gcsPath=" + # ------------------------------------------------ + # HUGGINGFACE OPTION A + --set token.hfToken=${HF_TOKEN} \ + # ------------------------------------------------ + # HUGGINGFACE OPTION B + # --set token.hfSecret.name=${HF_SECRET_NAME} \ + # --set token.hfSecret.key=${HF_SECRET_KEY} \ + # 
------------------------------------------------ ``` **Parameters to customize:** * `prefill-heavy`: A unique name for this deployment. - * `hfTokenSecret.name`: The name of your Kubernetes Secret containing the Hugging Face token (default: `hf-token`). - * `hfTokenSecret.key`: The key in your Kubernetes Secret pointing to the Hugging Face token (default: `token`). + * `token.hfToken`: Your Hugging Face token. Inference Perf chart will create a new Kubernetes Secret containing this token. + * `token.hfSecret.name`: The name of your Kubernetes Secret containing the Hugging Face token (default: `hf-token`). + * `token.hfSecret.key`: The key in your Kubernetes Secret pointing to the Hugging Face token (default: `token`). * `config.server.base_url`: The base URL (IP and port) of your inference server for the high-cache scenario. * `gcsPath`: The path to the downloaded dataset file hosted on your gcs bucket. @@ -70,19 +85,34 @@ This configuration is optimized for scenarios where a high cache hit rate is exp ```bash export IP='' export PORT='' + + # HUGGINGFACE PARAMETERS + # Option A: Pass Token Directly export HF_TOKEN='' + # Option B: Use Existing Kubernetes Secret + export HF_SECRET_NAME='' + export HF_SECRET_KEY='' + helm install prefill-heavy ../inference-perf -f prefill-heavy-values.yaml \ - --set hfToken=${HF_TOKEN} \ --set "config.server.base_url=http://${IP}:${PORT}" \ --set "config.data.path=/dataset/s3-dataset.json" \ --set "s3Path=" + # ------------------------------------------------ + # HUGGINGFACE OPTION A + --set token.hfToken=${HF_TOKEN} \ + # ------------------------------------------------ + # HUGGINGFACE OPTION B + # --set token.hfSecret.name=${HF_SECRET_NAME} \ + # --set token.hfSecret.key=${HF_SECRET_KEY} \ + # ------------------------------------------------ ``` **Parameters to customize:** * `prefill-heavy`: A unique name for this deployment. - * `hfTokenSecret.name`: The name of your Kubernetes Secret containing the Hugging Face token (default: `hf-token`). 
- * `hfTokenSecret.key`: The key in your Kubernetes Secret pointing to the Hugging Face token (default: `token`). + * `token.hfToken`: Your Hugging Face token. Inference Perf chart will create a new Kubernetes Secret containing this token. + * `token.hfSecret.name`: The name of your Kubernetes Secret containing the Hugging Face token (default: `hf-token`). + * `token.hfSecret.key`: The key in your Kubernetes Secret pointing to the Hugging Face token (default: `token`). * `config.server.base_url`: The base URL (IP and port) of your inference server for the high-cache scenario. * `s3Path`: The path to the downloaded dataset file hosted on your s3 bucket. diff --git a/site-src/performance/benchmark/advanced-configs/prefix-cache-aware.md b/site-src/performance/benchmark/advanced-configs/prefix-cache-aware.md index fd7da333d..0b35c7cd5 100644 --- a/site-src/performance/benchmark/advanced-configs/prefix-cache-aware.md +++ b/site-src/performance/benchmark/advanced-configs/prefix-cache-aware.md @@ -62,17 +62,32 @@ This configuration is optimized for scenarios where a high cache hit rate is exp ```bash export IP='' export PORT='' + +# HUGGINGFACE PARAMETERS +# Option A: Pass Token Directly export HF_TOKEN='' +# Option B: Use Existing Kubernetes Secret +export HF_SECRET_NAME='' +export HF_SECRET_KEY='' + helm install high-cache ../inference-perf -f high-cache-values.yaml \ - --set hfToken=${HF_TOKEN} \ --set "config.server.base_url=http://${IP}:${PORT}" + # ------------------------------------------------ + # HUGGINGFACE OPTION A + --set token.hfToken=${HF_TOKEN} \ + # ------------------------------------------------ + # HUGGINGFACE OPTION B + # --set token.hfSecret.name=${HF_SECRET_NAME} \ + # --set token.hfSecret.key=${HF_SECRET_KEY} \ + # ------------------------------------------------ ``` **Parameters to customize:** * `high-cache`: A unique name for this deployment. -* `hfTokenSecret.name`: The name of your Kubernetes Secret containing the Hugging Face token (default: `hf-token`). 
-* `hfTokenSecret.key`: The key in your Kubernetes Secret pointing to the Hugging Face token (default: `token`). +* `token.hfToken`: Your Hugging Face token. Inference Perf chart will create a new Kubernetes Secret containing this token. +* `token.hfSecret.name`: The name of your Kubernetes Secret containing the Hugging Face token (default: `hf-token`). +* `token.hfSecret.key`: The key in your Kubernetes Secret pointing to the Hugging Face token (default: `token`). * `config.server.base_url`: The base URL (IP and port) of your inference server for the high-cache scenario. ### 4. Deploying the Low-Cache Configuration @@ -83,17 +98,32 @@ This configuration is designed for scenarios with a lower cache hit rate. It use cd gateway-api-inference-extension/benchmarking/prefix-cache-aware export IP='' export PORT='' + +# HUGGINGFACE PARAMETERS +# Option A: Pass Token Directly export HF_TOKEN='' +# Option B: Use Existing Kubernetes Secret +export HF_SECRET_NAME='' +export HF_SECRET_KEY='' + helm install low-cache ../inference-perf -f low-cache-values.yaml \ - --set hfToken=${HF_TOKEN} \ --set "config.server.base_url=http://${IP}:${PORT}" + # ------------------------------------------------ + # HUGGINGFACE OPTION A + --set token.hfToken=${HF_TOKEN} \ + # ------------------------------------------------ + # HUGGINGFACE OPTION B + # --set token.hfSecret.name=${HF_SECRET_NAME} \ + # --set token.hfSecret.key=${HF_SECRET_KEY} \ + # ------------------------------------------------ ``` **Parameters to customize:** * `low-cache`: A unique name for this deployment. -* `hfTokenSecret.name`: The name of your Kubernetes Secret containing the Hugging Face token (default: `hf-token`). -* `hfTokenSecret.key`: The key in your Kubernetes Secret pointing to the Hugging Face token (default: `token`). +* `token.hfToken`: Your Hugging Face token. Inference Perf chart will create a new Kubernetes Secret containing this token. 
+* `token.hfSecret.name`: The name of your Kubernetes Secret containing the Hugging Face token (default: `hf-token`). +* `token.hfSecret.key`: The key in your Kubernetes Secret pointing to the Hugging Face token (default: `token`). * `config.server.base_url`: The base URL (IP and port) of your inference server for the high-cache scenario. ## Clean Up diff --git a/site-src/performance/benchmark/index.md b/site-src/performance/benchmark/index.md index c57b33825..cbc11f61f 100644 --- a/site-src/performance/benchmark/index.md +++ b/site-src/performance/benchmark/index.md @@ -40,7 +40,9 @@ parallel against different targets. For more parameter customizations, refer to inference-perf [guides](https://github.com/kubernetes-sigs/inference-perf/blob/main/docs/config.md) * `benchmark`: A unique name for this deployment. -* `hfToken`: Your hugging face token. +* `token.hfToken`: Your Hugging Face token. +* `token.hfSecret.name`: The name of your Kubernetes Secret containing the Hugging Face token (default: `hf-token`). +* `token.hfSecret.key`: The key in your Kubernetes Secret pointing to the Hugging Face token (default: `token`). * `config.server.base_url`: The base URL (IP and port) of your inference server. 
### Storage Parameters @@ -119,16 +121,44 @@ echo $SVC_IP ```bash export PORT='' + +# HUGGINGFACE PARAMETERS +# Option A: Pass Token Directly export HF_TOKEN='' +# Option B: Use Existing Kubernetes Secret +export HF_SECRET_NAME='' +export HF_SECRET_KEY='' + helm install igw-benchmark inference-perf/ -f benchmark-values.yaml \ ---set hfToken=${HF_TOKEN} \ ---set "config.server.base_url=http://${GW_IP}:${PORT}" +--set "config.server.base_url=http://${GW_IP}:${PORT}" \ +--set token.hfToken=${HF_TOKEN} +# ------------------------------------------------ +# HUGGINGFACE OPTION A is the token.hfToken flag above. +# ------------------------------------------------ +# HUGGINGFACE OPTION B: replace that flag with: +# --set token.hfSecret.name=${HF_SECRET_NAME} \ +# --set token.hfSecret.key=${HF_SECRET_KEY} +# ------------------------------------------------ export PORT='' + +# HUGGINGFACE PARAMETERS +# Option A: Pass Token Directly export HF_TOKEN='' +# Option B: Use Existing Kubernetes Secret +export HF_SECRET_NAME='' +export HF_SECRET_KEY='' + helm install k8s-benchmark inference-perf/ -f benchmark-values.yaml \ ---set hfToken=${HF_TOKEN} \ ---set "config.server.base_url=http://${SVC_IP}:${PORT}" +--set "config.server.base_url=http://${SVC_IP}:${PORT}" \ +--set token.hfToken=${HF_TOKEN} +# ------------------------------------------------ +# HUGGINGFACE OPTION A is the token.hfToken flag above. +# ------------------------------------------------ +# HUGGINGFACE OPTION B: replace that flag with: +# --set token.hfSecret.name=${HF_SECRET_NAME} \ +# --set token.hfSecret.key=${HF_SECRET_KEY} +# ------------------------------------------------ ``` ## Clean Up