|
| 1 | +.. license-header |
| 2 | + SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
| 3 | + SPDX-License-Identifier: Apache-2.0 |
| 4 | +
|
| 5 | + Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | + you may not use this file except in compliance with the License. |
| 7 | + You may obtain a copy of the License at |
| 8 | +
|
| 9 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +
|
| 11 | + Unless required by applicable law or agreed to in writing, software |
| 12 | + distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | + See the License for the specific language governing permissions and |
| 15 | + limitations under the License. |
| 16 | +
|
| 17 | +.. headings # #, * *, =, -, ^, " |
| 18 | +.. include:: ./common/vars.rst |
| 19 | + |
| 20 | + |
| 21 | +****************************************************************** |
| 22 | +[TECH PREVIEW] Configuration Assistance with Kubernetes Launch Kit |
| 23 | +****************************************************************** |
| 24 | + |
| 25 | +.. contents:: On this page |
| 26 | + :depth: 3 |
| 27 | + :local: |
| 28 | + :backlinks: none |
| 29 | + |
| 30 | +Kubernetes Launch Kit (l8k) is a CLI tool for deploying and managing NVIDIA cloud-native solutions on Kubernetes. The tool helps provide flexible deployment workflows for optimal network performance with SR-IOV, RDMA, and other networking technologies. |
| 31 | + |
| 32 | +------------- |
| 33 | +Prerequisites |
| 34 | +------------- |
| 35 | + |
| 36 | +For prerequisites, please refer to the :doc:`NVIDIA Network Operator Deployment Guide with Kubernetes <deployment-guide-kubernetes>` page. |
| 37 | + |
| 38 | +You will need a Kubernetes cluster with the NVIDIA Network Operator Helm chart installed. |
| 39 | + |
| 40 | +---------------- |
| 41 | +Operation Phases |
| 42 | +---------------- |
| 43 | + |
| 44 | +============================== |
| 45 | +Discover Cluster Configuration |
| 46 | +============================== |
| 47 | + |
| 48 | +Deploy a minimal Network Operator profile to automatically discover your cluster's network capabilities and hardware configuration. This phase can be skipped if you provide your own configuration file. |
| 49 | + |
| 50 | +============================== |
| 51 | +Select the Deployment Profile |
| 52 | +============================== |
| 53 | + |
| 54 | +Specify the desired deployment profile via CLI flags or with a natural-language prompt for the LLM. |
| 55 | + |
| 56 | +========================= |
| 57 | +Generate Deployment Files |
| 58 | +========================= |
| 59 | + |
| 60 | +Based on the discovered/provided configuration, generate a complete set of YAML deployment files tailored to your selected network profile. |
| 61 | + |
| 62 | +------------------- |
| 63 | +Supported Use Cases |
| 64 | +------------------- |
| 65 | + |
| 66 | +Kubernetes Launch Kit supports the following use cases: |
| 67 | + |
| 68 | +- SR-IOV Network with RDMA |
| 69 | +- Host Device Network with RDMA |
| 70 | +- IP over InfiniBand with RDMA Shared Device |
| 71 | +- MacVLAN Network with RDMA Shared Device |
| 72 | +- SR-IOV InfiniBand Network with RDMA |
| 73 | + |
| 74 | +Please refer to the :doc:`quick-start/quick-start-k8s` page for more details. |
| 75 | + |
| 76 | +----- |
| 77 | +Usage |
| 78 | +----- |
| 79 | + |
| 80 | +Kubernetes Launch Kit is available as a Docker container: |
| 81 | + |
| 82 | +.. code-block:: bash |
| 83 | + :substitutions: |
| 84 | +
|
| 85 | + mkdir ~/cluster-configuration |
| 86 | + cp /etc/kubernetes/admin.conf ~/cluster-configuration/kubeconfig |
| 87 | + docker run -v ~/cluster-configuration:/cluster-configuration --net=host |k8s-launch-kit-repository|/k8s-launch-kit:|k8s-launch-kit-version| --discover-cluster-config --kubeconfig /cluster-configuration/kubeconfig --save-cluster-config /cluster-configuration/config.yaml --log-level debug --save-deployment-files /cluster-configuration/deployments --fabric infiniband --deployment-type rdma_shared --multirail |
| 88 | +
|
| 89 | +Remember to pass ``--net=host`` and to mount the directories used for input and output files with ``-v``. |
| 90 | + |
| 91 | +.. code-block:: text |
| 92 | +
|
| 93 | + K8s Launch Kit (l8k) is a CLI tool for deploying and managing NVIDIA cloud-native solutions on Kubernetes. The tool helps provide flexible deployment workflows for optimal network performance with SR-IOV, RDMA, and other networking technologies. |
| 94 | +
|
| 95 | + ### Discover Cluster Configuration |
| 96 | + Deploy a minimal Network Operator profile to automatically discover your cluster's |
| 97 | + network capabilities and hardware configuration by using --discover-cluster-config. |
| 98 | + This phase can be skipped if you provide your own configuration file by using --user-config. |
| 99 | + This phase requires --kubeconfig to be specified. |
| 100 | +
|
| 101 | + ### Generate Deployment Files |
| 102 | + Based on the discovered or provided configuration, |
| 103 | + generate a complete set of YAML deployment files for the selected network profile. |
| 104 | + Files can be saved to disk using --save-deployment-files. |
| 105 | + The profile can be defined manually with --fabric, --deployment-type and --multirail flags, |
| 106 | + OR generated by an LLM-assisted profile generator with --prompt (requires --llm-api-key and --llm-vendor). |
| 107 | +
|
| 108 | + ### Deploy to Cluster |
| 109 | + Apply the generated deployment files to your Kubernetes cluster by using --deploy. This phase requires --kubeconfig and can be skipped if --deploy is not specified. |
| 110 | +
|
| 111 | + Usage: |
| 112 | + l8k [flags] |
| 113 | + l8k [command] |
| 114 | +
|
| 115 | + Available Commands: |
| 116 | + completion Generate the autocompletion script for the specified shell |
| 117 | + help Help about any command |
| 118 | + version Print the version number |
| 119 | +
|
| 120 | + Flags: |
| 121 | + --ai Enable AI deployment |
| 122 | + --deploy Deploy the generated files to the Kubernetes cluster |
| 123 | + --deployment-type string Select the deployment type (sriov, rdma_shared, host_device) |
| 124 | + --discover-cluster-config Deploy a thin Network Operator profile to discover cluster capabilities |
| 125 | + --enabled-plugins string Comma-separated list of plugins to enable (default "network-operator") |
| 126 | + --fabric string Select the fabric type to deploy (infiniband, ethernet) |
| 127 | + -h, --help help for l8k |
| 128 | + --kubeconfig string Path to kubeconfig file for cluster deployment (required when using --deploy) |
| 129 | + --llm-api-key string API key for the LLM API (required when using --prompt) |
| 130 | + --llm-api-url string API URL for the LLM API (required when using --prompt) |
| 131 | + --llm-vendor string Vendor of the LLM API (required when using --prompt) (default "openai-azure") |
| 132 | + --log-level string Log level (debug, info, warn, error) (default "info") |
| 133 | + --multirail Enable multirail deployment |
| 134 | + --prompt string Path to file with a prompt to use for LLM-assisted profile generation |
| 135 | + --save-cluster-config string Save discovered cluster configuration to the specified path (default "/opt/nvidia/k8s-launch-kit/cluster-config.yaml") |
| 136 | + --save-deployment-files string Save generated deployment files to the specified directory (default "/opt/nvidia/k8s-launch-kit/deployment") |
| 137 | + --spectrum-x Enable Spectrum X deployment |
| 138 | + --user-config string Use provided cluster configuration file instead of auto-discovery (skips cluster discovery) |
| 139 | +
|
| 140 | + Use "l8k [command] --help" for more information about a command. |
| 141 | +
|
| 142 | +-------------- |
| 143 | +Usage Examples |
| 144 | +-------------- |
| 145 | + |
| 146 | +================= |
| 147 | +Complete Workflow |
| 148 | +================= |
| 149 | +Discover cluster config, generate files, and deploy: |
| 150 | + |
| 151 | +.. code-block:: bash |
| 152 | +
|
| 153 | + l8k --discover-cluster-config --save-cluster-config ./cluster-config.yaml \ |
| 154 | + --fabric ethernet --deployment-type sriov --multirail \ |
| 155 | + --save-deployment-files ./deployments \ |
| 156 | + --deploy --kubeconfig ~/.kube/config |
| 157 | +
|
| 158 | +
|
| 159 | +================================ |
| 160 | +Discover Cluster Configuration |
| 161 | +================================ |
| 162 | + |
| 163 | +.. code-block:: bash |
| 164 | +
|
| 165 | + l8k --discover-cluster-config --kubeconfig ~/.kube/config --save-cluster-config ./my-cluster-config.yaml |
| 166 | +
|
| 167 | +
|
| 168 | +========================== |
| 169 | +Use Existing Configuration |
| 170 | +========================== |
| 171 | + |
| 172 | +Generate and deploy with pre-existing config: |
| 173 | + |
| 174 | +.. code-block:: bash |
| 175 | +
|
| 176 | + l8k --user-config ./existing-config.yaml \ |
| 177 | + --fabric ethernet --deployment-type sriov --multirail \ |
| 178 | + --deploy --kubeconfig ~/.kube/config |
| 179 | +
|
| 180 | +========================= |
| 181 | +Generate Deployment Files |
| 182 | +========================= |
| 183 | + |
| 184 | +.. code-block:: bash |
| 185 | +
|
| 186 | + l8k --user-config ./config.yaml \ |
| 187 | + --fabric ethernet --deployment-type sriov --multirail \ |
| 188 | + --save-deployment-files ./deployments |
| 189 | +
|
| 190 | +======================================================= |
| 191 | +Generate Deployment Files using Natural Language Prompt |
| 192 | +======================================================= |
| 193 | + |
| 194 | +Kubernetes Launch Kit supports LLM-assisted profile generation. You can provide a natural-language prompt to the tool, and it will generate a deployment profile for you. |
| 195 | +To configure the LLM, you need to provide an API key for the OpenAI Azure backend. |
| 196 | + |
| 197 | +.. code-block:: bash |
| 198 | +
|
| 199 | + echo "I want to enable multirail networking in my AI cluster" > requirements.txt |
| 200 | + l8k --user-config ./config.yaml \ |
| 201 | + --prompt requirements.txt --llm-vendor openai-azure --llm-api-key <OPENAI_AZURE_KEY> \ |
| 202 | + --save-deployment-files ./deployments |
| 203 | +
|
| 204 | +-------------------------- |
| 205 | +Configuration File Format |
| 206 | +-------------------------- |
| 207 | + |
| 208 | +After the cluster configuration is discovered, the tool will save the configuration to a file. |
| 209 | +You can use this file as a starting point for your own configuration. To use your own configuration file, pass it to the tool with the ``--user-config`` flag. |
| 210 | + |
| 211 | +.. code-block:: yaml |
| 212 | + :substitutions: |
| 213 | +
|
| 214 | + networkOperator: |
| 215 | + version: |k8s-launch-kit-version| |
| 216 | + componentVersion: |k8s-launch-kit-component-version| |
| 217 | + repository: |k8s-launch-kit-network-operator-repository| |
| 218 | + namespace: nvidia-network-operator |
| 219 | + nvIpam: |
| 220 | + poolName: nv-ipam-pool |
| 221 | + subnets: |
| 222 | + - subnet: 192.168.2.0/24 |
| 223 | + gateway: 192.168.2.1 |
| 224 | + - subnet: 192.168.3.0/24 |
| 225 | + gateway: 192.168.3.1 |
| 226 | + - subnet: 192.168.4.0/24 |
| 227 | + gateway: 192.168.4.1 |
| 228 | + - subnet: 192.168.5.0/24 |
| 229 | + gateway: 192.168.5.1 |
| 230 | + - subnet: 192.168.6.0/24 |
| 231 | + gateway: 192.168.6.1 |
| 232 | + - subnet: 192.168.7.0/24 |
| 233 | + gateway: 192.168.7.1 |
| 234 | + - subnet: 192.168.8.0/24 |
| 235 | + gateway: 192.168.8.1 |
| 236 | + - subnet: 192.168.9.0/24 |
| 237 | + gateway: 192.168.9.1 |
| 238 | + - subnet: 192.168.10.0/24 |
| 239 | + gateway: 192.168.10.1 |
| 240 | + sriov: |
| 241 | + mtu: 9000 |
| 242 | + numVfs: 8 |
| 243 | + priority: 90 |
| 244 | + resourceName: sriov_resource |
| 245 | + networkName: sriov_network |
| 246 | + hostdev: |
| 247 | + resourceName: hostdev-resource |
| 248 | + networkName: hostdev-network |
| 249 | + rdmaShared: |
| 250 | + resourceName: rdma_shared_resource |
| 251 | + hcaMax: 63 |
| 252 | + ipoib: |
| 253 | + networkName: ipoib-network |
| 254 | + macvlan: |
| 255 | + networkName: macvlan-network |
| 256 | + clusterConfig: |
| 257 | + capabilities: |
| 258 | + nodes: |
| 259 | + sriov: true |
| 260 | + rdma: true |
| 261 | + ib: true |
| 262 | + pfs: |
| 263 | + - rdmaDevice: mlx5_0 |
| 264 | + pciAddress: "0000:03:00.0" |
| 265 | + networkInterface: enp3s0f0np0 |
| 266 | + traffic: east-west |
| 267 | + - rdmaDevice: mlx5_1 |
| 268 | + pciAddress: "0000:03:00.1" |
| 269 | + networkInterface: enp3s0f1np1 |
| 270 | + traffic: east-west |
| 271 | + - rdmaDevice: mlx5_2 |
| 272 | + pciAddress: "0000:81:00.0" |
| 273 | + networkInterface: enp129s0np0 |
| 274 | + traffic: east-west |
| 275 | + workerNodes: |
| 276 | + - cloud-dev-41 |
| 277 | + - cloud-dev-40 |
0 commit comments