NVIDIA
diff --git a/‎pkg/admission/plugins/plugins.go‎
Lines changed: 57 additions & 0 deletions b/‎pkg/admission/plugins/plugins.go‎
Lines changed: 57 additions & 0 deletions
diff --git a/‎pkg/admission/webhook/v1alpha2/gpusharing/gpu_sharing.go‎
Lines changed: 145 additions & 0 deletions b/‎pkg/admission/webhook/v1alpha2/gpusharing/gpu_sharing.go‎
Lines changed: 145 additions & 0 deletions
diff --git a/‎pkg/admission/webhook/v1alpha2/gpusharing/gpu_sharing_test.go‎
Lines changed: 183 additions & 0 deletions b/‎pkg/admission/webhook/v1alpha2/gpusharing/gpu_sharing_test.go‎
Lines changed: 183 additions & 0 deletions
@@ -0,0 +1,57 @@
+// Copyright 2025 NVIDIA CORPORATION
+// SPDX-License-Identifier: Apache-2.0
+
+package plugins
+
+import (
+	"context"
+
+	v1 "k8s.io/api/core/v1"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// Plugin is the contract an admission plugin implements so that
+// KaiAdmissionPlugins can run it during pod validation and mutation.
+type Plugin interface {
+	// Name returns the plugin's name; it is attached to the log entry written
+	// when the plugin's Validate or Mutate fails.
+	Name() string
+	// Validate inspects the pod and returns a non-nil error when it fails validation.
+	Validate(*v1.Pod) error
+	// Mutate receives the pod by pointer so it can be modified in place; a
+	// non-nil error reports that the mutation failed.
+	Mutate(*v1.Pod) error
+}
+
+// KaiAdmissionPlugins holds the registered admission plugins and runs them
+// one after another in registration order.
+type KaiAdmissionPlugins struct {
+	// plugins is appended to by RegisterPlugin and iterated by Validate and Mutate.
+	plugins []Plugin
+}
+
+// New returns a KaiAdmissionPlugins whose plugin list is empty but non-nil.
+func New() *KaiAdmissionPlugins {
+	registry := new(KaiAdmissionPlugins)
+	registry.plugins = make([]Plugin, 0)
+	return registry
+}
+
+// RegisterPlugin appends plugin to the end of the plugin list, so it runs
+// after every plugin registered before it. No locking is performed here.
+func (bp *KaiAdmissionPlugins) RegisterPlugin(plugin Plugin) {
+	bp.plugins = append(bp.plugins, plugin)
+}
+
+// Validate calls Validate on every registered plugin, in registration order.
+// It stops at the first plugin that fails, logs the failure together with the
+// pod's namespace and name and the plugin's name, and returns that error
+// unchanged. It returns nil when no plugin reports an error.
+func (bp *KaiAdmissionPlugins) Validate(pod *v1.Pod) error {
+	for i := range bp.plugins {
+		current := bp.plugins[i]
+		if err := current.Validate(pod); err != nil {
+			log.FromContext(context.Background()).Error(err, "pod validation failed for pod",
+				"namespace", pod.Namespace, "name", pod.Name, "plugin", current.Name())
+			return err
+		}
+	}
+	return nil
+}
+
+// Mutate calls Mutate on every registered plugin, in registration order, each
+// receiving the same pod pointer. It stops at the first plugin that fails,
+// logs the failure together with the pod's namespace and name and the plugin's
+// name, and returns that error unchanged. Changes already applied by earlier
+// plugins are not rolled back. It returns nil when no plugin reports an error.
+func (bp *KaiAdmissionPlugins) Mutate(pod *v1.Pod) error {
+	for i := range bp.plugins {
+		current := bp.plugins[i]
+		if err := current.Mutate(pod); err != nil {
+			log.FromContext(context.Background()).Error(err, "pod mutation failed for pod",
+				"namespace", pod.Namespace, "name", pod.Name, "plugin", current.Name())
+			return err
+		}
+	}
+	return nil
+}
@@ -0,0 +1,145 @@
+// Copyright 2025 NVIDIA CORPORATION
+// SPDX-License-Identifier: Apache-2.0
+
+package gpusharing
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"golang.org/x/exp/slices"
+	v1 "k8s.io/api/core/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	"github.com/NVIDIA/KAI-scheduler/pkg/apis/scheduling/v1alpha2"
+	"github.com/NVIDIA/KAI-scheduler/pkg/binder/common/gpusharingconfigmap"
+	"github.com/NVIDIA/KAI-scheduler/pkg/common/resources"
+
+	"github.com/NVIDIA/KAI-scheduler/pkg/binder/common"
+	gpurequesthandler "github.com/NVIDIA/KAI-scheduler/pkg/binder/plugins/gpusharing/gpu-request"
+	"github.com/NVIDIA/KAI-scheduler/pkg/binder/plugins/state"
+)
+
+const (
+	// fractionContainerIndex is the index into pod.Spec.Containers of the
+	// container that Mutate and PreBind apply the GPU sharing configuration to.
+	fractionContainerIndex = 0
+	// CdiDeviceNameBase is the format string PreBind uses to turn a reserved GPU
+	// id into a device name when the GPU device plugin uses CDI.
+	CdiDeviceNameBase      = "k8s.device-plugin.nvidia.com/gpu=%s"
+)
+
+// GPUSharing is the "gpusharing" plugin. It exposes Validate/Mutate for pod
+// admission and PreBind/PostBind for the binding flow.
+type GPUSharing struct {
+	// kubeClient is handed to the config map and pod update helpers called from PreBind.
+	kubeClient             client.Client
+	// gpuDevicePluginUsesCdi makes PreBind format reserved GPU ids with CdiDeviceNameBase.
+	gpuDevicePluginUsesCdi bool
+	// gpuSharingEnabled, when false, makes Validate reject pods that request a GPU fraction.
+	gpuSharingEnabled      bool
+}
+
+// New builds a GPUSharing plugin from the given client and feature switches.
+// The three arguments are stored as-is in the fields of the same name.
+func New(kubeClient client.Client, gpuDevicePluginUsesCdi bool, gpuSharingEnabled bool) *GPUSharing {
+	plugin := new(GPUSharing)
+	plugin.kubeClient = kubeClient
+	plugin.gpuDevicePluginUsesCdi = gpuDevicePluginUsesCdi
+	plugin.gpuSharingEnabled = gpuSharingEnabled
+	return plugin
+}
+
+// Name returns the fixed plugin identifier "gpusharing".
+func (p *GPUSharing) Name() string {
+	return "gpusharing"
+}
+
+// Validate rejects a pod that requests a GPU fraction while GPU sharing is
+// disabled, naming the pod in the returned error. In every other case the
+// verdict is whatever gpurequesthandler.ValidateGpuRequests returns for the pod.
+func (p *GPUSharing) Validate(pod *v1.Pod) error {
+	// The fraction check is only consulted when sharing is disabled.
+	if p.gpuSharingEnabled || !resources.RequestsGPUFraction(pod) {
+		return gpurequesthandler.ValidateGpuRequests(pod)
+	}
+
+	return fmt.Errorf(
+		"attempting to create a pod %s/%s with gpu sharing request, while GPU sharing is disabled",
+		pod.Namespace, pod.Name,
+	)
+}
+
+// Mutate prepares a pod that requests a GPU fraction. Pods with no containers,
+// or that do not request a GPU fraction, are left untouched and nil is returned.
+//
+// For the container at fractionContainerIndex it obtains the capabilities
+// config map name and the direct-env-vars config map name from the
+// gpusharingconfigmap helpers, then passes them to the common helpers that add
+// the GPU sharing env vars, the config map volume and the env-vars config map
+// source. An error from resolving the direct-env-vars name is returned as-is,
+// before any of those three helpers run.
+func (p *GPUSharing) Mutate(pod *v1.Pod) error {
+	if len(pod.Spec.Containers) == 0 || !resources.RequestsGPUFraction(pod) {
+		return nil
+	}
+
+	target := &gpusharingconfigmap.PodContainerRef{
+		Container: &pod.Spec.Containers[fractionContainerIndex],
+		Index:     fractionContainerIndex,
+		Type:      gpusharingconfigmap.RegularContainer,
+	}
+
+	// Keep this order: the capabilities name is set before the env-vars name is extracted.
+	capabilitiesName := gpusharingconfigmap.SetGpuCapabilitiesConfigMapName(pod, target)
+	envVarsName, err := gpusharingconfigmap.ExtractDirectEnvVarsConfigMapName(pod, target)
+	if err != nil {
+		return err
+	}
+
+	container := target.Container
+	common.AddGPUSharingEnvVars(container, capabilitiesName)
+	common.SetConfigMapVolume(pod, capabilitiesName)
+	common.AddDirectEnvVarsConfigMapSource(container, envVarsName)
+	return nil
+}
+
+// PreBind prepares a pod that received a shared GPU allocation before it is
+// bound. Bind requests that common.IsSharedGPUAllocation does not report as
+// shared are ignored and nil is returned.
+//
+// For a shared allocation it:
+//  1. copies state.ReservedGPUIds and, when the device plugin uses CDI, formats
+//     each id with CdiDeviceNameBase;
+//  2. upserts the capabilities and direct-env-vars config maps for the
+//     container at fractionContainerIndex;
+//  3. records the comma-joined device list via common.SetNvidiaVisibleDevices;
+//  4. records bindRequest.Spec.ReceivedGPU.Portion via common.SetGPUPortion.
+//
+// The first failing step aborts the sequence and its error is returned. A pod
+// that has no container at fractionContainerIndex yields an error rather than
+// an index-out-of-range panic. The node argument is unused.
+func (p *GPUSharing) PreBind(
+	ctx context.Context, pod *v1.Pod, _ *v1.Node, bindRequest *v1alpha2.BindRequest, state *state.BindingState,
+) error {
+	if !common.IsSharedGPUAllocation(bindRequest) {
+		return nil
+	}
+
+	// Mutate tolerates a pod without containers; guard here as well so the
+	// indexing below reports an error instead of panicking.
+	if len(pod.Spec.Containers) <= fractionContainerIndex {
+		return fmt.Errorf("pod %s/%s has no container at index %d to configure for shared GPU allocation",
+			pod.Namespace, pod.Name, fractionContainerIndex)
+	}
+
+	// Work on a copy so the CDI rewrite does not modify the ids held in state.
+	reservedGPUIds := slices.Clone(state.ReservedGPUIds)
+	if p.gpuDevicePluginUsesCdi {
+		for index, gpuIndex := range reservedGPUIds {
+			reservedGPUIds[index] = fmt.Sprintf(CdiDeviceNameBase, gpuIndex)
+		}
+	}
+
+	containerRef := &gpusharingconfigmap.PodContainerRef{
+		Container: &pod.Spec.Containers[fractionContainerIndex],
+		Index:     fractionContainerIndex,
+		Type:      gpusharingconfigmap.RegularContainer,
+	}
+	err := p.createCapabilitiesConfigMapIfMissing(ctx, pod, containerRef)
+	if err != nil {
+		return fmt.Errorf("failed to create capabilities configmap: %w", err)
+	}
+
+	err = p.createDirectEnvMapIfMissing(ctx, pod, containerRef)
+	if err != nil {
+		return fmt.Errorf("failed to create env configmap: %w", err)
+	}
+
+	nVisibleDevicesStr := strings.Join(reservedGPUIds, ",")
+	err = common.SetNvidiaVisibleDevices(ctx, p.kubeClient, pod, containerRef, nVisibleDevicesStr)
+	if err != nil {
+		return err
+	}
+
+	return common.SetGPUPortion(ctx, p.kubeClient, pod, containerRef, bindRequest.Spec.ReceivedGPU.Portion)
+}
+
+// createCapabilitiesConfigMapIfMissing resolves the capabilities config map
+// name for containerRef and upserts a config map of that name with empty data
+// through gpusharingconfigmap.UpsertJobConfigMap. A failure to resolve the
+// name is wrapped; an upsert failure is returned unchanged.
+// NOTE(review): the "IfMissing" behaviour is presumably implemented inside
+// UpsertJobConfigMap; confirm it does not overwrite an existing config map.
+func (p *GPUSharing) createCapabilitiesConfigMapIfMissing(ctx context.Context, pod *v1.Pod,
+	containerRef *gpusharingconfigmap.PodContainerRef) error {
+	name, err := gpusharingconfigmap.ExtractCapabilitiesConfigMapName(pod, containerRef)
+	if err != nil {
+		return fmt.Errorf("failed to get capabilities configmap name: %w", err)
+	}
+
+	emptyData := map[string]string{}
+	return gpusharingconfigmap.UpsertJobConfigMap(ctx, p.kubeClient, pod, name, emptyData)
+}
+
+// createDirectEnvMapIfMissing resolves the direct-env-vars config map name for
+// containerRef and upserts a config map of that name with empty data through
+// gpusharingconfigmap.UpsertJobConfigMap. Errors from either step are returned
+// unchanged.
+// NOTE(review): the "IfMissing" behaviour is presumably implemented inside
+// UpsertJobConfigMap; confirm it does not overwrite an existing config map.
+func (p *GPUSharing) createDirectEnvMapIfMissing(ctx context.Context, pod *v1.Pod,
+	containerRef *gpusharingconfigmap.PodContainerRef) error {
+	name, err := gpusharingconfigmap.ExtractDirectEnvVarsConfigMapName(pod, containerRef)
+	if err != nil {
+		return err
+	}
+
+	return gpusharingconfigmap.UpsertJobConfigMap(ctx, p.kubeClient, pod, name, map[string]string{})
+}
+
+// PostBind is intentionally a no-op: it ignores all of its arguments and does
+// nothing.
+func (p *GPUSharing) PostBind(
+	context.Context, *v1.Pod, *v1.Node, *v1alpha2.BindRequest, *state.BindingState,
+) {
+}
@@ -0,0 +1,183 @@
+// Copyright 2025 NVIDIA CORPORATION
+// SPDX-License-Identifier: Apache-2.0
+
+package gpusharing
+
+import (
+	"fmt"
+	"testing"
+
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+
+	"github.com/NVIDIA/KAI-scheduler/pkg/common/constants"
+)
+
+// TestValidate checks GPUSharing.Validate against whole-GPU pods and pods that
+// request a shared GPU through the fraction or memory annotation, with GPU
+// sharing both enabled and disabled. Only a shared-GPU pod combined with
+// disabled GPU sharing is expected to be rejected.
+func TestValidate(t *testing.T) {
+	// newPod builds a single-container pod named test-namespace/test-pod with
+	// the given annotations (nil for none) and container resource limits.
+	newPod := func(annotations map[string]string, limits v1.ResourceList) *v1.Pod {
+		return &v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:        "test-pod",
+				Namespace:   "test-namespace",
+				Annotations: annotations,
+			},
+			Spec: v1.PodSpec{
+				Containers: []v1.Container{
+					{
+						Resources: v1.ResourceRequirements{
+							Limits: limits,
+						},
+					},
+				},
+			},
+		}
+	}
+	wholeGPUPod := func() *v1.Pod {
+		return newPod(nil, v1.ResourceList{constants.GpuResource: resource.MustParse("1")})
+	}
+	fractionPod := func() *v1.Pod {
+		return newPod(map[string]string{constants.GpuFraction: "0.5"}, v1.ResourceList{})
+	}
+	memoryPod := func() *v1.Pod {
+		return newPod(map[string]string{constants.GpuMemory: "1024"}, v1.ResourceList{})
+	}
+
+	errSharingDisabled := fmt.Errorf("attempting to create a pod test-namespace/test-pod with gpu " +
+		"sharing request, while GPU sharing is disabled")
+
+	tests := []struct {
+		name              string
+		pod               *v1.Pod
+		GPUSharingEnabled bool
+		wantErr           error
+	}{
+		{
+			name:              "GPU sharing disabled, whole GPU pod",
+			pod:               wholeGPUPod(),
+			GPUSharingEnabled: false,
+			wantErr:           nil,
+		},
+		{
+			name:              "GPU sharing enabled, whole GPU pod",
+			pod:               wholeGPUPod(),
+			GPUSharingEnabled: true,
+			wantErr:           nil,
+		},
+		{
+			name:              "GPU sharing disabled, GPU sharing pod - fraction",
+			pod:               fractionPod(),
+			GPUSharingEnabled: false,
+			wantErr:           errSharingDisabled,
+		},
+		{
+			name:              "GPU sharing enabled, GPU sharing pod - fraction",
+			pod:               fractionPod(),
+			GPUSharingEnabled: true,
+			wantErr:           nil,
+		},
+		{
+			name:              "GPU sharing disabled, GPU sharing pod - memory",
+			pod:               memoryPod(),
+			GPUSharingEnabled: false,
+			wantErr:           errSharingDisabled,
+		},
+		{
+			name:              "GPU sharing enabled, GPU sharing pod - memory",
+			pod:               memoryPod(),
+			GPUSharingEnabled: true,
+			wantErr:           nil,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			kubeClient := fake.NewClientBuilder().WithRuntimeObjects(tt.pod).Build()
+			gpuSharingPlugin := New(kubeClient, false, tt.GPUSharingEnabled)
+			err := gpuSharingPlugin.Validate(tt.pod)
+
+			// Errors are compared by message because Validate builds a fresh
+			// error value on every call.
+			switch {
+			case tt.wantErr != nil && err == nil:
+				t.Errorf("Validate() expected error %q but got nil", tt.wantErr)
+			case tt.wantErr == nil && err != nil:
+				t.Errorf("Validate() returned an unexpected error: %v", err)
+			case tt.wantErr != nil && err.Error() != tt.wantErr.Error():
+				t.Errorf("Validate()\nactual: %v\nexpected: %v\n", err, tt.wantErr)
+			}
+		})
+	}
+}