NVIDIA
diff --git a/‎CHANGELOG.md‎
Lines changed: 2 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎cmd/podgrouper/app/app.go‎
Lines changed: 2 additions & 2 deletions b/‎cmd/podgrouper/app/app.go‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎deployments/kai-scheduler/crds/kai.scheduler_configs.yaml‎
Lines changed: 0 additions & 2 deletions b/‎deployments/kai-scheduler/crds/kai.scheduler_configs.yaml‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎deployments/kai-scheduler/crds/kai.scheduler_schedulingshards.yaml‎
Lines changed: 52 additions & 0 deletions b/‎deployments/kai-scheduler/crds/kai.scheduler_schedulingshards.yaml‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎deployments/kai-scheduler/templates/rbac/prometheus-binding.yaml‎
Lines changed: 1 addition & 1 deletion b/‎deployments/kai-scheduler/templates/rbac/prometheus-binding.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pkg/apis/kai/v1/global.go‎
Lines changed: 0 additions & 3 deletions b/‎pkg/apis/kai/v1/global.go‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎pkg/apis/kai/v1/schedulingshard_types.go‎
Lines changed: 9 additions & 0 deletions b/‎pkg/apis/kai/v1/schedulingshard_types.go‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎pkg/apis/kai/v1/zz_generated.deepcopy.go‎
Lines changed: 9 additions & 5 deletions b/‎pkg/apis/kai/v1/zz_generated.deepcopy.go‎
Lines changed: 9 additions & 5 deletions
diff --git a/‎pkg/operator/operands/binder/binder_test.go‎
Lines changed: 50 additions & 17 deletions b/‎pkg/operator/operands/binder/binder_test.go‎
Lines changed: 50 additions & 17 deletions
diff --git a/‎pkg/operator/operands/binder/resources.go‎
Lines changed: 8 additions & 1 deletion b/‎pkg/operator/operands/binder/resources.go‎
Lines changed: 8 additions & 1 deletion
@@ -14,12 +14,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Added enforcement of the `nvidia` runtime class for GPU pods, with the option to enforce a custom runtime class, or disable enforcement entirely.
 - Added a preferred podAntiAffinity term by default for all services, can be set to required instead by setting `global.requireDefaultPodAffinityTerm`
 - Added support for service-level affinities
+- Added time-aware scheduling configuration to the scheduling shard
 
 ### Fixed
 - (OpenShift only) - High CPU usage for the operator pod due to continuous reconciles
 - Fixed a bug where the scheduler would not re-try updating podgroup status after failure
 - Fixed a bug where ray workloads gang scheduling would ignore `minReplicas` if autoscaling was not set
 - KAI Config wrong statuses when prometheus operand is enabled
+- GPU Operator v25.10.0 support for CDI-enabled environments
 
 ## [v0.9.1] - 2025-09-15
 
 
@@ -24,7 +24,7 @@ import (
 	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
 	"sigs.k8s.io/controller-runtime/pkg/webhook"
 
-	"github.com/NVIDIA/KAI-scheduler/pkg/apis/scheduling/v2"
+	v2 "github.com/NVIDIA/KAI-scheduler/pkg/apis/scheduling/v2"
 	kubeAiSchedulerV2alpha2 "github.com/NVIDIA/KAI-scheduler/pkg/apis/scheduling/v2alpha2"
 	controllers "github.com/NVIDIA/KAI-scheduler/pkg/podgrouper"
 	pluginshub "github.com/NVIDIA/KAI-scheduler/pkg/podgrouper/podgrouper/hub"
@@ -51,7 +51,7 @@ func init() {
 
 type App struct {
 	Mgr               manager.Manager
-	DefaultPluginsHub pluginshub.PluginsHub
+	DefaultPluginsHub *pluginshub.DefaultPluginsHub
 
 	configs    controllers.Configs
 	pluginsHub pluginshub.PluginsHub
 
@@ -3256,8 +3256,6 @@ spec:
                     description: PodLabelSelector filters pods for webhooks and pod
                       grouper
                     type: object
-                  prometheusEnabled:
-                    type: boolean
                   queueLabelKey:
                     description: QueueLabelKey specifies the pod label key whose value
                       will be the queue name of the pod.
 
@@ -55,6 +55,10 @@ spec:
                     * Only valid flags defined in the scheduler's flag set will be accepted
                     * Duplicated flags will override the behavior of flags generated by other fields
                 type: object
+              kValue:
+                description: KValue specifies the kValue for the proportion plugin.
+                  Default is 1.0.
+                type: number
               minRuntime:
                 description: MinRuntime specifies the minimum runtime of a jobs in
                   the shard
@@ -87,6 +91,54 @@ spec:
                 description: QueueDepthPerAction max number of jobs to try for action
                   per queue
                 type: object
+              usageDBConfig:
+                description: UsageDBConfig defines configuration for the usage db
+                  client
+                properties:
+                  clientType:
+                    type: string
+                  connectionString:
+                    type: string
+                  connectionStringEnvVar:
+                    type: string
+                  usageParams:
+                    description: UsageParams defines common params for all usage db
+                      clients. Some clients may not support all the params.
+                    properties:
+                      extraParams:
+                        additionalProperties:
+                          type: string
+                        description: ExtraParams are extra parameters for the usage
+                          db client, which are client specific.
+                        type: object
+                      fetchInterval:
+                        description: Fetch interval of the usage. Default is 1 minute.
+                        type: string
+                      halfLifePeriod:
+                        description: Half life period of the usage. If not set, or
+                          set to 0, the usage will not be decayed.
+                        type: string
+                      stalenessPeriod:
+                        description: Staleness period of the usage. Default is 5 minutes.
+                        type: string
+                      tumblingWindowCronString:
+                        description: A cron string used to determine when to reset
+                          resource usage for all queues.
+                        type: string
+                      waitTimeout:
+                        description: Wait timeout of the usage. Default is 1 minute.
+                        type: string
+                      windowSize:
+                        description: Window size of the usage. Default is 1 week.
+                        type: string
+                      windowType:
+                        description: Window type for time-series aggregation. If not
+                          set, defaults to sliding.
+                        type: string
+                    type: object
+                required:
+                - clientType
+                type: object
             type: object
           status:
             description: SchedulingShardStatus defines the observed state of SchedulingShard
 
@@ -7,7 +7,7 @@ metadata:
   name: kai-prometheus
 subjects:
   - kind: ServiceAccount
-    name: kai-prometheus
+    name: prometheus
     namespace: kai-scheduler
 roleRef:
   kind: ClusterRole
 
@@ -68,9 +68,6 @@ type GlobalConfig struct {
 	// +kubebuilder:validation:Optional
 	PodLabelSelector map[string]string `json:"podLabelSelector,omitempty"`
 
-	// +kubebuilder:validation:Optional
-	PrometheusEnabled *bool `json:"prometheusEnabled,omitempty"`
-
 	// Connection defines the connection configuration for TSDB
 	// +kubebuilder:validation:Optional
 	ExternalTSDBConnection *Connection `json:"connection,omitempty"`
 
@@ -21,6 +21,7 @@ import (
 	"k8s.io/utils/ptr"
 
 	"github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1/common"
+	usagedbapi "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/cache/usagedb/api"
 )
 
 const (
@@ -58,6 +59,14 @@ type SchedulingShardSpec struct {
 	// MinRuntime specifies the minimum runtime of a jobs in the shard
 	// +kubebuilder:validation:Optional
 	MinRuntime *MinRuntime `json:"minRuntime,omitempty"`
+
+	// KValue specifies the kValue for the proportion plugin. Default is 1.0.
+	// +kubebuilder:validation:Optional
+	KValue *float64 `json:"kValue,omitempty"`
+
+	// UsageDBConfig defines configuration for the usage db client
+	// +kubebuilder:validation:Optional
+	UsageDBConfig *usagedbapi.UsageDBConfig `yaml:"usageDBConfig,omitempty" json:"usageDBConfig,omitempty"`
 }
 
 func (s *SchedulingShardSpec) SetDefaultsWhereNeeded() {
 
@@ -96,26 +96,59 @@ var _ = Describe("Binder", func() {
 				Expect(deployment.Spec.Template.Labels).To(HaveKeyWithValue("kai", "scheduler"))
 			})
 
-			It("sets CDI flag if set in cluser policy", func(ctx context.Context) {
-				clusterPolicy := &nvidiav1.ClusterPolicy{
-					ObjectMeta: metav1.ObjectMeta{
-						Name: "test",
-					},
-					Spec: nvidiav1.ClusterPolicySpec{
-						CDI: nvidiav1.CDIConfigSpec{
-							Enabled: ptr.To(true),
-							Default: ptr.To(true),
+			Context("CDI Detection", func() {
+				var (
+					clusterPolicy *nvidiav1.ClusterPolicy
+				)
+				BeforeEach(func() {
+					clusterPolicy = &nvidiav1.ClusterPolicy{
+						ObjectMeta: metav1.ObjectMeta{
+							Name: "test",
 						},
-					},
-				}
+						Spec: nvidiav1.ClusterPolicySpec{
+							CDI: nvidiav1.CDIConfigSpec{
+								Enabled: ptr.To(true),
+								Default: ptr.To(true),
+							},
+						},
+					}
+				})
 
-				Expect(fakeKubeClient.Create(ctx, clusterPolicy)).To(Succeed())
-				objects, err := b.DesiredState(ctx, fakeKubeClient, kaiConfig)
-				Expect(err).To(BeNil())
+				It("sets CDI flag if set in cluser policy", func(ctx context.Context) {
+					Expect(fakeKubeClient.Create(ctx, clusterPolicy)).To(Succeed())
+					objects, err := b.DesiredState(ctx, fakeKubeClient, kaiConfig)
+					Expect(err).To(BeNil())
 
-				deploymentT := test_utils.FindTypeInObjects[*appsv1.Deployment](objects)
-				Expect(deploymentT).NotTo(BeNil())
-				Expect((*deploymentT).Spec.Template.Spec.Containers[0].Args).To(ContainElement("--cdi-enabled=true"))
+					deploymentT := test_utils.FindTypeInObjects[*appsv1.Deployment](objects)
+					Expect(deploymentT).NotTo(BeNil())
+					Expect((*deploymentT).Spec.Template.Spec.Containers[0].Args).To(ContainElement("--cdi-enabled=true"))
+				})
+
+				It("sets CDI flag to false if not set by default cluser policy", func(ctx context.Context) {
+					clusterPolicy.Spec.CDI.Default = ptr.To(false)
+					Expect(fakeKubeClient.Create(ctx, clusterPolicy)).To(Succeed())
+					objects, err := b.DesiredState(ctx, fakeKubeClient, kaiConfig)
+					Expect(err).To(BeNil())
+
+					deploymentT := test_utils.FindTypeInObjects[*appsv1.Deployment](objects)
+					Expect(deploymentT).NotTo(BeNil())
+					Expect((*deploymentT).Spec.Template.Spec.Containers[0].Args).To(ContainElement("--cdi-enabled=false"))
+				})
+
+				It("detects CDI state with GPU Operator >= v25.10.0", func(ctx context.Context) {
+					clusterPolicy.Labels = map[string]string{
+						versionLabelName: gpuOperatorVersionDefaultCDIDeprecated,
+					}
+					clusterPolicy.Spec.CDI.Default = ptr.To(false)
+					Expect(fakeKubeClient.Create(ctx, clusterPolicy)).To(Succeed())
+
+					objects, err := b.DesiredState(ctx, fakeKubeClient, kaiConfig)
+					Expect(err).To(BeNil())
+
+					deploymentT := test_utils.FindTypeInObjects[*appsv1.Deployment](objects)
+					Expect(deploymentT).NotTo(BeNil())
+					Expect((*deploymentT).Spec.Template.Spec.Containers[0].Args).To(ContainElement("--cdi-enabled=true"))
+				})
 			})
 		})
 
 
@@ -14,6 +14,7 @@ import (
 	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/apimachinery/pkg/version"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 
@@ -26,7 +27,9 @@ import (
 )
 
 const (
-	defaultResourceName = "binder"
+	defaultResourceName                    = "binder"
+	gpuOperatorVersionDefaultCDIDeprecated = "v25.10.0"
+	versionLabelName                       = "app.kubernetes.io/version"
 )
 
 func (b *Binder) deploymentForKAIConfig(
@@ -184,6 +187,10 @@ func isCdiEnabled(ctx context.Context, readerClient client.Reader) (bool, error)
 
 	nvidiaClusterPolicy := nvidiaClusterPolicies.Items[0]
 	if nvidiaClusterPolicy.Spec.CDI.Enabled != nil && *nvidiaClusterPolicy.Spec.CDI.Enabled {
+		gpuOperatorVersion, found := nvidiaClusterPolicy.Labels[versionLabelName]
+		if found && version.CompareKubeAwareVersionStrings(gpuOperatorVersion, gpuOperatorVersionDefaultCDIDeprecated) >= 0 {
+			return true, nil
+		}
 		if nvidiaClusterPolicy.Spec.CDI.Default != nil && *nvidiaClusterPolicy.Spec.CDI.Default {
 			return true, nil
 		}