From 7cacbde3e841992689f7579e8d3c0478ea6a30ec Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 09:25:54 -0700 Subject: [PATCH 1/8] added v1 api --- apix/v1/inferencepool_types.go | 277 +++++++++++++++ apix/v1/shared_types.go | 141 ++++++++ ...ence.networking.k8s.io_inferencepools.yaml | 315 ++++++++++++++++++ 3 files changed, 733 insertions(+) create mode 100644 apix/v1/inferencepool_types.go create mode 100644 apix/v1/shared_types.go create mode 100644 config/crd/bases/inference.networking.k8s.io_inferencepools.yaml diff --git a/apix/v1/inferencepool_types.go b/apix/v1/inferencepool_types.go new file mode 100644 index 000000000..f71418510 --- /dev/null +++ b/apix/v1/inferencepool_types.go @@ -0,0 +1,277 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// InferencePool is the Schema for the InferencePools API. +// +// +kubebuilder:object:root=true +// TODO: change the annotation once it gets officially approved +// +kubebuilder:metadata:annotations="api-approved.kubernetes.io=unapproved, experimental-only" +// +kubebuilder:subresource:status +// +kubebuilder:storageversion +// +genclient +type InferencePool struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec InferencePoolSpec `json:"spec,omitempty"` + + // Status defines the observed state of InferencePool. 
+ // + // +kubebuilder:default={parent: {{parentRef: {kind: "Status", name: "default"}, conditions: {{type: "Accepted", status: "Unknown", reason: "Pending", message: "Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}}}} + Status InferencePoolStatus `json:"status,omitempty"` +} + +// InferencePoolList contains a list of InferencePool. +// +// +kubebuilder:object:root=true +type InferencePoolList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []InferencePool `json:"items"` +} + +// InferencePoolSpec defines the desired state of InferencePool +type InferencePoolSpec struct { + // Selector determines which Pods are members of this inference pool. + // It matches Pods by their labels only within the same namespace; cross-namespace + // selection is not supported. + // + // The structure of this LabelSelector is intentionally simple to be compatible + // with Kubernetes Service selectors, as some implementations may translate + // this configuration into a Service resource. + // + // +kubebuilder:validation:Required + Selector LabelSelector `json:"selector"` + + // TargetPorts defines the ports to access the selected model server Pods. + // + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=1 + // +listType=map + // +listMapKey=number + TargetPorts []Port `json:"targetPorts"` + + // Extension configures an endpoint picker as an extension service. + ExtensionRef *Extension `json:"extensionRef,omitempty"` +} + +type Port struct { + // Number defines the port number to access the selected model server Pods. + // The number must be in the range 1 to 65535. + // + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=65535 + // +kubebuilder:validation:Required + Number int32 `json:"number"` +} + +// Extension specifies how to configure an extension that runs the endpoint picker. 
+type Extension struct { + // Group is the group of the referent. + // The default value is "", representing the Core API group. + // + // +optional + // +kubebuilder:default="" + Group *Group `json:"group,omitempty"` + + // Kind is the Kubernetes resource kind of the referent. + // + // Defaults to "Service" when not specified. + // + // ExternalName services can refer to CNAME DNS records that may live + // outside of the cluster and as such are difficult to reason about in + // terms of conformance. They also may not be safe to forward to (see + // CVE-2021-25740 for more information). Implementations MUST NOT + // support ExternalName Services. + // + // +optional + // +kubebuilder:default=Service + Kind *Kind `json:"kind,omitempty"` + + // Name is the name of the referent. + // + // +kubebuilder:validation:Required + Name ObjectName `json:"name"` + + // The port number on the service running the extension. When unspecified, + // implementations SHOULD infer a default value of 9002 when the Kind is + // Service. + // + // +optional + PortNumber *PortNumber `json:"portNumber,omitempty"` + + // Configures how the gateway handles the case when the extension is not responsive. + // Defaults to failClose. + // + // +optional + // +kubebuilder:default="FailClose" + FailureMode *ExtensionFailureMode `json:"failureMode"` +} + +// ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not +// responsive. +// +kubebuilder:validation:Enum=FailOpen;FailClose +type ExtensionFailureMode string + +const ( + // FailOpen specifies that the proxy should forward the request to an endpoint of its picking when the Endpoint Picker fails. + FailOpen ExtensionFailureMode = "FailOpen" + // FailClose specifies that the proxy should drop the request when the Endpoint Picker fails. + FailClose ExtensionFailureMode = "FailClose" +) + +// InferencePoolStatus defines the observed state of InferencePool. 
+type InferencePoolStatus struct { + // Parents is a list of parent resources (usually Gateways) that are + // associated with the InferencePool, and the status of the InferencePool with respect to + // each parent. + // + // A maximum of 32 Gateways will be represented in this list. When the list contains + // `kind: Status, name: default`, it indicates that the InferencePool is not + // associated with any Gateway and a controller must perform the following: + // + // - Remove the parent when setting the "Accepted" condition. + // - Add the parent when the controller will no longer manage the InferencePool + // and no other parents exist. + // + // +kubebuilder:validation:MaxItems=32 + Parents []PoolStatus `json:"parent,omitempty"` +} + +// PoolStatus defines the observed state of InferencePool from a Gateway. +type PoolStatus struct { + // GatewayRef indicates the gateway that observed state of InferencePool. + GatewayRef ParentGatewayReference `json:"parentRef"` + + // Conditions track the state of the InferencePool. + // + // Known condition types are: + // + // * "Accepted" + // * "ResolvedRefs" + // + // +optional + // +listType=map + // +listMapKey=type + // +kubebuilder:validation:MaxItems=8 + // +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}} + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +// InferencePoolConditionType is a type of condition for the InferencePool +type InferencePoolConditionType string + +// InferencePoolReason is the reason for a given InferencePoolConditionType +type InferencePoolReason string + +const ( + // This condition indicates whether the InferencePool has been accepted or rejected + // by a Gateway, and why. 
+ // // Possible reasons for this condition to be True are: + // + // * "Accepted" + // + // Possible reasons for this condition to be False are: + // + // * "NotSupportedByGateway" + // * "HTTPRouteNotAccepted" + // + // Possible reasons for this condition to be Unknown are: + // + // * "Pending" + // + // Controllers MAY raise this condition with other reasons, but should + // prefer to use the reasons listed above to improve interoperability. + InferencePoolConditionAccepted InferencePoolConditionType = "Accepted" + + // This reason is used with the "Accepted" condition when the InferencePool has been + // accepted by the Gateway. + InferencePoolReasonAccepted InferencePoolReason = "Accepted" + + // This reason is used with the "Accepted" condition when the InferencePool + // has not been accepted by a Gateway because the Gateway does not support + // InferencePool as a backend. + InferencePoolReasonNotSupportedByGateway InferencePoolReason = "NotSupportedByGateway" + + // This reason is used with the "Accepted" condition when the InferencePool is + // referenced by an HTTPRoute that has been rejected by the Gateway. The user + // should inspect the status of the referring HTTPRoute for the specific reason. + InferencePoolReasonHTTPRouteNotAccepted InferencePoolReason = "HTTPRouteNotAccepted" + + // This reason is used with the "Accepted" condition when a controller has not yet + // reconciled the InferencePool. + InferencePoolReasonPending InferencePoolReason = "Pending" +) + +const ( + // This condition indicates whether the controller was able to resolve all + // the object references for the InferencePool. + // + // Possible reasons for this condition to be True are: + // + // * "ResolvedRefs" + // + // Possible reasons for this condition to be False are: + // + // * "InvalidExtensionRef" + // + // Controllers MAY raise this condition with other reasons, but should + // prefer to use the reasons listed above to improve interoperability.
+ InferencePoolConditionResolvedRefs InferencePoolConditionType = "ResolvedRefs" + + // This reason is used with the "ResolvedRefs" condition when the condition + // is true. + InferencePoolReasonResolvedRefs InferencePoolReason = "ResolvedRefs" + + // This reason is used with the "ResolvedRefs" condition when the + // ExtensionRef is invalid in some way. This can include an unsupported kind + // or API group, or a reference to a resource that can not be found. + InferencePoolReasonInvalidExtensionRef InferencePoolReason = "InvalidExtensionRef" +) + +// ParentGatewayReference identifies an API object including its namespace, +// defaulting to Gateway. +type ParentGatewayReference struct { + // Group is the group of the referent. + // + // +optional + // +kubebuilder:default="gateway.networking.k8s.io" + Group *Group `json:"group"` + + // Kind is kind of the referent. For example "Gateway". + // + // +optional + // +kubebuilder:default=Gateway + Kind *Kind `json:"kind"` + + // Name is the name of the referent. + Name ObjectName `json:"name"` + + // Namespace is the namespace of the referent. If not present, + // the namespace of the referent is assumed to be the same as + // the namespace of the referring object. + // + // +optional + Namespace *Namespace `json:"namespace,omitempty"` +} diff --git a/apix/v1/shared_types.go b/apix/v1/shared_types.go new file mode 100644 index 000000000..bad7c1f85 --- /dev/null +++ b/apix/v1/shared_types.go @@ -0,0 +1,141 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1 + +// Group refers to a Kubernetes Group. It must either be an empty string or a +// RFC 1123 subdomain. +// +// This validation is based off of the corresponding Kubernetes validation: +// https://github.com/kubernetes/apimachinery/blob/02cfb53916346d085a6c6c7c66f882e3c6b0eca6/pkg/util/validation/validation.go#L208 +// +// Valid values include: +// +// * "" - empty string implies core Kubernetes API group +// * "gateway.networking.k8s.io" +// * "foo.example.com" +// +// Invalid values include: +// +// * "example.com/bar" - "/" is an invalid character +// +// +kubebuilder:validation:MaxLength=253 +// +kubebuilder:validation:Pattern=`^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$` +type Group string + +// Kind refers to a Kubernetes Kind. +// +// Valid values include: +// +// * "Service" +// * "HTTPRoute" +// +// Invalid values include: +// +// * "invalid/kind" - "/" is an invalid character +// +// +kubebuilder:validation:MinLength=1 +// +kubebuilder:validation:MaxLength=63 +// +kubebuilder:validation:Pattern=`^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$` +type Kind string + +// ObjectName refers to the name of a Kubernetes object. +// Object names can have a variety of forms, including RFC 1123 subdomains, +// RFC 1123 labels, or RFC 1035 labels. +// +// +kubebuilder:validation:MinLength=1 +// +kubebuilder:validation:MaxLength=253 +type ObjectName string + +// Namespace refers to a Kubernetes namespace. It must be a RFC 1123 label. 
+// +// This validation is based off of the corresponding Kubernetes validation: +// https://github.com/kubernetes/apimachinery/blob/02cfb53916346d085a6c6c7c66f882e3c6b0eca6/pkg/util/validation/validation.go#L187 +// +// This is used for Namespace name validation here: +// https://github.com/kubernetes/apimachinery/blob/02cfb53916346d085a6c6c7c66f882e3c6b0eca6/pkg/api/validation/generic.go#L63 +// +// Valid values include: +// +// * "example" +// +// Invalid values include: +// +// * "example.com" - "." is an invalid character +// +// +kubebuilder:validation:Pattern=`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$` +// +kubebuilder:validation:MinLength=1 +// +kubebuilder:validation:MaxLength=63 +type Namespace string + +// PortNumber defines a network port. +// +// +kubebuilder:validation:Minimum=1 +// +kubebuilder:validation:Maximum=65535 +type PortNumber int32 + +// LabelKey was originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731 +// Duplicated so as not to take an unexpected dependency on the Gateway API. +// +// LabelKey is the key of a label. This is used for validation +// of maps. This matches the Kubernetes "qualified name" validation that is used for labels. +// Labels are case sensitive, so: my-label and My-Label are considered distinct. +// +// Valid values include: +// +// * example +// * example.com +// * example.com/path +// * example.com/path.html +// +// Invalid values include: +// +// * example~ - "~" is an invalid character +// * example.com. - can not start or end with "." +// +// +kubebuilder:validation:MinLength=1 +// +kubebuilder:validation:MaxLength=253 +// +kubebuilder:validation:Pattern=`^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9]$` +type LabelKey string + +// LabelValue is the value of a label. This is used for validation +// of maps.
This matches the Kubernetes label validation rules: +// * must be 63 characters or less (can be empty), +// * unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]), +// * could contain dashes (-), underscores (_), dots (.), and alphanumerics between. +// +// Valid values include: +// +// * MyValue +// * my.name +// * 123-my-value +// +// +kubebuilder:validation:MinLength=0 +// +kubebuilder:validation:MaxLength=63 +// +kubebuilder:validation:Pattern=`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$` +type LabelValue string + +// LabelSelector defines a query for resources based on their labels. +// This simplified version uses only the matchLabels field. +type LabelSelector struct { + // matchLabels contains a set of required {key,value} pairs. + // An object must match every label in this map to be selected. + // The matching logic is an AND operation on all entries. + // + // +kubebuilder:validation:Required + // +kubebuilder:validation:MaxItems=64 + MatchLabels map[LabelKey]LabelValue `json:"matchLabels,omitempty" protobuf:"bytes,1,rep,name=matchLabels"` +} diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml new file mode 100644 index 000000000..8fa084b14 --- /dev/null +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -0,0 +1,315 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + api-approved.kubernetes.io: unapproved, experimental-only + inference.networking.k8s.io/bundle-version: main-dev + creationTimestamp: null + name: inferencepools.inference.networking.k8s.io +spec: + group: inference.networking.k8s.io + names: + kind: InferencePool + listKind: InferencePoolList + plural: inferencepools + singular: inferencepool + scope: Namespaced + versions: + - name: v1 + schema: + openAPIV3Schema: + description: | + InferencePool is the Schema for the InferencePools API. 
+ properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: InferencePoolSpec defines the desired state of InferencePool + properties: + extensionRef: + description: Extension configures an endpoint picker as an extension + service. + properties: + failureMode: + default: FailClose + description: |- + Configures how the gateway handles the case when the extension is not responsive. + Defaults to failClose. + enum: + - FailOpen + - FailClose + type: string + group: + default: "" + description: |- + Group is the group of the referent. + The default value is "", representing the Core API group. + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Service + description: |- + Kind is the Kubernetes resource kind of the referent. + + Defaults to "Service" when not specified. + + ExternalName services can refer to CNAME DNS records that may live + outside of the cluster and as such are difficult to reason about in + terms of conformance. They also may not be safe to forward to (see + CVE-2021-25740 for more information). Implementations MUST NOT + support ExternalName Services. 
+ maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + portNumber: + description: |- + The port number on the service running the extension. When unspecified, + implementations SHOULD infer a default value of 9002 when the Kind is + Service. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - name + type: object + selector: + description: |- + Selector determines which Pods are members of this inference pool. + It matches Pods by their labels only within the same namespace; cross-namespace + selection is not supported. + + The structure of this LabelSelector is intentionally simple to be compatible + with Kubernetes Service selectors, as some implementations may translate + this configuration into a Service resource. + properties: + matchLabels: + additionalProperties: + description: |- + LabelValue is the value of a label. This is used for validation + of maps. This matches the Kubernetes label validation rules: + * must be 63 characters or less (can be empty), + * unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]), + * could contain dashes (-), underscores (_), dots (.), and alphanumerics between. + + Valid values include: + + * MyValue + * my.name + * 123-my-value + maxLength: 63 + minLength: 0 + pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$ + type: string + description: |- + matchLabels contains a set of required {key,value} pairs. + An object must match every label in this map to be selected. + The matching logic is an AND operation on all entries. + type: object + required: + - matchLabels + type: object + targetPorts: + description: TargetPorts defines the ports to access the selected + model server Pods. + items: + properties: + number: + description: |- + Number defines the port number to access the selected model server Pods. 
+ The number must be in the range 1 to 65535. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - number + type: object + maxItems: 1 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - number + x-kubernetes-list-type: map + required: + - selector + - targetPorts + type: object + status: + default: + parent: + - conditions: + - lastTransitionTime: "1970-01-01T00:00:00Z" + message: Waiting for controller + reason: Pending + status: Unknown + type: Accepted + parentRef: + kind: Status + name: default + description: Status defines the observed state of InferencePool. + properties: + parent: + description: |- + Parents is a list of parent resources (usually Gateways) that are + associated with the InferencePool, and the status of the InferencePool with respect to + each parent. + + A maximum of 32 Gateways will be represented in this list. When the list contains + `kind: Status, name: default`, it indicates that the InferencePool is not + associated with any Gateway and a controller must perform the following: + + - Remove the parent when setting the "Accepted" condition. + - Add the parent when the controller will no longer manage the InferencePool + and no other parents exist. + items: + description: PoolStatus defines the observed state of InferencePool + from a Gateway. + properties: + conditions: + default: + - lastTransitionTime: "1970-01-01T00:00:00Z" + message: Waiting for controller + reason: Pending + status: Unknown + type: Accepted + description: |- + Conditions track the state of the InferencePool. + + Known condition types are: + + * "Accepted" + * "ResolvedRefs" + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. 
If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + parentRef: + description: GatewayRef indicates the gateway that observed + state of InferencePool. + properties: + group: + default: gateway.networking.k8s.io + description: Group is the group of the referent. 
+ maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Gateway + description: Kind is kind of the referent. For example "Gateway". + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: Name is the name of the referent. + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referent. If not present, + the namespace of the referent is assumed to be the same as + the namespace of the referring object. + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - name + type: object + required: + - parentRef + type: object + maxItems: 32 + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} +status: + acceptedNames: + kind: "" + plural: "" + conditions: null + storedVersions: null \ No newline at end of file From 7ae076545f1a44dca65d3ec89b1980149e3d8dac Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 10:55:45 -0700 Subject: [PATCH 2/8] updated to use alias --- apix/v1/inferencepool_types.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apix/v1/inferencepool_types.go b/apix/v1/inferencepool_types.go index f71418510..9b6b04288 100644 --- a/apix/v1/inferencepool_types.go +++ b/apix/v1/inferencepool_types.go @@ -82,7 +82,7 @@ type Port struct { // +kubebuilder:validation:Minimum=1 // +kubebuilder:validation:Maximum=65535 // +kubebuilder:validation:Required - Number int32 `json:"number"` + Number PortNumber `json:"number"` } // Extension specifies how to configure an extension that runs the endpoint picker. 
From f4ccc6874259bb120b42134e51625493de5e4fe5 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 15:16:01 -0700 Subject: [PATCH 3/8] updated --- apix/v1/inferencepool_types.go | 60 ++++++++++--------- apix/v1/shared_types.go | 3 +- ...ence.networking.k8s.io_inferencepools.yaml | 9 ++- 3 files changed, 42 insertions(+), 30 deletions(-) diff --git a/apix/v1/inferencepool_types.go b/apix/v1/inferencepool_types.go index 9b6b04288..e4b6e2ca7 100644 --- a/apix/v1/inferencepool_types.go +++ b/apix/v1/inferencepool_types.go @@ -29,15 +29,19 @@ import ( // +kubebuilder:storageversion // +genclient type InferencePool struct { - metav1.TypeMeta `json:",inline"` + metav1.TypeMeta `json:",inline"` + + // +optional metav1.ObjectMeta `json:"metadata,omitempty"` - Spec InferencePoolSpec `json:"spec,omitempty"` + // +required + Spec InferencePoolSpec `json:"spec,omitzero"` // Status defines the observed state of InferencePool. // // +kubebuilder:default={parent: {{parentRef: {kind: "Status", name: "default"}, conditions: {{type: "Accepted", status: "Unknown", reason: "Pending", message: "Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}}}} - Status InferencePoolStatus `json:"status,omitempty"` + // +optional + Status InferencePoolStatus `json:"status,omitzero"` } // InferencePoolList contains a list of InferencePool. @@ -59,30 +63,27 @@ type InferencePoolSpec struct { // with Kubernetes Service selectors, as some implementations may translate // this configuration into a Service resource. // - // +kubebuilder:validation:Required - Selector LabelSelector `json:"selector"` + // +required + Selector LabelSelector `json:"selector,omitempty,omitzero"` - // TargetPorts defines the ports to access the selected model server Pods. 
- // - // +kubebuilder:validation:Required // +kubebuilder:validation:MinItems=1 // +kubebuilder:validation:MaxItems=1 // +listType=map // +listMapKey=number - TargetPorts []Port `json:"targetPorts"` + // +required + TargetPorts []Port `json:"targetPorts,omitempty"` // Extension configures an endpoint picker as an extension service. - ExtensionRef *Extension `json:"extensionRef,omitempty"` + // +required + ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"` } type Port struct { // Number defines the port number to access the selected model server Pods. // The number must be in the range 1 to 65535. // - // +kubebuilder:validation:Minimum=1 - // +kubebuilder:validation:Maximum=65535 - // +kubebuilder:validation:Required - Number PortNumber `json:"number"` + // +required + Number PortNumber `json:"number,omitempty"` } // Extension specifies how to configure an extension that runs the endpoint picker. @@ -106,26 +107,26 @@ type Extension struct { // // +optional // +kubebuilder:default=Service - Kind *Kind `json:"kind,omitempty"` + Kind Kind `json:"kind,omitempty"` // Name is the name of the referent. // - // +kubebuilder:validation:Required - Name ObjectName `json:"name"` + // +required + Name ObjectName `json:"name,omitempty"` // The port number on the service running the extension. When unspecified, // implementations SHOULD infer a default value of 9002 when the Kind is // Service. // // +optional - PortNumber *PortNumber `json:"portNumber,omitempty"` + PortNumber PortNumber `json:"portNumber,omitempty"` // Configures how the gateway handles the case when the extension is not responsive. // Defaults to failClose. 
// // +optional // +kubebuilder:default="FailClose" - FailureMode *ExtensionFailureMode `json:"failureMode"` + FailureMode ExtensionFailureMode `json:"failureMode,omitempty"` } // ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not @@ -141,6 +142,7 @@ const ( ) // InferencePoolStatus defines the observed state of InferencePool. +// +kubebuilder:validation:MinProperties=1 type InferencePoolStatus struct { // Parents is a list of parent resources (usually Gateways) that are // associated with the InferencePool, and the status of the InferencePool with respect to @@ -155,14 +157,13 @@ type InferencePoolStatus struct { // and no other parents exist. // // +kubebuilder:validation:MaxItems=32 + // +optional + // +listType=atomic Parents []PoolStatus `json:"parent,omitempty"` } // PoolStatus defines the observed state of InferencePool from a Gateway. type PoolStatus struct { - // GatewayRef indicates the gateway that observed state of InferencePool. - GatewayRef ParentGatewayReference `json:"parentRef"` - // Conditions track the state of the InferencePool. // // Known condition types are: @@ -176,6 +177,10 @@ type PoolStatus struct { // +kubebuilder:validation:MaxItems=8 // +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}} Conditions []metav1.Condition `json:"conditions,omitempty"` + + // GatewayRef indicates the gateway that observed state of InferencePool. + // +required + GatewayRef ParentGatewayReference `json:"parentRef,omitzero"` } // InferencePoolConditionType is a type of condition for the InferencePool @@ -245,7 +250,7 @@ const ( InferencePoolReasonResolvedRefs InferencePoolReason = "ResolvedRefs" // This reason is used with the "ResolvedRefs" condition when the - // ExtensionRef is invalid in some way. This can include an unsupported kind + // Extension is invalid in some way. 
This can include an unsupported kind // or API group, or a reference to a resource that can not be found. InferencePoolReasonInvalidExtensionRef InferencePoolReason = "InvalidExtensionRef" ) @@ -257,21 +262,22 @@ type ParentGatewayReference struct { // // +optional // +kubebuilder:default="gateway.networking.k8s.io" - Group *Group `json:"group"` + Group *Group `json:"group,omitempty"` // Kind is kind of the referent. For example "Gateway". // // +optional // +kubebuilder:default=Gateway - Kind *Kind `json:"kind"` + Kind Kind `json:"kind,omitempty"` // Name is the name of the referent. - Name ObjectName `json:"name"` + // +required + Name ObjectName `json:"name,omitempty"` // Namespace is the namespace of the referent. If not present, // the namespace of the referent is assumed to be the same as // the namespace of the referring object. // // +optional - Namespace *Namespace `json:"namespace,omitempty"` + Namespace Namespace `json:"namespace,omitempty"` } diff --git a/apix/v1/shared_types.go b/apix/v1/shared_types.go index bad7c1f85..64425e735 100644 --- a/apix/v1/shared_types.go +++ b/apix/v1/shared_types.go @@ -32,6 +32,7 @@ package v1 // // * "example.com/bar" - "/" is an invalid character // +// +kubebuilder:validation:MinLength=0 // +kubebuilder:validation:MaxLength=253 // +kubebuilder:validation:Pattern=`^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$` type Group string @@ -135,7 +136,7 @@ type LabelSelector struct { // An object must match every label in this map to be selected. // The matching logic is an AND operation on all entries. 
// - // +kubebuilder:validation:Required + // +required // +kubebuilder:validation:MaxItems=64 MatchLabels map[LabelKey]LabelValue `json:"matchLabels,omitempty" protobuf:"bytes,1,rep,name=matchLabels"` } diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 8fa084b14..f5a102ee6 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -60,6 +60,7 @@ spec: Group is the group of the referent. The default value is "", representing the Core API group. maxLength: 253 + minLength: 0 pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string kind: @@ -132,8 +133,6 @@ spec: - matchLabels type: object targetPorts: - description: TargetPorts defines the ports to access the selected - model server Pods. items: properties: number: @@ -154,6 +153,7 @@ spec: - number x-kubernetes-list-type: map required: + - extensionRef - selector - targetPorts type: object @@ -170,6 +170,7 @@ spec: kind: Status name: default description: Status defines the observed state of InferencePool. + minProperties: 1 properties: parent: description: |- @@ -270,6 +271,7 @@ spec: default: gateway.networking.k8s.io description: Group is the group of the referent. 
maxLength: 253 + minLength: 0 pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string kind: @@ -301,7 +303,10 @@ spec: type: object maxItems: 32 type: array + x-kubernetes-list-type: atomic type: object + required: + - spec type: object served: true storage: true From 0050467797e587072430f02b43fc98b6bd8b1569 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 18 Aug 2025 15:10:35 -0700 Subject: [PATCH 4/8] updated api --- apix/v1/inferencepool_types.go | 7 ++++++- .../bases/inference.networking.k8s.io_inferencepools.yaml | 7 +++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/apix/v1/inferencepool_types.go b/apix/v1/inferencepool_types.go index e4b6e2ca7..7f76b03d8 100644 --- a/apix/v1/inferencepool_types.go +++ b/apix/v1/inferencepool_types.go @@ -25,6 +25,7 @@ import ( // +kubebuilder:object:root=true // TODO: change the annotation once it gets officially approved // +kubebuilder:metadata:annotations="api-approved.kubernetes.io=unapproved, experimental-only" +// +kubebuilder:resource:shortName=infpool // +kubebuilder:subresource:status // +kubebuilder:storageversion // +genclient @@ -66,6 +67,8 @@ type InferencePoolSpec struct { // +required Selector LabelSelector `json:"selector,omitempty,omitzero"` + // TargetPorts defines a list of ports that are exposed by this InferencePool. + // Currently, the list may only include a single port definition. // +kubebuilder:validation:MinItems=1 // +kubebuilder:validation:MaxItems=1 // +listType=map @@ -78,6 +81,7 @@ type InferencePoolSpec struct { ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"` } +// Port defines the network port that will be exposed by this InferencePool. type Port struct { // Number defines the port number to access the selected model server Pods. // The number must be in the range 1 to 65535. @@ -119,7 +123,8 @@ type Extension struct { // Service. 
// // +optional - PortNumber PortNumber `json:"portNumber,omitempty"` + //nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer here as 0 usually means all ports. + PortNumber *PortNumber `json:"portNumber,omitempty"` // Configures how the gateway handles the case when the extension is not responsive. // Defaults to failClose. diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index f5a102ee6..abfd94d84 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -12,6 +12,8 @@ spec: kind: InferencePool listKind: InferencePoolList plural: inferencepools + shortNames: + - infpool singular: inferencepool scope: Namespaced versions: @@ -133,7 +135,12 @@ spec: - matchLabels type: object targetPorts: + description: |- + TargetPorts defines a list of ports that are exposed by this InferencePool. + Currently, the list may only include a single port definition. items: + description: Port defines the network port that will be exposed + by this InferencePool. properties: number: description: |- From 80dee7be90240d2ba9a459b91332a5a286c89ff4 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Tue, 19 Aug 2025 13:53:22 -0700 Subject: [PATCH 5/8] updated targetports listtype --- apix/v1/inferencepool_types.go | 3 +-- .../crd/bases/inference.networking.k8s.io_inferencepools.yaml | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/apix/v1/inferencepool_types.go b/apix/v1/inferencepool_types.go index 7f76b03d8..d2bcd9867 100644 --- a/apix/v1/inferencepool_types.go +++ b/apix/v1/inferencepool_types.go @@ -71,8 +71,7 @@ type InferencePoolSpec struct { // Currently, the list may only include a single port definition. 
// +kubebuilder:validation:MinItems=1 // +kubebuilder:validation:MaxItems=1 - // +listType=map - // +listMapKey=number + // +listType=atomic // +required TargetPorts []Port `json:"targetPorts,omitempty"` diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index abfd94d84..e0d2ab8dc 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -156,9 +156,7 @@ spec: maxItems: 1 minItems: 1 type: array - x-kubernetes-list-map-keys: - - number - x-kubernetes-list-type: map + x-kubernetes-list-type: atomic required: - extensionRef - selector From 9981a824a4631036cb24c2fe33d81b584600b829 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Wed, 20 Aug 2025 16:12:14 -0700 Subject: [PATCH 6/8] updated to use endpointpickerref --- apix/v1/inferencepool_types.go | 6 +++--- .../bases/inference.networking.k8s.io_inferencepools.yaml | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apix/v1/inferencepool_types.go b/apix/v1/inferencepool_types.go index d2bcd9867..f53781675 100644 --- a/apix/v1/inferencepool_types.go +++ b/apix/v1/inferencepool_types.go @@ -75,9 +75,9 @@ type InferencePoolSpec struct { // +required TargetPorts []Port `json:"targetPorts,omitempty"` - // Extension configures an endpoint picker as an extension service. + // EndpointPickerRef configures an endpoint picker as an extension service. // +required - ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"` + EndpointPickerRef EndpointPickerRef `json:"endpointPickerRef,omitempty,omitzero"` } // Port defines the network port that will be exposed by this InferencePool. @@ -90,7 +90,7 @@ type Port struct { } // Extension specifies how to configure an extension that runs the endpoint picker. -type Extension struct { +type EndpointPickerRef struct { // Group is the group of the referent. 
// The default value is "", representing the Core API group. // diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index e0d2ab8dc..a30183977 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -43,9 +43,9 @@ spec: spec: description: InferencePoolSpec defines the desired state of InferencePool properties: - extensionRef: - description: Extension configures an endpoint picker as an extension - service. + endpointPickerRef: + description: EndpointPickerRef configures an endpoint picker as an + extension service. properties: failureMode: default: FailClose @@ -158,7 +158,7 @@ spec: type: array x-kubernetes-list-type: atomic required: - - extensionRef + - endpointPickerRef - selector - targetPorts type: object From 33c8e898711e7b0fb2c1c7af3a11031f1c76b43a Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Fri, 22 Aug 2025 16:32:26 -0700 Subject: [PATCH 7/8] final clean up --- apix/v1/inferencepool_types.go | 189 ++++++++++-------- apix/v1/shared_types.go | 5 +- ...ence.networking.k8s.io_inferencepools.yaml | 103 +++++----- 3 files changed, 157 insertions(+), 140 deletions(-) diff --git a/apix/v1/inferencepool_types.go b/apix/v1/inferencepool_types.go index f53781675..21f4e77e4 100644 --- a/apix/v1/inferencepool_types.go +++ b/apix/v1/inferencepool_types.go @@ -24,7 +24,7 @@ import ( // // +kubebuilder:object:root=true // TODO: change the annotation once it gets officially approved -// +kubebuilder:metadata:annotations="api-approved.kubernetes.io=unapproved, experimental-only" +// +kubebuilder:metadata:annotations="api-approved.kubernetes.io=https://github.com/kubernetes-sigs/gateway-api-inference-extension/pull/1173" // +kubebuilder:resource:shortName=infpool // +kubebuilder:subresource:status // +kubebuilder:storageversion @@ -35,17 +35,19 @@ type InferencePool struct { // +optional 
metav1.ObjectMeta `json:"metadata,omitempty"` + // Spec defines the desired state of the InferencePool. + // // +required Spec InferencePoolSpec `json:"spec,omitzero"` - // Status defines the observed state of InferencePool. + // Status defines the observed state of the InferencePool. // - // +kubebuilder:default={parent: {{parentRef: {kind: "Status", name: "default"}, conditions: {{type: "Accepted", status: "Unknown", reason: "Pending", message: "Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}}}} // +optional - Status InferencePoolStatus `json:"status,omitzero"` + //nolint:kubeapilinter // status should not be a pointer. + Status InferencePoolStatus `json:"status,omitempty"` } -// InferencePoolList contains a list of InferencePool. +// InferencePoolList contains a list of InferencePools. // // +kubebuilder:object:root=true type InferencePoolList struct { @@ -54,7 +56,7 @@ type InferencePoolList struct { Items []InferencePool `json:"items"` } -// InferencePoolSpec defines the desired state of InferencePool +// InferencePoolSpec defines the desired state of the InferencePool. type InferencePoolSpec struct { // Selector determines which Pods are members of this inference pool. // It matches Pods by their labels only within the same namespace; cross-namespace @@ -65,19 +67,22 @@ type InferencePoolSpec struct { // this configuration into a Service resource. // // +required - Selector LabelSelector `json:"selector,omitempty,omitzero"` + Selector LabelSelector `json:"selector,omitzero"` // TargetPorts defines a list of ports that are exposed by this InferencePool. // Currently, the list may only include a single port definition. + // // +kubebuilder:validation:MinItems=1 // +kubebuilder:validation:MaxItems=1 // +listType=atomic // +required TargetPorts []Port `json:"targetPorts,omitempty"` - // EndpointPickerRef configures an endpoint picker as an extension service. 
+ // EndpointPickerRef is a reference to the Endpoint Picker extension and its + // associated configuration. + // // +required - EndpointPickerRef EndpointPickerRef `json:"endpointPickerRef,omitempty,omitzero"` + EndpointPickerRef EndpointPickerRef `json:"endpointPickerRef,omitzero"` } // Port defines the network port that will be exposed by this InferencePool. @@ -89,10 +94,11 @@ type Port struct { Number PortNumber `json:"number,omitempty"` } -// Extension specifies how to configure an extension that runs the endpoint picker. +// EndpointPickerRef specifies a reference to an Endpoint Picker extension and its +// associated configuration. type EndpointPickerRef struct { - // Group is the group of the referent. - // The default value is "", representing the Core API group. + // Group is the group of the referent API object. When unspecified, the default value + // is "", representing the Core API group. // // +optional // +kubebuilder:default="" @@ -100,6 +106,8 @@ type EndpointPickerRef struct { // Kind is the Kubernetes resource kind of the referent. // + // Required if the referent is ambiguous, e.g. service with multiple ports. + // // Defaults to "Service" when not specified. // // ExternalName services can refer to CNAME DNS records that may live @@ -110,67 +118,71 @@ type EndpointPickerRef struct { // // +optional // +kubebuilder:default=Service - Kind Kind `json:"kind,omitempty"` + //nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct. + Kind *Kind `json:"kind,omitempty"` - // Name is the name of the referent. + // Name is the name of the referent API object. // // +required Name ObjectName `json:"name,omitempty"` - // The port number on the service running the extension. When unspecified, - // implementations SHOULD infer a default value of 9002 when the Kind is - // Service. + // PortNumber is the port number of the Endpoint Picker extension service. 
When unspecified, + // implementations SHOULD infer a default value of 9002 when the kind field is "Service" or + // unspecified (defaults to "Service"). // // +optional - //nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer here as 0 usually means all ports. + //nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct. PortNumber *PortNumber `json:"portNumber,omitempty"` - // Configures how the gateway handles the case when the extension is not responsive. - // Defaults to failClose. + // FailureMode configures how the parent handles the case when the Endpoint Picker extension + // is non-responsive. When unspecified, defaults to "FailClose". // // +optional // +kubebuilder:default="FailClose" - FailureMode ExtensionFailureMode `json:"failureMode,omitempty"` + //nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct. + FailureMode *EndpointPickerFailureMode `json:"failureMode,omitempty"` } -// ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not -// responsive. +// EndpointPickerFailureMode defines the options for how the parent handles the case when the +// Endpoint Picker extension is non-responsive. +// // +kubebuilder:validation:Enum=FailOpen;FailClose -type ExtensionFailureMode string +type EndpointPickerFailureMode string const ( - // FailOpen specifies that the proxy should forward the request to an endpoint of its picking when the Endpoint Picker fails. - FailOpen ExtensionFailureMode = "FailOpen" - // FailClose specifies that the proxy should drop the request when the Endpoint Picker fails. - FailClose ExtensionFailureMode = "FailClose" + // EndpointPickerFailOpen specifies that the parent should forward the request to an endpoint + // of its picking when the Endpoint Picker extension fails. 
+ EndpointPickerFailOpen EndpointPickerFailureMode = "FailOpen" + // EndpointPickerFailClose specifies that the parent should drop the request when the Endpoint + // Picker extension fails. + EndpointPickerFailClose EndpointPickerFailureMode = "FailClose" ) -// InferencePoolStatus defines the observed state of InferencePool. -// +kubebuilder:validation:MinProperties=1 +// InferencePoolStatus defines the observed state of the InferencePool. type InferencePoolStatus struct { - // Parents is a list of parent resources (usually Gateways) that are - // associated with the InferencePool, and the status of the InferencePool with respect to - // each parent. + // Parents is a list of parent resources, typically Gateways, that are associated with + // the InferencePool, and the status of the InferencePool with respect to each parent. // - // A maximum of 32 Gateways will be represented in this list. When the list contains - // `kind: Status, name: default`, it indicates that the InferencePool is not - // associated with any Gateway and a controller must perform the following: + // A controller that manages the InferencePool, must add an entry for each parent it manages + // and remove the parent entry when the controller no longer considers the InferencePool to + // be associated with that parent. // - // - Remove the parent when setting the "Accepted" condition. - // - Add the parent when the controller will no longer manage the InferencePool - // and no other parents exist. + // A maximum of 32 parents will be represented in this list. When the list is empty, + // it indicates that the InferencePool is not associated with any parents. // // +kubebuilder:validation:MaxItems=32 // +optional // +listType=atomic - Parents []PoolStatus `json:"parent,omitempty"` + Parents []ParentStatus `json:"parents,omitempty"` } -// PoolStatus defines the observed state of InferencePool from a Gateway. -type PoolStatus struct { - // Conditions track the state of the InferencePool. 
+// ParentStatus defines the observed state of InferencePool from a Parent, i.e. Gateway. +type ParentStatus struct { + // Conditions is a list of status conditions that provide information about the observed + // state of the InferencePool. This field is required to be set by the controller that + // manages the InferencePool. // - // Known condition types are: + // Supported condition types are: // // * "Accepted" // * "ResolvedRefs" @@ -179,31 +191,33 @@ type PoolStatus struct { // +listType=map // +listMapKey=type // +kubebuilder:validation:MaxItems=8 - // +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}} Conditions []metav1.Condition `json:"conditions,omitempty"` - // GatewayRef indicates the gateway that observed state of InferencePool. + // ParentRef is used to identify the parent resource that this status + // is associated with. It is used to match the InferencePool with the parent + // resource, such as a Gateway. + // // +required - GatewayRef ParentGatewayReference `json:"parentRef,omitzero"` + ParentRef ParentReference `json:"parentRef,omitzero"` } -// InferencePoolConditionType is a type of condition for the InferencePool +// InferencePoolConditionType is a type of status condition for the InferencePool. type InferencePoolConditionType string -// InferencePoolReason is the reason for a given InferencePoolConditionType +// InferencePoolReason is the reason for a type of InferencePool status condition. type InferencePoolReason string const ( - // This condition indicates whether the InferencePool has been accepted or rejected - // by a Gateway, and why. + // InferencePoolConditionAccepted is a type of condition that indicates whether + // the InferencePool has been accepted or rejected by a Parent, and why. 
// // Possible reasons for this condition to be True are: // - // * "Accepted" + // * "Accepted" // // Possible reasons for this condition to be False are: // - // * "NotSupportedByGateway" + // * "NotSupportedByParent" // * "HTTPRouteNotAccepted" // // Possible reasons for this condition to be Unknown are: @@ -214,28 +228,26 @@ const ( // prefer to use the reasons listed above to improve interoperability. InferencePoolConditionAccepted InferencePoolConditionType = "Accepted" - // This reason is used with the "Accepted" condition when the InferencePool has been - // accepted by the Gateway. + // InferencePoolReasonAccepted is a reason used with the "Accepted" condition + // when the InferencePool is accepted by a Parent because the Parent supports + // InferencePool as a backend. InferencePoolReasonAccepted InferencePoolReason = "Accepted" - // This reason is used with the "Accepted" condition when the InferencePool - // has not been accepted by a Gateway because the Gateway does not support - // InferencePool as a backend. - InferencePoolReasonNotSupportedByGateway InferencePoolReason = "NotSupportedByGateway" + // InferencePoolReasonNotSupportedByParent is a reason used with the "Accepted" + // condition when the InferencePool has not been accepted by a Parent because + // the Parent does not support InferencePool as a backend. + InferencePoolReasonNotSupportedByParent InferencePoolReason = "NotSupportedByParent" - // This reason is used with the "Accepted" condition when the InferencePool is - // referenced by an HTTPRoute that has been rejected by the Gateway. The user - // should inspect the status of the referring HTTPRoute for the specific reason. + // InferencePoolReasonHTTPRouteNotAccepted is an optional reason used with the + // "Accepted" condition when the InferencePool is referenced by an HTTPRoute that + // has been rejected by the Parent. The user should inspect the status of the + // referring HTTPRoute for the specific reason. 
InferencePoolReasonHTTPRouteNotAccepted InferencePoolReason = "HTTPRouteNotAccepted" - - // This reason is used with the "Accepted" when a controller has not yet - // reconciled the InferencePool. - InferencePoolReasonPending InferencePoolReason = "Pending" ) const ( - // This condition indicates whether the controller was able to resolve all - // the object references for the InferencePool. + // InferencePoolConditionResolvedRefs is a type of condition that indicates whether + // the controller was able to resolve all the object references for the InferencePool. // // Possible reasons for this condition to be True are: // @@ -249,39 +261,48 @@ const ( // prefer to use the reasons listed above to improve interoperability. InferencePoolConditionResolvedRefs InferencePoolConditionType = "ResolvedRefs" - // This reason is used with the "ResolvedRefs" condition when the condition - // is true. + // InferencePoolReasonResolvedRefs is a reason used with the "ResolvedRefs" + // condition when the condition is true. InferencePoolReasonResolvedRefs InferencePoolReason = "ResolvedRefs" - // This reason is used with the "ResolvedRefs" condition when the - // Extension is invalid in some way. This can include an unsupported kind - // or API group, or a reference to a resource that can not be found. + // InferencePoolReasonInvalidExtensionRef is a reason used with the "ResolvedRefs" + // condition when the Extension is invalid in some way. This can include an + // unsupported kind or API group, or a reference to a resource that cannot be found. InferencePoolReasonInvalidExtensionRef InferencePoolReason = "InvalidExtensionRef" ) -// ParentGatewayReference identifies an API object including its namespace, -// defaulting to Gateway. -type ParentGatewayReference struct { - // Group is the group of the referent. +// ParentReference identifies an API object. It is used to associate the InferencePool with a +// parent resource, such as a Gateway. 
+type ParentReference struct { + // Group is the group of the referent API object. When unspecified, the referent is assumed + // to be in the "gateway.networking.k8s.io" API group. // // +optional // +kubebuilder:default="gateway.networking.k8s.io" Group *Group `json:"group,omitempty"` - // Kind is kind of the referent. For example "Gateway". + // Kind is the kind of the referent API object. When unspecified, the referent is assumed + // to be a "Gateway" kind. // // +optional // +kubebuilder:default=Gateway - Kind Kind `json:"kind,omitempty"` + //nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct. + Kind *Kind `json:"kind,omitempty"` - // Name is the name of the referent. + // Name is the name of the referent API object. + // // +required Name ObjectName `json:"name,omitempty"` - // Namespace is the namespace of the referent. If not present, - // the namespace of the referent is assumed to be the same as - // the namespace of the referring object. + // Namespace is the namespace of the referenced object. When unspecified, the local + // namespace is inferred. + // + // Note that when a namespace different than the local namespace is specified, + // a ReferenceGrant object is required in the referent namespace to allow that + // namespace's owner to accept the reference. See the ReferenceGrant + // documentation for details: https://gateway-api.sigs.k8s.io/api-types/referencegrant/ // // +optional - Namespace Namespace `json:"namespace,omitempty"` + //nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct. + Namespace *Namespace `json:"namespace,omitempty"` } diff --git a/apix/v1/shared_types.go b/apix/v1/shared_types.go index 64425e735..bc315fd4f 100644 --- a/apix/v1/shared_types.go +++ b/apix/v1/shared_types.go @@ -132,11 +132,12 @@ type LabelValue string // LabelSelector defines a query for resources based on their labels. 
// This simplified version uses only the matchLabels field. type LabelSelector struct { - // matchLabels contains a set of required {key,value} pairs. + // MatchLabels contains a set of required {key,value} pairs. // An object must match every label in this map to be selected. // The matching logic is an AND operation on all entries. // // +required + // +kubebuilder:validation:MinItems=1 // +kubebuilder:validation:MaxItems=64 - MatchLabels map[LabelKey]LabelValue `json:"matchLabels,omitempty" protobuf:"bytes,1,rep,name=matchLabels"` + MatchLabels map[LabelKey]LabelValue `json:"matchLabels,omitempty"` } diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index a30183977..94b5c0ff2 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - api-approved.kubernetes.io: unapproved, experimental-only + api-approved.kubernetes.io: https://github.com/kubernetes-sigs/gateway-api-inference-extension/pull/1173 inference.networking.k8s.io/bundle-version: main-dev creationTimestamp: null name: inferencepools.inference.networking.k8s.io @@ -41,17 +41,18 @@ spec: metadata: type: object spec: - description: InferencePoolSpec defines the desired state of InferencePool + description: Spec defines the desired state of the InferencePool. properties: endpointPickerRef: - description: EndpointPickerRef configures an endpoint picker as an - extension service. + description: |- + EndpointPickerRef is a reference to the Endpoint Picker extension and its + associated configuration. properties: failureMode: default: FailClose description: |- - Configures how the gateway handles the case when the extension is not responsive. - Defaults to failClose. 
+ FailureMode configures how the parent handles the case when the Endpoint Picker extension + is non-responsive. When unspecified, defaults to "FailClose". enum: - FailOpen - FailClose @@ -59,8 +60,8 @@ spec: group: default: "" description: |- - Group is the group of the referent. - The default value is "", representing the Core API group. + Group is the group of the referent API object. When unspecified, the default value + is "", representing the Core API group. maxLength: 253 minLength: 0 pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ @@ -70,6 +71,8 @@ spec: description: |- Kind is the Kubernetes resource kind of the referent. + Required if the referent is ambiguous, e.g. service with multiple ports. + Defaults to "Service" when not specified. ExternalName services can refer to CNAME DNS records that may live @@ -82,15 +85,15 @@ spec: pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ type: string name: - description: Name is the name of the referent. + description: Name is the name of the referent API object. maxLength: 253 minLength: 1 type: string portNumber: description: |- - The port number on the service running the extension. When unspecified, - implementations SHOULD infer a default value of 9002 when the Kind is - Service. + PortNumber is the port number of the Endpoint Picker extension service. When unspecified, + implementations SHOULD infer a default value of 9002 when the kind field is "Service" or + unspecified (defaults to "Service"). format: int32 maximum: 65535 minimum: 1 @@ -127,7 +130,7 @@ spec: pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$ type: string description: |- - matchLabels contains a set of required {key,value} pairs. + MatchLabels contains a set of required {key,value} pairs. An object must match every label in this map to be selected. The matching logic is an AND operation on all entries. 
type: object @@ -163,48 +166,30 @@ spec: - targetPorts type: object status: - default: - parent: - - conditions: - - lastTransitionTime: "1970-01-01T00:00:00Z" - message: Waiting for controller - reason: Pending - status: Unknown - type: Accepted - parentRef: - kind: Status - name: default - description: Status defines the observed state of InferencePool. - minProperties: 1 + description: Status defines the observed state of the InferencePool. properties: - parent: + parents: description: |- - Parents is a list of parent resources (usually Gateways) that are - associated with the InferencePool, and the status of the InferencePool with respect to - each parent. + Parents is a list of parent resources, typically Gateways, that are associated with + the InferencePool, and the status of the InferencePool with respect to each parent. - A maximum of 32 Gateways will be represented in this list. When the list contains - `kind: Status, name: default`, it indicates that the InferencePool is not - associated with any Gateway and a controller must perform the following: + A controller that manages the InferencePool, must add an entry for each parent it manages + and remove the parent entry when the controller no longer considers the InferencePool to + be associated with that parent. - - Remove the parent when setting the "Accepted" condition. - - Add the parent when the controller will no longer manage the InferencePool - and no other parents exist. + A maximum of 32 parents will be represented in this list. When the list is empty, + it indicates that the InferencePool is not associated with any parents. items: - description: PoolStatus defines the observed state of InferencePool - from a Gateway. + description: ParentStatus defines the observed state of InferencePool + from a Parent, i.e. Gateway. 
properties: conditions: - default: - - lastTransitionTime: "1970-01-01T00:00:00Z" - message: Waiting for controller - reason: Pending - status: Unknown - type: Accepted description: |- - Conditions track the state of the InferencePool. + Conditions is a list of status conditions that provide information about the observed + state of the InferencePool. This field is required to be set by the controller that + manages the InferencePool. - Known condition types are: + Supported condition types are: * "Accepted" * "ResolvedRefs" @@ -269,33 +254,43 @@ spec: - type x-kubernetes-list-type: map parentRef: - description: GatewayRef indicates the gateway that observed - state of InferencePool. + description: |- + ParentRef is used to identify the parent resource that this status + is associated with. It is used to match the InferencePool with the parent + resource, such as a Gateway. properties: group: default: gateway.networking.k8s.io - description: Group is the group of the referent. + description: |- + Group is the group of the referent API object. When unspecified, the referent is assumed + to be in the "gateway.networking.k8s.io" API group. maxLength: 253 minLength: 0 pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string kind: default: Gateway - description: Kind is kind of the referent. For example "Gateway". + description: |- + Kind is the kind of the referent API object. When unspecified, the referent is assumed + to be a "Gateway" kind. maxLength: 63 minLength: 1 pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ type: string name: - description: Name is the name of the referent. + description: Name is the name of the referent API object. maxLength: 253 minLength: 1 type: string namespace: description: |- - Namespace is the namespace of the referent. If not present, - the namespace of the referent is assumed to be the same as - the namespace of the referring object. + Namespace is the namespace of the referenced object. 
When unspecified, the local + namespace is inferred. + + Note that when a namespace different than the local namespace is specified, + a ReferenceGrant object is required in the referent namespace to allow that + namespace's owner to accept the reference. See the ReferenceGrant + documentation for details: https://gateway-api.sigs.k8s.io/api-types/referencegrant/ maxLength: 63 minLength: 1 pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ From 50335126da41f6da49cd9735539b57a9a951ee56 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 25 Aug 2025 12:07:24 -0700 Subject: [PATCH 8/8] clean up pointer --- apix/v1/inferencepool_types.go | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/apix/v1/inferencepool_types.go b/apix/v1/inferencepool_types.go index 21f4e77e4..af945827e 100644 --- a/apix/v1/inferencepool_types.go +++ b/apix/v1/inferencepool_types.go @@ -118,8 +118,7 @@ type EndpointPickerRef struct { // // +optional // +kubebuilder:default=Service - //nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct. - Kind *Kind `json:"kind,omitempty"` + Kind Kind `json:"kind,omitempty"` // Name is the name of the referent API object. // @@ -131,7 +130,7 @@ type EndpointPickerRef struct { // unspecified (defaults to "Service"). // // +optional - //nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct. + //nolint:kubeapilinter // ignore kubeapilinter here as we want to use a pointer: by convention zero means all ports, so we don't want to use 0 to indicate not set. PortNumber *PortNumber `json:"portNumber,omitempty"` // FailureMode configures how the parent handles the case when the Endpoint Picker extension @@ -139,8 +138,7 @@ type EndpointPickerRef struct { // // +optional // +kubebuilder:default="FailClose" - //nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct.
- FailureMode *EndpointPickerFailureMode `json:"failureMode,omitempty"` + FailureMode EndpointPickerFailureMode `json:"failureMode,omitempty"` } // EndpointPickerFailureMode defines the options for how the parent handles the case when the @@ -286,8 +284,7 @@ type ParentReference struct { // // +optional // +kubebuilder:default=Gateway - //nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct. - Kind *Kind `json:"kind,omitempty"` + Kind Kind `json:"kind,omitempty"` // Name is the name of the referent API object. // @@ -303,6 +300,5 @@ type ParentReference struct { // documentation for details: https://gateway-api.sigs.k8s.io/api-types/referencegrant/ // // +optional - //nolint:kubeapilinter // ignore kubeapilinter here as we want to use pointer for optional struct. - Namespace *Namespace `json:"namespace,omitempty"` + Namespace Namespace `json:"namespace,omitempty"` }