[ML] Add SageMaker #4940

Draft · wants to merge 2 commits into main
676 changes: 572 additions & 104 deletions output/schema/schema.json

Large diffs are not rendered by default.

46 changes: 46 additions & 0 deletions output/typescript/types.ts

Some generated files are not rendered by default.

3 changes: 3 additions & 0 deletions specification/_doc_ids/table.csv
@@ -8,6 +8,8 @@ alibabacloud-api-keys,https://opensearch.console.aliyun.com/cn-shanghai/rag/api-
analysis-analyzers,https://www.elastic.co/docs/reference/text-analysis/analyzer-reference,,
amazonbedrock-models,https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html,,
amazonbedrock-secret-keys,https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html,,
amazonsagemaker-invoke,https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html,,
amazonsagemaker-secret-keys,https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html,,
analysis-charfilters,https://www.elastic.co/docs/reference/text-analysis/character-filter-reference,,
analysis-normalizers,https://www.elastic.co/docs/reference/text-analysis/normalizers,,
analysis-standard-analyzer,https://www.elastic.co/docs/reference/text-analysis/analysis-standard-analyzer,,
@@ -354,6 +356,7 @@ inference-api-post-eis-chat-completion,https://www.elastic.co/docs/api/doc/elast
inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/put-inference-api.html,
inference-api-put-alibabacloud,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-alibabacloud,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-alibabacloud-ai-search.html,
inference-api-put-amazonbedrock,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonbedrock,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-amazon-bedrock.html,
inference-api-put-amazonsagemaker,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonsagemaker,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-amazon-sagemaker.html,
inference-api-put-anthropic,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-anthropic,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-anthropic.html,
inference-api-put-azureaistudio,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-ai-studio.html,
inference-api-put-azureopenai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-openai.html,
143 changes: 143 additions & 0 deletions specification/inference/_types/CommonTypes.ts
@@ -442,6 +442,149 @@ export enum AmazonBedrockServiceType {
amazonbedrock
}

export class AmazonSageMakerServiceSettings {
/**
* A valid AWS access key that has permissions to use Amazon SageMaker and access to models for invoke requests.
*/
access_key: string
/**
* The name of the SageMaker Endpoint.
* @ext_doc_id amazonsagemaker-invoke
*/
endpoint_name: string
/**
* The API format to use when calling SageMaker.
* This will structure the payload when invoking the SageMaker endpoint.
*/
api: AmazonSageMakerApi
/**
* The region that your endpoint or ARN is deployed in.
* The list of available regions per model can be found in the Amazon SageMaker documentation.
* @ext_doc_id amazonsagemaker-invoke
*/
region: string
/**
* A valid AWS secret key that is paired with the `access_key`.
* For information about creating and managing access and secret keys, refer to the AWS documentation.
* @ext_doc_id amazonsagemaker-secret-keys
*/
secret_key: string
/**
* The model id when calling a multi-model endpoint.
* @ext_doc_id amazonsagemaker-invoke
*/
target_model?: string
/**
* The container to directly invoke when calling a multi-container endpoint.
* @ext_doc_id amazonsagemaker-invoke
*/
target_container_hostname?: string
/**
* The inference component to directly invoke when calling a multi-component endpoint.
* @ext_doc_id amazonsagemaker-invoke
*/
inference_component_name?: string
/**
* The maximum number of inputs in each batch. This value is used by inference ingestion pipelines
* when processing semantic values. It correlates to the number of times the SageMaker endpoint is
* invoked (once per batch of inputs).
* @server_default 256
*/
batch_size?: integer
/**
* The number of dimensions returned by the text_embedding models. If this value is not provided, then
* it is guessed by invoking the Endpoint for the text_embedding task.
*/
dimensions?: integer
}

export enum AmazonSageMakerApi {
openai,
elastic
}
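
As a rough illustration of how these fields fit together, here is a hypothetical service_settings value for the `openai` API format. It is a sketch only: the endpoint name, region, and key values are placeholders, not values taken from this PR.

// Illustrative sketch: placeholder credentials and endpoint name.
const exampleServiceSettings: AmazonSageMakerServiceSettings = {
  access_key: '<AWS_ACCESS_KEY>', // AWS access key with SageMaker invoke permissions
  secret_key: '<AWS_SECRET_KEY>', // the paired AWS secret key
  endpoint_name: 'my-sagemaker-endpoint', // name of the deployed SageMaker endpoint
  api: AmazonSageMakerApi.openai, // payload format used when invoking the endpoint
  region: 'us-east-1', // region the endpoint is deployed in
  batch_size: 256, // optional; server default is 256
  dimensions: 384 // optional; for text_embedding it is guessed by invoking the endpoint if omitted
}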

/**
* Service Settings specific to the Elastic API for the Amazon SageMaker service.
*/
export class AmazonSageMakerElasticServiceSettings extends AmazonSageMakerServiceSettings {
Member Author commented: Let me know if I should delete this and roll it into the parent class - I don't see a lot of inheritance going on (and that's maybe a good thing), but it might look weird for TaskSettings because elastic can handle any input key/value pair.

/**
* Similarity measure used when invoking the text_embedding task type.
*/
similarity?: AmazonSageMakerSimilarity

/**
* The data type returned by the text_embedding model.
* This value must be set when task_type is text_embedding and is used when parsing the response
* back to Elasticsearch data structures.
*/
element_type: AmazonSageMakerElementType
}

export enum AmazonSageMakerSimilarity {
cosine,
dot_product,
l2_norm
}

export enum AmazonSageMakerElementType {
byte,
float,
bit
}
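
For the `elastic` API format with the text_embedding task type, the same settings additionally carry the similarity measure and element type defined above. A hypothetical sketch, again with placeholder values:

// Illustrative sketch: an elastic-API text_embedding endpoint with placeholder values.
const elasticEmbeddingSettings: AmazonSageMakerElasticServiceSettings = {
  access_key: '<AWS_ACCESS_KEY>',
  secret_key: '<AWS_SECRET_KEY>',
  endpoint_name: 'my-embedding-endpoint',
  api: AmazonSageMakerApi.elastic,
  region: 'us-east-1',
  similarity: AmazonSageMakerSimilarity.cosine, // optional similarity measure for text_embedding
  element_type: AmazonSageMakerElementType.float // required so the text_embedding response can be parsed
}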

export interface AmazonSageMakerTaskSettings {
/**
* The AWS custom attributes passed verbatim through to the model running in the SageMaker Endpoint.
* Values will be returned in the `X-elastic-sagemaker-custom-attributes` header.
* @ext_doc_id amazonsagemaker-invoke
*/
custom_attributes?: string
/**
* The optional JMESPath expression used to override the EnableExplanations provided during endpoint creation.
* @ext_doc_id amazonsagemaker-invoke
*/
enable_explanations?: string
/**
* The capture data ID, used when data capture is enabled in the Endpoint.
* @ext_doc_id amazonsagemaker-invoke
*/
inference_id?: string
/**
* The stateful session identifier for a new or existing session.
* New sessions will be returned in the `X-elastic-sagemaker-new-session-id` header.
* Closed sessions will be returned in the `X-elastic-sagemaker-closed-session-id` header.
* @ext_doc_id amazonsagemaker-invoke
*/
session_id?: string
/**
* Specifies the variant when running with multi-variant Endpoints.
* @ext_doc_id amazonsagemaker-invoke
*/
target_variant?: string
}

/**
* The `elastic` API allows any key/value pair in the task settings when calling the inference endpoint, but task settings
* cannot be provided when creating the inference endpoint.
*/
export class AmazonSageMakerElasticTaskSettings
implements AmazonSageMakerTaskSettings
{
[key: string]: unknown
}

/**
* `openai` API-specific task settings for Amazon SageMaker.
*/
export interface AmazonSageMakerOpenAiTaskSettings
extends AmazonSageMakerTaskSettings {
user?: string
}
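
To show the difference between the two task-settings shapes, here is a hypothetical pair of values; the keys in the elastic variant are made-up examples, since that API accepts any key/value pair at inference time:

// Illustrative sketch. `user` is the openai-specific field defined above;
// `session_id` comes from the shared AmazonSageMakerTaskSettings interface.
const openAiTaskSettings: AmazonSageMakerOpenAiTaskSettings = {
  user: 'user-1234', // optional end-user identifier passed through to the model
  session_id: 'existing-session-id' // stateful session id, echoed back in a response header
}

// Elastic task settings are free-form key/value pairs and can only be sent when
// calling the inference endpoint, not when creating it; these keys are invented.
const elasticTaskSettings: AmazonSageMakerElasticTaskSettings = {
  temperature: 0.2,
  max_new_tokens: 128
}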

export enum AmazonSageMakerServiceType {
amazon_sagemaker
}

export class AnthropicServiceSettings {
/**
* A valid API key for the Anthropic API.
12 changes: 12 additions & 0 deletions specification/inference/_types/Services.ts
@@ -23,6 +23,7 @@ import {
TaskType,
TaskTypeAlibabaCloudAI,
TaskTypeAmazonBedrock,
TaskTypeAmazonSageMaker,
TaskTypeAnthropic,
TaskTypeAzureAIStudio,
TaskTypeAzureOpenAI,
@@ -109,6 +110,17 @@ export class InferenceEndpointInfoAmazonBedrock extends InferenceEndpoint {
task_type: TaskTypeAmazonBedrock
}

export class InferenceEndpointInfoAmazonSageMaker extends InferenceEndpoint {
/**
* The inference Id
*/
inference_id: string
/**
* The task type
*/
task_type: TaskTypeAmazonSageMaker
}

export class InferenceEndpointInfoAnthropic extends InferenceEndpoint {
/**
* The inference Id
8 changes: 8 additions & 0 deletions specification/inference/_types/TaskType.ts
@@ -45,6 +45,14 @@ export enum TaskTypeAmazonBedrock {
completion
}

export enum TaskTypeAmazonSageMaker {
text_embedding,
completion,
chat_completion,
sparse_embedding,
rerank
}

export enum TaskTypeAnthropic {
completion
}
1 change: 1 addition & 0 deletions specification/inference/put/PutRequest.ts
@@ -33,6 +33,7 @@ import { TaskType } from '@inference/_types/TaskType'
* The following integrations are available through the inference API. You can find the available task types next to the integration name:
* * AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)
* * Amazon Bedrock (`completion`, `text_embedding`)
* * Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)
* * Anthropic (`completion`)
* * Azure AI Studio (`completion`, `text_embedding`)
* * Azure OpenAI (`completion`, `text_embedding`)
@@ -0,0 +1,86 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import { RequestBase } from '@_types/Base'
import { Id } from '@_types/common'
import { Duration } from '@_types/Time'
import {
AmazonSageMakerServiceSettings,
AmazonSageMakerServiceType,
AmazonSageMakerTaskSettings
} from '@inference/_types/CommonTypes'
import { InferenceChunkingSettings } from '@inference/_types/Services'
import { TaskTypeAmazonSageMaker } from '@inference/_types/TaskType'

/**
* Create an Amazon SageMaker inference endpoint.
*
* Create an inference endpoint to perform an inference task with the `amazon_sagemaker` service.
* @rest_spec_name inference.put_amazonsagemaker
* @availability stack since=9.1.0 stability=stable visibility=public
* @availability serverless stability=stable visibility=public
* @cluster_privileges manage_inference
* @doc_id inference-api-put-amazonsagemaker
*/
export interface Request extends RequestBase {
urls: [
{
path: '/_inference/{task_type}/{amazonsagemaker_inference_id}'
methods: ['PUT']
}
]
path_parts: {
/**
* The type of the inference task that the model will perform.
*/
task_type: TaskTypeAmazonSageMaker
/**
* The unique identifier of the inference endpoint.
*/
amazonsagemaker_inference_id: Id
}
query_parameters: {
/**
* Specifies the amount of time to wait for the inference endpoint to be created.
* @server_default 30s
*/
timeout?: Duration
}
body: {
/**
* The chunking configuration object.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
/**
* The type of service supported for the specified task type. In this case, `amazon_sagemaker`.
*/
service: AmazonSageMakerServiceType
/**
* Settings used to install the inference model.
* These settings are specific to the `amazon_sagemaker` service and the `service_settings.api` you specified.
*/
service_settings: AmazonSageMakerServiceSettings
/**
* Settings to configure the inference task.
* These settings are specific to the task type and the `service_settings.api` you specified.
*/
task_settings?: AmazonSageMakerTaskSettings
}
}
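
Putting the pieces together, a request to create an endpoint with this service might look roughly like the sketch below; the inference id in the path, the endpoint name, and the credentials are placeholders rather than an example taken from this PR.

// Illustrative sketch: PUT /_inference/text_embedding/my-sagemaker-embeddings
// The object mirrors the request body shape defined above, using the wire-format strings.
const examplePutBody = {
  service: 'amazon_sagemaker',
  service_settings: {
    access_key: '<AWS_ACCESS_KEY>',
    secret_key: '<AWS_SECRET_KEY>',
    endpoint_name: 'my-embedding-endpoint',
    api: 'openai',
    region: 'us-east-1',
    dimensions: 384
  }
  // task_settings and chunking_settings are optional and omitted here.
}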