diff --git a/output/schema/schema.json b/output/schema/schema.json index e189bc5736..40edeef4f1 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9920,7 +9920,7 @@ "visibility": "public" } }, - "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`completion`, `chat_completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)", + "description": "Create an inference 
endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`completion`, `chat_completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)", "docId": "inference-api-put", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put", "extPreviousVersionDocUrl": 
"https://www.elastic.co/guide/en/elasticsearch/reference/8.18/put-inference-api.html", @@ -10054,20 +10054,38 @@ }, { "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, "stack": { + "since": "9.1.0", "stability": "stable", "visibility": "public" } }, - "description": "Configure a Amazon SageMaker inference endpoint", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-sagemaker.html", + "description": "Create an Amazon SageMaker inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `amazon_sagemaker` service.", + "docId": "inference-api-put-amazonsagemaker", + "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonsagemaker", + "extPreviousVersionDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-amazon-sagemaker.html", "name": "inference.put_amazonsagemaker", - "request": null, + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_amazonsagemaker" + }, "requestBodyRequired": false, "requestMediaType": [ "application/json" ], - "response": null, + "response": { + "name": "Response", + "namespace": "inference.put_amazonsagemaker" + }, "responseMediaType": [ "application/json" ], @@ -166440,6 +166458,258 @@ }, "specLocation": "inference/_types/CommonTypes.ts#L436-L439" }, + { + "kind": "enum", + "members": [ + { + "name": "openai" + }, + { + "name": "elastic" + } + ], + "name": { + "name": "AmazonSageMakerApi", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L501-L504" + }, + { + "kind": "interface", + "name": { + "name": "AmazonSageMakerServiceSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A valid AWS access key that has permissions to use Amazon SageMaker and access to models for invoke requests.", + "name": 
"access_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the SageMaker Endpoint.", + "extDocId": "amazonsagemaker-invoke", + "extDocUrl": "https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html", + "name": "endpoint_name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The API format to use when calling SageMaker.\nThis will structure the payload when invoking the SageMaker endpoint.", + "name": "api", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonSageMakerApi", + "namespace": "inference._types" + } + } + }, + { + "description": "The region that your endpoint or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon SageMaker documentation.", + "extDocId": "amazonsagemaker-invoke", + "extDocUrl": "https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html", + "name": "region", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "A valid AWS secret key that is paired with the `access_key`.\nFor informationg about creating and managing access and secret keys, refer to the AWS documentation.", + "extDocId": "amazonsagemaker-secret-keys", + "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html", + "name": "secret_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The model id when calling a multi-model endpoint.", + "extDocId": "amazonsagemaker-invoke", + "extDocUrl": "https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html", + "name": 
"target_model", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The container to directly invoke when calling a multi-container endpoint.", + "extDocId": "amazonsagemaker-invoke", + "extDocUrl": "https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html", + "name": "target_container_hostname", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The inference component to directly invoke when calling a multi-component endpoint.", + "extDocId": "amazonsagemaker-invoke", + "extDocUrl": "https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html", + "name": "inference_component_name", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The maximum number of inputs in each batch. This value is used by inference ingestion pipelines\nwhen processing semantic values. It correlates to the number of times the SageMaker endpoint is\ninvoked (one per batch of input).", + "name": "batch_size", + "required": false, + "serverDefault": 256, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The number of dimensions returned by the text_embedding models. 
If this value is not provided, then\nit is guessed by invoking the Endpoint for the text_embedding task.", + "name": "dimensions", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L445-L499" + }, + { + "kind": "enum", + "members": [ + { + "name": "amazon_sagemaker" + } + ], + "name": { + "name": "AmazonSageMakerServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L581-L583" + }, + { + "kind": "interface", + "name": { + "name": "AmazonSageMakerTaskSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The AWS custom attributes passed verbatim through to the model running in the SageMaker Endpoint.\nValues will be returned in the `X-elastic-sagemaker-custom-attributes` header.", + "extDocId": "amazonsagemaker-invoke", + "extDocUrl": "https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html", + "name": "custom_attributes", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The optional JMESPath expression used to override the EnableExplanations provided during endpoint creation.", + "extDocId": "amazonsagemaker-invoke", + "extDocUrl": "https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html", + "name": "enable_explanations", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The capture data id when enabled in the Endpoint.", + "extDocId": "amazonsagemaker-invoke", + "extDocUrl": "https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html", + "name": "inference_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": 
"string", + "namespace": "_builtins" + } + } + }, + { + "description": "The stateful session identifier for a new or existing session.\nNew sessions will be returned in the `X-elastic-sagemaker-new-session-id` header.\nClosed sessions will be returned in the `X-elastic-sagemaker-closed-session-id` header.", + "extDocId": "amazonsagemaker-invoke", + "extDocUrl": "https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html", + "name": "session_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "Specifies the variant when running with multi-variant Endpoints.", + "extDocId": "amazonsagemaker-invoke", + "extDocUrl": "https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html", + "name": "target_variant", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L535-L564" + }, { "kind": "interface", "name": { @@ -166485,7 +166755,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L445-L461" + "specLocation": "inference/_types/CommonTypes.ts#L585-L601" }, { "kind": "enum", @@ -166498,7 +166768,7 @@ "name": "AnthropicServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L494-L496" + "specLocation": "inference/_types/CommonTypes.ts#L634-L636" }, { "kind": "interface", @@ -166558,7 +166828,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L463-L488" + "specLocation": "inference/_types/CommonTypes.ts#L603-L628" }, { "kind": "enum", @@ -166571,7 +166841,7 @@ "name": "AnthropicTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L490-L492" + "specLocation": "inference/_types/CommonTypes.ts#L630-L632" }, { "kind": "interface", @@ -166645,7 +166915,7 @@ } } ], - "specLocation": 
"inference/_types/CommonTypes.ts#L498-L540" + "specLocation": "inference/_types/CommonTypes.ts#L638-L680" }, { "kind": "enum", @@ -166658,7 +166928,7 @@ "name": "AzureAiStudioServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L577-L579" + "specLocation": "inference/_types/CommonTypes.ts#L717-L719" }, { "kind": "interface", @@ -166729,7 +166999,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L542-L570" + "specLocation": "inference/_types/CommonTypes.ts#L682-L710" }, { "kind": "enum", @@ -166745,7 +167015,7 @@ "name": "AzureAiStudioTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L572-L575" + "specLocation": "inference/_types/CommonTypes.ts#L712-L715" }, { "kind": "interface", @@ -166837,7 +167107,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L581-L626" + "specLocation": "inference/_types/CommonTypes.ts#L721-L766" }, { "kind": "enum", @@ -166850,7 +167120,7 @@ "name": "AzureOpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L641-L643" + "specLocation": "inference/_types/CommonTypes.ts#L781-L783" }, { "kind": "interface", @@ -166872,7 +167142,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L628-L634" + "specLocation": "inference/_types/CommonTypes.ts#L768-L774" }, { "kind": "enum", @@ -166888,7 +167158,7 @@ "name": "AzureOpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L636-L639" + "specLocation": "inference/_types/CommonTypes.ts#L776-L779" }, { "kind": "enum", @@ -166913,7 +167183,7 @@ "name": "CohereEmbeddingType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L698-L704" + "specLocation": "inference/_types/CommonTypes.ts#L838-L844" }, { "kind": "enum", @@ -166935,7 +167205,7 @@ "name": "CohereInputType", "namespace": "inference._types" }, - "specLocation": 
"inference/_types/CommonTypes.ts#L706-L711" + "specLocation": "inference/_types/CommonTypes.ts#L846-L851" }, { "kind": "interface", @@ -167008,7 +167278,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L645-L686" + "specLocation": "inference/_types/CommonTypes.ts#L785-L826" }, { "kind": "enum", @@ -167021,7 +167291,7 @@ "name": "CohereServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L694-L696" + "specLocation": "inference/_types/CommonTypes.ts#L834-L836" }, { "kind": "enum", @@ -167040,7 +167310,7 @@ "name": "CohereSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L713-L717" + "specLocation": "inference/_types/CommonTypes.ts#L853-L857" }, { "kind": "interface", @@ -167098,7 +167368,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L725-L757" + "specLocation": "inference/_types/CommonTypes.ts#L865-L897" }, { "kind": "enum", @@ -167117,7 +167387,7 @@ "name": "CohereTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L688-L692" + "specLocation": "inference/_types/CommonTypes.ts#L828-L832" }, { "kind": "enum", @@ -167136,7 +167406,7 @@ "name": "CohereTruncateType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L719-L723" + "specLocation": "inference/_types/CommonTypes.ts#L859-L863" }, { "kind": "interface", @@ -167445,7 +167715,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L771-L793" + "specLocation": "inference/_types/CommonTypes.ts#L911-L933" }, { "kind": "enum", @@ -167458,7 +167728,7 @@ "name": "DeepSeekServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L795-L797" + "specLocation": "inference/_types/CommonTypes.ts#L935-L937" }, { "kind": "interface", @@ -167599,7 +167869,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L820-L854" + "specLocation": 
"inference/_types/CommonTypes.ts#L960-L994" }, { "kind": "enum", @@ -167612,7 +167882,7 @@ "name": "ElasticsearchServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L870-L872" + "specLocation": "inference/_types/CommonTypes.ts#L1010-L1012" }, { "kind": "interface", @@ -167635,7 +167905,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L856-L862" + "specLocation": "inference/_types/CommonTypes.ts#L996-L1002" }, { "kind": "enum", @@ -167654,7 +167924,7 @@ "name": "ElasticsearchTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L864-L868" + "specLocation": "inference/_types/CommonTypes.ts#L1004-L1008" }, { "kind": "interface", @@ -167700,7 +167970,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L874-L900" + "specLocation": "inference/_types/CommonTypes.ts#L1014-L1040" }, { "kind": "enum", @@ -167713,7 +167983,7 @@ "name": "ElserServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L906-L908" + "specLocation": "inference/_types/CommonTypes.ts#L1046-L1048" }, { "kind": "enum", @@ -167726,7 +167996,7 @@ "name": "ElserTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L902-L904" + "specLocation": "inference/_types/CommonTypes.ts#L1042-L1044" }, { "kind": "enum", @@ -167739,7 +168009,7 @@ "name": "GoogleAiServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L933-L935" + "specLocation": "inference/_types/CommonTypes.ts#L1073-L1075" }, { "kind": "interface", @@ -167787,7 +168057,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L910-L926" + "specLocation": "inference/_types/CommonTypes.ts#L1050-L1066" }, { "kind": "enum", @@ -167803,7 +168073,7 @@ "name": "GoogleAiStudioTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L928-L931" + "specLocation": 
"inference/_types/CommonTypes.ts#L1068-L1071" }, { "kind": "interface", @@ -167877,7 +168147,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L937-L963" + "specLocation": "inference/_types/CommonTypes.ts#L1077-L1103" }, { "kind": "enum", @@ -167890,7 +168160,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L983-L985" + "specLocation": "inference/_types/CommonTypes.ts#L1123-L1125" }, { "kind": "interface", @@ -167924,7 +168194,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L965-L974" + "specLocation": "inference/_types/CommonTypes.ts#L1105-L1114" }, { "kind": "enum", @@ -167946,7 +168216,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L976-L981" + "specLocation": "inference/_types/CommonTypes.ts#L1116-L1121" }, { "kind": "interface", @@ -168008,7 +168278,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L987-L1019" + "specLocation": "inference/_types/CommonTypes.ts#L1127-L1159" }, { "kind": "enum", @@ -168021,7 +168291,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1040-L1042" + "specLocation": "inference/_types/CommonTypes.ts#L1180-L1182" }, { "kind": "interface", @@ -168055,7 +168325,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1021-L1031" + "specLocation": "inference/_types/CommonTypes.ts#L1161-L1171" }, { "kind": "enum", @@ -168077,7 +168347,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1033-L1038" + "specLocation": "inference/_types/CommonTypes.ts#L1173-L1178" }, { "kind": "interface", @@ -168140,7 +168410,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L266-L295" + "specLocation": "inference/_types/Services.ts#L278-L307" }, { "kind": "interface", @@ -168199,7 +168469,7 @@ } } ], - 
"specLocation": "inference/_types/Services.ts#L43-L63" + "specLocation": "inference/_types/Services.ts#L44-L64" }, { "kind": "interface", @@ -168240,7 +168510,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L65-L77" + "specLocation": "inference/_types/Services.ts#L66-L78" }, { "kind": "interface", @@ -168280,7 +168550,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L90-L99" + "specLocation": "inference/_types/Services.ts#L91-L100" }, { "kind": "interface", @@ -168320,7 +168590,47 @@ } } ], - "specLocation": "inference/_types/Services.ts#L101-L110" + "specLocation": "inference/_types/Services.ts#L102-L111" + }, + { + "kind": "interface", + "inherits": { + "type": { + "name": "InferenceEndpoint", + "namespace": "inference._types" + } + }, + "name": { + "name": "InferenceEndpointInfoAmazonSageMaker", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The task type", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskTypeAmazonSageMaker", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L113-L122" }, { "kind": "interface", @@ -168360,7 +168670,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L112-L121" + "specLocation": "inference/_types/Services.ts#L124-L133" }, { "kind": "interface", @@ -168400,7 +168710,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L123-L132" + "specLocation": "inference/_types/Services.ts#L135-L144" }, { "kind": "interface", @@ -168440,7 +168750,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L134-L143" + "specLocation": "inference/_types/Services.ts#L146-L155" }, { "kind": "interface", @@ -168480,7 +168790,7 @@ } } ], - "specLocation": 
"inference/_types/Services.ts#L145-L154" + "specLocation": "inference/_types/Services.ts#L157-L166" }, { "kind": "interface", @@ -168520,7 +168830,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L156-L165" + "specLocation": "inference/_types/Services.ts#L168-L177" }, { "kind": "interface", @@ -168560,7 +168870,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L178-L187" + "specLocation": "inference/_types/Services.ts#L190-L199" }, { "kind": "interface", @@ -168600,7 +168910,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L167-L176" + "specLocation": "inference/_types/Services.ts#L179-L188" }, { "kind": "interface", @@ -168640,7 +168950,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L189-L198" + "specLocation": "inference/_types/Services.ts#L201-L210" }, { "kind": "interface", @@ -168680,7 +168990,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L200-L209" + "specLocation": "inference/_types/Services.ts#L212-L221" }, { "kind": "interface", @@ -168720,7 +169030,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L211-L220" + "specLocation": "inference/_types/Services.ts#L223-L232" }, { "kind": "interface", @@ -168760,7 +169070,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L79-L88" + "specLocation": "inference/_types/Services.ts#L80-L89" }, { "kind": "interface", @@ -168800,7 +169110,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L222-L231" + "specLocation": "inference/_types/Services.ts#L234-L243" }, { "kind": "interface", @@ -168840,7 +169150,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L233-L242" + "specLocation": "inference/_types/Services.ts#L245-L254" }, { "kind": "interface", @@ -168880,7 +169190,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L244-L253" + "specLocation": "inference/_types/Services.ts#L256-L265" }, { "kind": "interface", @@ -168920,7 +169230,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L255-L264" + 
"specLocation": "inference/_types/Services.ts#L267-L276" }, { "kind": "interface", @@ -169080,7 +169390,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1044-L1073" + "specLocation": "inference/_types/CommonTypes.ts#L1184-L1213" }, { "kind": "enum", @@ -169093,7 +169403,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1103-L1105" + "specLocation": "inference/_types/CommonTypes.ts#L1243-L1245" }, { "kind": "enum", @@ -169112,7 +169422,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1107-L1111" + "specLocation": "inference/_types/CommonTypes.ts#L1247-L1251" }, { "kind": "interface", @@ -169158,7 +169468,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1075-L1096" + "specLocation": "inference/_types/CommonTypes.ts#L1215-L1236" }, { "kind": "enum", @@ -169174,7 +169484,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1098-L1101" + "specLocation": "inference/_types/CommonTypes.ts#L1238-L1241" }, { "kind": "enum", @@ -169196,7 +169506,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1113-L1118" + "specLocation": "inference/_types/CommonTypes.ts#L1253-L1258" }, { "kind": "interface", @@ -169354,7 +169664,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1120-L1147" + "specLocation": "inference/_types/CommonTypes.ts#L1260-L1287" }, { "kind": "enum", @@ -169367,7 +169677,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1155-L1157" + "specLocation": "inference/_types/CommonTypes.ts#L1295-L1297" }, { "kind": "enum", @@ -169386,7 +169696,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1149-L1153" + 
"specLocation": "inference/_types/CommonTypes.ts#L1289-L1293" }, { "kind": "interface", @@ -169473,7 +169783,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1159-L1201" + "specLocation": "inference/_types/CommonTypes.ts#L1299-L1341" }, { "kind": "enum", @@ -169486,7 +169796,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1217-L1219" + "specLocation": "inference/_types/CommonTypes.ts#L1357-L1359" }, { "kind": "interface", @@ -169508,7 +169818,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1203-L1209" + "specLocation": "inference/_types/CommonTypes.ts#L1343-L1349" }, { "kind": "enum", @@ -169527,7 +169837,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1211-L1215" + "specLocation": "inference/_types/CommonTypes.ts#L1351-L1355" }, { "kind": "interface", @@ -169594,7 +169904,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L301-L327" + "specLocation": "inference/_types/Services.ts#L313-L339" }, { "kind": "interface", @@ -169742,7 +170052,7 @@ "name": "ServiceSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L297-L297", + "specLocation": "inference/_types/Services.ts#L309-L309", "type": { "kind": "user_defined_value" } @@ -169826,7 +170136,7 @@ "name": "TaskSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L299-L299", + "specLocation": "inference/_types/Services.ts#L311-L311", "type": { "kind": "user_defined_value" } @@ -169894,6 +170204,31 @@ }, "specLocation": "inference/_types/TaskType.ts#L43-L46" }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + }, + { + "name": "completion" + }, + { + "name": "chat_completion" + }, + { + "name": "sparse_embedding" + }, + { + "name": "rerank" + } + ], + "name": { + "name": "TaskTypeAmazonSageMaker", + "namespace": "inference._types" + }, + 
"specLocation": "inference/_types/TaskType.ts#L48-L54" + }, { "kind": "enum", "members": [ @@ -169905,7 +170240,7 @@ "name": "TaskTypeAnthropic", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L48-L50" + "specLocation": "inference/_types/TaskType.ts#L56-L58" }, { "kind": "enum", @@ -169921,7 +170256,7 @@ "name": "TaskTypeAzureAIStudio", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L52-L55" + "specLocation": "inference/_types/TaskType.ts#L60-L63" }, { "kind": "enum", @@ -169937,7 +170272,7 @@ "name": "TaskTypeAzureOpenAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L57-L60" + "specLocation": "inference/_types/TaskType.ts#L65-L68" }, { "kind": "enum", @@ -169956,7 +170291,7 @@ "name": "TaskTypeCohere", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L62-L66" + "specLocation": "inference/_types/TaskType.ts#L70-L74" }, { "kind": "enum", @@ -169972,7 +170307,7 @@ "name": "TaskTypeDeepSeek", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L68-L71" + "specLocation": "inference/_types/TaskType.ts#L76-L79" }, { "kind": "enum", @@ -169985,7 +170320,7 @@ "name": "TaskTypeELSER", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L79-L81" + "specLocation": "inference/_types/TaskType.ts#L87-L89" }, { "kind": "enum", @@ -170004,7 +170339,7 @@ "name": "TaskTypeElasticsearch", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L73-L77" + "specLocation": "inference/_types/TaskType.ts#L81-L85" }, { "kind": "enum", @@ -170020,7 +170355,7 @@ "name": "TaskTypeGoogleAIStudio", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L83-L86" + "specLocation": "inference/_types/TaskType.ts#L91-L94" }, { "kind": "enum", @@ -170036,7 +170371,7 @@ "name": "TaskTypeGoogleVertexAI", "namespace": "inference._types" }, - 
"specLocation": "inference/_types/TaskType.ts#L88-L91" + "specLocation": "inference/_types/TaskType.ts#L96-L99" }, { "kind": "enum", @@ -170058,7 +170393,7 @@ "name": "TaskTypeHuggingFace", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L93-L98" + "specLocation": "inference/_types/TaskType.ts#L101-L106" }, { "kind": "enum", @@ -170093,7 +170428,7 @@ "name": "TaskTypeMistral", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L100-L104" + "specLocation": "inference/_types/TaskType.ts#L108-L112" }, { "kind": "enum", @@ -170112,7 +170447,7 @@ "name": "TaskTypeOpenAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L106-L110" + "specLocation": "inference/_types/TaskType.ts#L114-L118" }, { "kind": "enum", @@ -170128,7 +170463,7 @@ "name": "TaskTypeVoyageAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L112-L115" + "specLocation": "inference/_types/TaskType.ts#L120-L123" }, { "kind": "enum", @@ -170147,7 +170482,7 @@ "name": "TaskTypeWatsonx", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L117-L121" + "specLocation": "inference/_types/TaskType.ts#L125-L129" }, { "kind": "interface", @@ -170393,7 +170728,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1221-L1252" + "specLocation": "inference/_types/CommonTypes.ts#L1361-L1392" }, { "kind": "enum", @@ -170406,7 +170741,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1285-L1287" + "specLocation": "inference/_types/CommonTypes.ts#L1425-L1427" }, { "kind": "interface", @@ -170466,7 +170801,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1254-L1278" + "specLocation": "inference/_types/CommonTypes.ts#L1394-L1418" }, { "kind": "enum", @@ -170482,7 +170817,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": 
"inference/_types/CommonTypes.ts#L1280-L1283" + "specLocation": "inference/_types/CommonTypes.ts#L1420-L1423" }, { "kind": "interface", @@ -170570,7 +170905,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1289-L1327" + "specLocation": "inference/_types/CommonTypes.ts#L1429-L1467" }, { "kind": "enum", @@ -170583,7 +170918,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1335-L1337" + "specLocation": "inference/_types/CommonTypes.ts#L1475-L1477" }, { "kind": "enum", @@ -170602,7 +170937,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1329-L1333" + "specLocation": "inference/_types/CommonTypes.ts#L1469-L1473" }, { "kind": "request", @@ -171329,7 +171664,7 @@ } } }, - "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`completion`, `chat_completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)", + "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`completion`, `chat_completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)", "examples": { "InferencePutExample1": { "alternatives": [ @@ -171414,7 +171749,7 @@ } } ], - "specLocation": "inference/put/PutRequest.ts#L26-L87" + "specLocation": "inference/put/PutRequest.ts#L26-L88" }, { "kind": "response", @@ -171897,6 +172232,139 @@ }, "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L25" }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/docs/explore-analyze/elastic-inference/inference-api#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": 
"inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `amazon_sagemaker`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonSageMakerServiceType", + "namespace": "inference._types" + } + } + }, + { + "description": "Settings used to install the inference model.\nThese settings are specific to the `amazon_sagemaker` service and `service_settings.api` you specified.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonSageMakerServiceSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type and `service_settings.api` you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonSageMakerTaskSettings", + "namespace": "inference._types" + } + } + } + ] + }, + "description": "Create an Amazon SageMaker inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `amazon_sagemaker` service.", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_amazonsagemaker" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskTypeAmazonSageMaker", + "namespace": "inference._types" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "amazonsagemaker_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be 
created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_amazonsagemaker/PutAmazonSageMakerRequest.ts#L31-L86" + }, + { + "kind": "response", + "body": { + "kind": "value", + "codegenName": "endpoint_info", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfoAmazonSageMaker", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_amazonsagemaker" + }, + "specLocation": "inference/put_amazonsagemaker/PutAmazonSageMakerResponse.ts#L22-L25" + }, { "kind": "request", "attachedBehaviors": [ diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 5de35d3bc2..dbdff7b414 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13648,6 +13648,31 @@ export interface InferenceAmazonBedrockTaskSettings { export type InferenceAmazonBedrockTaskType = 'completion' | 'text_embedding' +export type InferenceAmazonSageMakerApi = 'openai' | 'elastic' + +export interface InferenceAmazonSageMakerServiceSettings { + access_key: string + endpoint_name: string + api: InferenceAmazonSageMakerApi + region: string + secret_key: string + target_model?: string + target_container_hostname?: string + inference_component_name?: string + batch_size?: integer + dimensions?: integer +} + +export type InferenceAmazonSageMakerServiceType = 'amazon_sagemaker' + +export interface InferenceAmazonSageMakerTaskSettings { + custom_attributes?: string + enable_explanations?: string + inference_id?: string + session_id?: string + target_variant?: string +} + export interface InferenceAnthropicServiceSettings { api_key: string model_id: string @@ -13879,6 +13904,11 @@ export interface InferenceInferenceEndpointInfoAmazonBedrock extends InferenceIn task_type: InferenceTaskTypeAmazonBedrock } +export interface 
InferenceInferenceEndpointInfoAmazonSageMaker extends InferenceInferenceEndpoint { + inference_id: string + task_type: InferenceTaskTypeAmazonSageMaker +} + export interface InferenceInferenceEndpointInfoAnthropic extends InferenceInferenceEndpoint { inference_id: string task_type: InferenceTaskTypeAnthropic @@ -14066,6 +14096,8 @@ export type InferenceTaskTypeAlibabaCloudAI = 'text_embedding' | 'rerank' | 'com export type InferenceTaskTypeAmazonBedrock = 'text_embedding' | 'completion' +export type InferenceTaskTypeAmazonSageMaker = 'text_embedding' | 'completion' | 'chat_completion' | 'sparse_embedding' | 'rerank' + export type InferenceTaskTypeAnthropic = 'completion' export type InferenceTaskTypeAzureAIStudio = 'text_embedding' | 'completion' @@ -14240,6 +14272,20 @@ export interface InferencePutAmazonbedrockRequest extends RequestBase { export type InferencePutAmazonbedrockResponse = InferenceInferenceEndpointInfoAmazonBedrock +export interface InferencePutAmazonsagemakerRequest extends RequestBase { + task_type: InferenceTaskTypeAmazonSageMaker + amazonsagemaker_inference_id: Id + timeout?: Duration + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferenceAmazonSageMakerServiceType + service_settings: InferenceAmazonSageMakerServiceSettings + task_settings?: InferenceAmazonSageMakerTaskSettings + } +} + +export type InferencePutAmazonsagemakerResponse = InferenceInferenceEndpointInfoAmazonSageMaker + export interface InferencePutAnthropicRequest extends RequestBase { task_type: InferenceAnthropicTaskType anthropic_inference_id: Id diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index ddd47afb65..271760938d 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -8,6 +8,8 @@ alibabacloud-api-keys,https://opensearch.console.aliyun.com/cn-shanghai/rag/api- analysis-analyzers,https://www.elastic.co/docs/reference/text-analysis/analyzer-reference,, 
amazonbedrock-models,https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html,, amazonbedrock-secret-keys,https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html,, +amazonsagemaker-invoke,https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html,, +amazonsagemaker-secret-keys,https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html,, analysis-charfilters,https://www.elastic.co/docs/reference/text-analysis/character-filter-reference,, analysis-normalizers,https://www.elastic.co/docs/reference/text-analysis/normalizers,, analysis-standard-analyzer,https://www.elastic.co/docs/reference/text-analysis/analysis-standard-analyzer,, @@ -354,6 +356,7 @@ inference-api-post-eis-chat-completion,https://www.elastic.co/docs/api/doc/elast inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/put-inference-api.html, inference-api-put-alibabacloud,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-alibabacloud,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-alibabacloud-ai-search.html, inference-api-put-amazonbedrock,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonbedrock,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-amazon-bedrock.html, +inference-api-put-amazonsagemaker,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonsagemaker,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-amazon-sagemaker.html, inference-api-put-anthropic,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-anthropic,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-anthropic.html, 
inference-api-put-azureaistudio,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-ai-studio.html, inference-api-put-azureopenai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-openai.html, diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 5f26a7675b..638c5fad72 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -442,6 +442,149 @@ export enum AmazonBedrockServiceType { amazonbedrock } +export class AmazonSageMakerServiceSettings { + /** + * A valid AWS access key that has permissions to use Amazon SageMaker and access to models for invoke requests. + */ + access_key: string + /** + * The name of the SageMaker Endpoint. + * @ext_doc_id amazonsagemaker-invoke + */ + endpoint_name: string + /** + * The API format to use when calling SageMaker. + * This will structure the payload when invoking the SageMaker endpoint. + */ + api: AmazonSageMakerApi + /** + * The region that your endpoint or ARN is deployed in. + * The list of available regions per model can be found in the Amazon SageMaker documentation. + * @ext_doc_id amazonsagemaker-invoke + */ + region: string + /** + * A valid AWS secret key that is paired with the `access_key`. + * For information about creating and managing access and secret keys, refer to the AWS documentation. + * @ext_doc_id amazonsagemaker-secret-keys + */ + secret_key: string + /** + * The model id when calling a multi-model endpoint. + * @ext_doc_id amazonsagemaker-invoke + */ + target_model?: string + /** + * The container to directly invoke when calling a multi-container endpoint. 
+ * @ext_doc_id amazonsagemaker-invoke + */ + target_container_hostname?: string + /** + * The inference component to directly invoke when calling a multi-component endpoint. + * @ext_doc_id amazonsagemaker-invoke + */ + inference_component_name?: string + /** + * The maximum number of inputs in each batch. This value is used by inference ingestion pipelines + * when processing semantic values. It correlates to the number of times the SageMaker endpoint is + * invoked (one per batch of input). + * @server_default 256 + */ + batch_size?: integer + /** + * The number of dimensions returned by the text_embedding models. If this value is not provided, then + * it is guessed by invoking the Endpoint for the text_embedding task. + */ + dimensions?: integer +} + +export enum AmazonSageMakerApi { + openai, + elastic +} + +/** + * Service Settings specific to the Elastic API for the Amazon SageMaker service. + */ +export class AmazonSageMakerElasticServiceSettings extends AmazonSageMakerServiceSettings { + /** + * Similarity measure used when invoking the text_embedding task type. + */ + similarity?: AmazonSageMakerSimilarity + + /** + * The data type returned by the text_embedding model. + * This value must be set when task_type is text_embedding and is used when parsing the response + * back to Elasticsearch data structures. + */ + element_type: AmazonSageMakerElementType +} + +export enum AmazonSageMakerSimilarity { + cosine, + dot_product, + l2_norm +} + +export enum AmazonSageMakerElementType { + byte, + float, + bit +} + +export interface AmazonSageMakerTaskSettings { + /** + * The AWS custom attributes passed verbatim through to the model running in the SageMaker Endpoint. + * Values will be returned in the `X-elastic-sagemaker-custom-attributes` header. + * @ext_doc_id amazonsagemaker-invoke + */ + custom_attributes?: string + /** + * The optional JMESPath expression used to override the EnableExplanations provided during endpoint creation. 
+ * @ext_doc_id amazonsagemaker-invoke + */ + enable_explanations?: string + /** + * The capture data id when enabled in the Endpoint. + * @ext_doc_id amazonsagemaker-invoke + */ + inference_id?: string + /** + * The stateful session identifier for a new or existing session. + * New sessions will be returned in the `X-elastic-sagemaker-new-session-id` header. + * Closed sessions will be returned in the `X-elastic-sagemaker-closed-session-id` header. + * @ext_doc_id amazonsagemaker-invoke + */ + session_id?: string + /** + * Specifies the variant when running with multi-variant Endpoints. + * @ext_doc_id amazonsagemaker-invoke + */ + target_variant?: string +} + +/** + * `elastic` API allows any key value pair in the task settings when calling the inference endpoint, but it cannot + * be used when creating the inference endpoint. + */ +export class AmazonSageMakerElasticTaskSettings + implements AmazonSageMakerTaskSettings +{ + [key: string]: unknown +} + +/** + * `openai` API-specific task settings for Amazon SageMaker. + */ +export interface AmazonSageMakerOpenAiTaskSettings + extends AmazonSageMakerTaskSettings { + user?: string +} + +export enum AmazonSageMakerServiceType { + amazon_sagemaker +} + export class AnthropicServiceSettings { /** * A valid API key for the Anthropic API. 
diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index 720dd9ea43..675615c852 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -23,6 +23,7 @@ import { TaskType, TaskTypeAlibabaCloudAI, TaskTypeAmazonBedrock, + TaskTypeAmazonSageMaker, TaskTypeAnthropic, TaskTypeAzureAIStudio, TaskTypeAzureOpenAI, @@ -109,6 +110,17 @@ export class InferenceEndpointInfoAmazonBedrock extends InferenceEndpoint { task_type: TaskTypeAmazonBedrock } +export class InferenceEndpointInfoAmazonSageMaker extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeAmazonSageMaker +} + export class InferenceEndpointInfoAnthropic extends InferenceEndpoint { /** * The inference Id diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index 670dc3d3e5..512f6c5197 100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -45,6 +45,14 @@ export enum TaskTypeAmazonBedrock { completion } +export enum TaskTypeAmazonSageMaker { + text_embedding, + completion, + chat_completion, + sparse_embedding, + rerank +} + export enum TaskTypeAnthropic { completion } diff --git a/specification/inference/put/PutRequest.ts b/specification/inference/put/PutRequest.ts index 4554574e32..5350125edf 100644 --- a/specification/inference/put/PutRequest.ts +++ b/specification/inference/put/PutRequest.ts @@ -33,6 +33,7 @@ import { TaskType } from '@inference/_types/TaskType' * The following integrations are available through the inference API. 
You can find the available task types next to the integration name: * * AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`) * * Amazon Bedrock (`completion`, `text_embedding`) + * * Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`) * * Anthropic (`completion`) * * Azure AI Studio (`completion`, `text_embedding`) * * Azure OpenAI (`completion`, `text_embedding`) diff --git a/specification/inference/put_amazonsagemaker/PutAmazonSageMakerRequest.ts b/specification/inference/put_amazonsagemaker/PutAmazonSageMakerRequest.ts new file mode 100644 index 0000000000..1b020e5fc8 --- /dev/null +++ b/specification/inference/put_amazonsagemaker/PutAmazonSageMakerRequest.ts @@ -0,0 +1,86 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { Duration } from '@_types/Time' +import { + AmazonSageMakerServiceSettings, + AmazonSageMakerServiceType, + AmazonSageMakerTaskSettings +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' +import { TaskTypeAmazonSageMaker } from '@inference/_types/TaskType' + +/** + * Create an Amazon SageMaker inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `amazon_sagemaker` service. + * @rest_spec_name inference.put_amazonsagemaker + * @availability stack since=9.1.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-amazonsagemaker + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{amazonsagemaker_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + */ + task_type: TaskTypeAmazonSageMaker + /** + * The unique identifier of the inference endpoint. + */ + amazonsagemaker_inference_id: Id + } + query_parameters: { + /** + * Specifies the amount of time to wait for the inference endpoint to be created. + * @server_default 30s + */ + timeout?: Duration + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `amazon_sagemaker`. + */ + service: AmazonSageMakerServiceType + /** + * Settings used to install the inference model. + * These settings are specific to the `amazon_sagemaker` service and `service_settings.api` you specified. + */ + service_settings: AmazonSageMakerServiceSettings + /** + * Settings to configure the inference task. 
+ * These settings are specific to the task type and `service_settings.api` you specified. + */ + task_settings?: AmazonSageMakerTaskSettings + } +} diff --git a/specification/inference/put_amazonsagemaker/PutAmazonSageMakerResponse.ts b/specification/inference/put_amazonsagemaker/PutAmazonSageMakerResponse.ts new file mode 100644 index 0000000000..fe0aa373f5 --- /dev/null +++ b/specification/inference/put_amazonsagemaker/PutAmazonSageMakerResponse.ts @@ -0,0 +1,25 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { InferenceEndpointInfoAmazonSageMaker } from '@inference/_types/Services' + +export class Response { + /** @codegen_name endpoint_info */ + body: InferenceEndpointInfoAmazonSageMaker +} diff --git a/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample1.yaml b/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample1.yaml new file mode 100644 index 0000000000..006249bbd1 --- /dev/null +++ b/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample1.yaml @@ -0,0 +1,17 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/amazon_sagemaker_embeddings` to create an inference endpoint that performs a text embedding task. +method_request: 'PUT _inference/text_embedding/amazon_sagemaker_embeddings' +# type: "request" +value: |- + { + "service": "amazon_sagemaker", + "service_settings": { + "access_key": "AWS-access-key", + "secret_key": "AWS-secret-key", + "region": "us-east-1", + "api": "elastic", + "endpoint_name": "my-endpoint", + "dimensions": 384, + "element_type": "float" + } + } diff --git a/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample2.yaml b/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample2.yaml new file mode 100644 index 0000000000..0d71127688 --- /dev/null +++ b/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample2.yaml @@ -0,0 +1,15 @@ +summary: A completion task +description: Run `PUT _inference/completion/amazon_sagemaker_completion` to create an inference endpoint that performs a completion task. 
+method_request: 'PUT _inference/completion/amazon_sagemaker_completion' +# type: "request" +value: |- + { + "service": "amazon_sagemaker", + "service_settings": { + "access_key": "AWS-access-key", + "secret_key": "AWS-secret-key", + "region": "us-east-1", + "api": "elastic", + "endpoint_name": "my-endpoint" + } + } diff --git a/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample3.yaml b/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample3.yaml new file mode 100644 index 0000000000..ab12f559a3 --- /dev/null +++ b/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample3.yaml @@ -0,0 +1,15 @@ +summary: A chat completion task +description: Run `PUT _inference/chat_completion/amazon_sagemaker_chat_completion` to create an inference endpoint that performs a chat completion task. +method_request: 'PUT _inference/chat_completion/amazon_sagemaker_chat_completion' +# type: "request" +value: |- + { + "service": "amazon_sagemaker", + "service_settings": { + "access_key": "AWS-access-key", + "secret_key": "AWS-secret-key", + "region": "us-east-1", + "api": "elastic", + "endpoint_name": "my-endpoint" + } + } diff --git a/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample4.yaml b/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample4.yaml new file mode 100644 index 0000000000..afac438c22 --- /dev/null +++ b/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample4.yaml @@ -0,0 +1,15 @@ +summary: A sparse embedding task +description: Run `PUT _inference/sparse_embedding/amazon_sagemaker_sparse_embedding` to create an inference endpoint that performs a sparse embedding task. 
+method_request: 'PUT _inference/sparse_embedding/amazon_sagemaker_sparse_embedding' +# type: "request" +value: |- + { + "service": "amazon_sagemaker", + "service_settings": { + "access_key": "AWS-access-key", + "secret_key": "AWS-secret-key", + "region": "us-east-1", + "api": "elastic", + "endpoint_name": "my-endpoint" + } + } diff --git a/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample5.yaml b/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample5.yaml new file mode 100644 index 0000000000..861579eaaa --- /dev/null +++ b/specification/inference/put_amazonsagemaker/request/PutAmazonSageMakerRequestExample5.yaml @@ -0,0 +1,15 @@ +summary: A rerank task +description: Run `PUT _inference/rerank/amazon_sagemaker_rerank` to create an inference endpoint that performs a rerank task. +method_request: 'PUT _inference/rerank/amazon_sagemaker_rerank' +# type: "request" +value: |- + { + "service": "amazon_sagemaker", + "service_settings": { + "access_key": "AWS-access-key", + "secret_key": "AWS-secret-key", + "region": "us-east-1", + "api": "elastic", + "endpoint_name": "my-endpoint" + } + }