diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index ddd47afb65..f67616115a 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -358,6 +358,7 @@ inference-api-put-anthropic,https://www.elastic.co/docs/api/doc/elasticsearch/op inference-api-put-azureaistudio,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-ai-studio.html, inference-api-put-azureopenai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-openai.html, inference-api-put-cohere,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-cohere.html, +inference-api-put-custom,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom,https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-custom.html, inference-api-put-deepseek,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-deepseek,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-deepseek.html, inference-api-put-eis,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-eis,, inference-api-put-elasticsearch,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elasticsearch,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-elasticsearch.html, diff --git a/specification/_json_spec/inference.put_custom.json b/specification/_json_spec/inference.put_custom.json new file mode 100644 index 0000000000..c12108683d --- /dev/null +++ b/specification/_json_spec/inference.put_custom.json @@ -0,0 +1,35 @@ +{ + "inference.put_custom": { + 
"documentation": { + "url": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom", + "description": "Configure a custom inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{custom_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "custom_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index e097e0cd6a..c4799c1705 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -758,6 +758,136 @@ export class CohereTaskSettings { truncate?: CohereTruncateType } +export class CustomServiceSettings { + /** + * Specifies the HTTP header parameters – such as `Authorization` or `Content-Type` – that are required to access the custom service. + * For example: + * ``` + * "headers":{ + * "Authorization": "Bearer ${api_key}", + * "Content-Type": "application/json;charset=utf-8" + * } + * ``` + */ + headers?: UserDefinedValue + /** + * The request configuration object. + */ + request: CustomRequestParams + /** + * The response configuration object. + */ + response: CustomResponseParams + /** + * Specifies secret parameters, like `api_key` or `api_token`, that are required to access the custom service. + * For example: + * ``` + * "secret_parameters":{ + * "api_key":"" + * } + * ``` + */ + secret_parameters: UserDefinedValue + /** + * The URL endpoint to use for the requests. + */ + url?: string +} + +export class CustomRequestParams { + /** + * The body structure of the request.
It requires passing in the string-escaped result of the JSON format HTTP request body. + * For example: + * ``` + * "request":{ + * "content":"{\"input\":${input}}" + * } + * ``` + * > info + * > The content string needs to be a single line except when using the Kibana console. + */ + content: string +} + +export class CustomResponseParams { + /** + * Specifies the path to the error message in the response from the custom service. + * For example: + * ``` + * "response": { + * "error_parser": { + * "path": "$.error.message" + * } + * } + * ``` + */ + error_parser: UserDefinedValue + /** + * Specifies the JSON parser that is used to parse the response from the custom service. + * Different task types require different `json_parser` parameters. + * For example: + * ``` + * # text_embedding + * "response":{ + * "json_parser":{ + * "text_embeddings":"$.result.embeddings[*].embedding" + * } + * } + * + * # sparse_embedding + * "response":{ + * "json_parser":{ + * "token_path":"$.result[*].embeddings[*].token", + * "weight_path":"$.result[*].embeddings[*].weight" + * } + * } + * + * # rerank + * "response":{ + * "json_parser":{ + * "reranked_index":"$.result.scores[*].index", // optional + * "relevance_score":"$.result.scores[*].score", + * "document_text":"xxx" // optional + * } + * } + * + * # completion + * "response":{ + * "json_parser":{ + * "completion_result":"$.result.text" + * } + * } + */ + json_parser: UserDefinedValue +} + +export enum CustomTaskType { + text_embedding, + sparse_embedding, + rerank, + completion +} + +export enum CustomServiceType { + custom +} + +export class CustomTaskSettings { + /** + * Specifies parameters that are required to run the custom service. The parameters depend on the model your custom service uses.
+ * For example: + * ``` + * "task_settings":{ + * "parameters":{ + * "input_type":"query", + * "return_token":true + * } + * } + * ``` + */ + parameters?: UserDefinedValue +} + export class EisServiceSettings { /** * The name of the model to use for the inference task. diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index 720dd9ea43..802db7decc 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -27,6 +27,7 @@ import { TaskTypeAzureAIStudio, TaskTypeAzureOpenAI, TaskTypeCohere, + TaskTypeCustom, TaskTypeDeepSeek, TaskTypeElasticsearch, TaskTypeELSER, @@ -75,18 +76,6 @@ export class InferenceEndpointInfo extends InferenceEndpoint { */ task_type: TaskType } - -export class InferenceEndpointInfoJinaAi extends InferenceEndpoint { - /** - * The inference Id - */ - inference_id: string - /** - * The task type - */ - task_type: TaskTypeJinaAi -} - export class InferenceEndpointInfoAlibabaCloudAI extends InferenceEndpoint { /** * The inference Id @@ -153,6 +142,16 @@ export class InferenceEndpointInfoCohere extends InferenceEndpoint { task_type: TaskTypeCohere } +export class InferenceEndpointInfoCustom extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeCustom +} export class InferenceEndpointInfoDeepSeek extends InferenceEndpoint { /** * The inference Id @@ -219,6 +218,17 @@ export class InferenceEndpointInfoHuggingFace extends InferenceEndpoint { task_type: TaskTypeHuggingFace } +export class InferenceEndpointInfoJinaAi extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeJinaAi +} + export class InferenceEndpointInfoMistral extends InferenceEndpoint { /** * The inference Id diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index 670dc3d3e5..6daed0d281 
100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -65,6 +65,13 @@ export enum TaskTypeCohere { completion } +export enum TaskTypeCustom { + text_embedding, + sparse_embedding, + rerank, + completion +} + export enum TaskTypeDeepSeek { completion, chat_completion diff --git a/specification/inference/put_custom/PutCustomRequest.ts b/specification/inference/put_custom/PutCustomRequest.ts new file mode 100644 index 0000000000..fab798391a --- /dev/null +++ b/specification/inference/put_custom/PutCustomRequest.ts @@ -0,0 +1,78 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { + CustomServiceSettings, + CustomServiceType, + CustomTaskSettings, + CustomTaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' + +/** + * Create a custom inference endpoint. + * + * You can create an inference endpoint to perform an inference task with a custom model that supports the HTTP format. 
+ * @rest_spec_name inference.put_custom + * @availability stack since=8.13.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-custom + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{custom_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + */ + task_type: CustomTaskType + /** + * The unique identifier of the inference endpoint. + */ + custom_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `custom`. + */ + service: CustomServiceType + /** + * Settings used to install the inference model. + * These settings are specific to the `custom` service. + */ + service_settings: CustomServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: CustomTaskSettings + } +} diff --git a/specification/inference/put_custom/PutCustomResponse.ts b/specification/inference/put_custom/PutCustomResponse.ts new file mode 100644 index 0000000000..c09467c944 --- /dev/null +++ b/specification/inference/put_custom/PutCustomResponse.ts @@ -0,0 +1,25 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfoCustom } from '@inference/_types/Services' + +export class Response { + /** @codegen_name endpoint_info */ + body: InferenceEndpointInfoCustom +} diff --git a/specification/inference/put_custom/examples/PutCustomRequestExample1.yaml b/specification/inference/put_custom/examples/PutCustomRequestExample1.yaml new file mode 100644 index 0000000000..dabe71ed86 --- /dev/null +++ b/specification/inference/put_custom/examples/PutCustomRequestExample1.yaml @@ -0,0 +1,24 @@ +summary: Custom text embedding task (OpenAI) +description: Run `PUT _inference/text_embedding/custom-embeddings` to create an inference endpoint that performs a text embedding task. 
+method_request: 'PUT _inference/text_embedding/custom-embeddings' +# type: "request" +value: |- + { + "service": "custom", + "service_settings": { + "secret_parameters": { + "api_key": "" + }, + "url": "https://api.openai.com/v1/embeddings", + "headers": { + "Authorization": "Bearer ${api_key}", + "Content-Type": "application/json;charset=utf-8" + }, + "request": "{\"input\": ${input}, \"model\": \"text-embedding-3-small\"}", + "response": { + "json_parser": { + "text_embeddings": "$.data[*].embedding[*]" + } + } + } + } diff --git a/specification/inference/put_custom/examples/PutCustomRequestExample2.yaml b/specification/inference/put_custom/examples/PutCustomRequestExample2.yaml new file mode 100644 index 0000000000..1fb61b58c1 --- /dev/null +++ b/specification/inference/put_custom/examples/PutCustomRequestExample2.yaml @@ -0,0 +1,25 @@ +summary: Custom rerank task (Cohere APIv2) +description: Run `PUT _inference/rerank/custom-rerank` to create an inference endpoint that performs a rerank task. 
+method_request: 'PUT _inference/rerank/custom-rerank' +# type: "request" +value: |- + { + "service": "custom", + "service_settings": { + "secret_parameters": { + "api_key": "" + }, + "url": "https://api.cohere.com/v2/rerank", + "headers": { + "Authorization": "bearer ${api_key}", + "Content-Type": "application/json" + }, + "request": "{\"documents\": ${input}, \"query\": ${query}, \"model\": \"rerank-v3.5\"}", + "response": { + "json_parser": { + "reranked_index":"$.results[*].index", + "relevance_score":"$.results[*].relevance_score" + } + } + } + } diff --git a/specification/inference/put_custom/examples/PutCustomRequestExample3.yaml b/specification/inference/put_custom/examples/PutCustomRequestExample3.yaml new file mode 100644 index 0000000000..70e5609051 --- /dev/null +++ b/specification/inference/put_custom/examples/PutCustomRequestExample3.yaml @@ -0,0 +1,31 @@ +summary: Custom text embedding task (Cohere APIv2) +description: Run `PUT _inference/text_embedding/custom-text-embedding` to create an inference endpoint that performs a text embedding task. 
+method_request: 'PUT _inference/text_embedding/custom-text-embedding' +# type: "request" +value: |- + { + "service": "custom", + "service_settings": { + "secret_parameters": { + "api_key": "" + }, + "url": "https://api.cohere.com/v2/embed", + "headers": { + "Authorization": "bearer ${api_key}", + "Content-Type": "application/json" + }, + "request": "{\"texts\": ${input}, \"model\": \"embed-v4.0\", \"input_type\": ${input_type}}", + "response": { + "json_parser": { + "text_embeddings":"$.embeddings.float[*]" + } + }, + "input_type": { + "translation": { + "ingest": "search_document", + "search": "search_query" + }, + "default": "search_document" + } + } + } diff --git a/specification/inference/put_custom/examples/PutCustomRequestExample4.yaml b/specification/inference/put_custom/examples/PutCustomRequestExample4.yaml new file mode 100644 index 0000000000..4ecaaf020b --- /dev/null +++ b/specification/inference/put_custom/examples/PutCustomRequestExample4.yaml @@ -0,0 +1,25 @@ +summary: Custom rerank task (Jina AI) +description: Run `PUT _inference/rerank/custom-rerank-jina` to create an inference endpoint that performs a rerank task. 
+method_request: 'PUT _inference/rerank/custom-rerank-jina' +# type: "request" +value: |- + { + "service": "custom", + "service_settings": { + "secret_parameters": { + "api_key": "" + }, + "url": "https://api.jina.ai/v1/rerank", + "headers": { + "Content-Type": "application/json", + "Authorization": "Bearer ${api_key}" + }, + "request": "{\"model\": \"jina-reranker-v2-base-multilingual\",\"query\": ${query},\"documents\":${input}}", + "response": { + "json_parser": { + "relevance_score": "$.results[*].relevance_score", + "reranked_index": "$.results[*].index" + } + } + } + } diff --git a/specification/inference/put_custom/examples/PutCustomRequestExample5.yaml b/specification/inference/put_custom/examples/PutCustomRequestExample5.yaml new file mode 100644 index 0000000000..c9f86dad8d --- /dev/null +++ b/specification/inference/put_custom/examples/PutCustomRequestExample5.yaml @@ -0,0 +1,24 @@ +summary: Custom text embedding task (Hugging Face) +description: Run `PUT _inference/text_embedding/custom-text-embedding-hf` to create an inference endpoint that performs a text embedding task by using the Qwen/Qwen3-Embedding-8B model. 
+method_request: 'PUT _inference/text_embedding/custom-text-embedding-hf' +# type: "request" +value: |- + { + "service": "custom", + "service_settings": { + "secret_parameters": { + "api_key": "" + }, + "url": "/v1/embeddings", + "headers": { + "Authorization": "Bearer ${api_key}", + "Content-Type": "application/json" + }, + "request": "{\"input\": ${input}}", + "response": { + "json_parser": { + "text_embeddings":"$.data[*].embedding[*]" + } + } + } + } diff --git a/specification/inference/put_deepseek/request/PutDeepSeekRequestExample1.yaml b/specification/inference/put_deepseek/examples/PutDeepSeekRequestExample1.yaml similarity index 100% rename from specification/inference/put_deepseek/request/PutDeepSeekRequestExample1.yaml rename to specification/inference/put_deepseek/examples/PutDeepSeekRequestExample1.yaml diff --git a/specification/inference/put_deepseek/request/PutDeepSeekRequestExample2.yaml b/specification/inference/put_deepseek/examples/PutDeepSeekRequestExample2.yaml similarity index 100% rename from specification/inference/put_deepseek/request/PutDeepSeekRequestExample2.yaml rename to specification/inference/put_deepseek/examples/PutDeepSeekRequestExample2.yaml