From f15ab4d85cd47370747a5903dcb96abebdc023ad Mon Sep 17 00:00:00 2001 From: SBrandeis Date: Tue, 1 Jul 2025 12:22:47 +0200 Subject: [PATCH 1/2] Add imageToVideo inference task --- .../inference/src/lib/getProviderHelper.ts | 6 ++++- .../inference/src/providers/providerHelper.ts | 8 +++++++ .../inference/src/tasks/cv/imageToVideo.ts | 24 +++++++++++++++++++ packages/inference/src/tasks/index.ts | 1 + 4 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 packages/inference/src/tasks/cv/imageToVideo.ts diff --git a/packages/inference/src/lib/getProviderHelper.ts b/packages/inference/src/lib/getProviderHelper.ts index d7cc87fb59..06e692aa72 100644 --- a/packages/inference/src/lib/getProviderHelper.ts +++ b/packages/inference/src/lib/getProviderHelper.ts @@ -24,6 +24,7 @@ import type { ImageSegmentationTaskHelper, ImageToImageTaskHelper, ImageToTextTaskHelper, + ImageToVideoTaskHelper, ObjectDetectionTaskHelper, QuestionAnsweringTaskHelper, SentenceSimilarityTaskHelper, @@ -239,6 +240,10 @@ export function getProviderHelper( provider: InferenceProviderOrPolicy, task: "image-to-image" ): ImageToImageTaskHelper & TaskProviderHelper; +export function getProviderHelper( + provider: InferenceProviderOrPolicy, + task: "image-to-video" +): ImageToVideoTaskHelper & TaskProviderHelper; export function getProviderHelper( provider: InferenceProviderOrPolicy, task: "sentence-similarity" @@ -275,7 +280,6 @@ export function getProviderHelper( provider: InferenceProviderOrPolicy, task: InferenceTask | undefined ): TaskProviderHelper; - export function getProviderHelper( provider: InferenceProviderOrPolicy, task: InferenceTask | undefined diff --git a/packages/inference/src/providers/providerHelper.ts b/packages/inference/src/providers/providerHelper.ts index f934ef8fb1..5260b23998 100644 --- a/packages/inference/src/providers/providerHelper.ts +++ b/packages/inference/src/providers/providerHelper.ts @@ -18,6 +18,7 @@ import type { ImageToImageInput, ImageToTextInput, 
ImageToTextOutput, + ImageToVideoInput, ObjectDetectionInput, ObjectDetectionOutput, QuestionAnsweringInput, @@ -52,6 +53,7 @@ import type { BaseArgs, BodyParams, HeaderParams, InferenceProvider, RequestArgs import { toArray } from "../utils/toArray.js"; import type { ImageToImageArgs } from "../tasks/cv/imageToImage.js"; import type { AutomaticSpeechRecognitionArgs } from "../tasks/audio/automaticSpeechRecognition.js"; +import type { ImageToVideoArgs } from "../tasks/cv/imageToVideo.js"; /** * Base class for task-specific provider helpers @@ -150,6 +152,12 @@ export interface ImageToImageTaskHelper { preparePayloadAsync(args: ImageToImageArgs): Promise; } +export interface ImageToVideoTaskHelper { + getResponse(response: unknown, url?: string, headers?: HeadersInit): Promise; + preparePayload(params: BodyParams): Record; + preparePayloadAsync(args: ImageToVideoArgs): Promise; +} + export interface ImageSegmentationTaskHelper { getResponse(response: unknown, url?: string, headers?: HeadersInit): Promise; preparePayload(params: BodyParams): Record | BodyInit; diff --git a/packages/inference/src/tasks/cv/imageToVideo.ts b/packages/inference/src/tasks/cv/imageToVideo.ts new file mode 100644 index 0000000000..9dba1c05ab --- /dev/null +++ b/packages/inference/src/tasks/cv/imageToVideo.ts @@ -0,0 +1,24 @@ +import type { ImageToVideoInput } from "@huggingface/tasks"; +import { resolveProvider } from "../../lib/getInferenceProviderMapping.js"; +import { getProviderHelper } from "../../lib/getProviderHelper.js"; +import type { BaseArgs, Options } from "../../types.js"; +import { innerRequest } from "../../utils/request.js"; +import { makeRequestOptions } from "../../lib/makeRequestOptions.js"; + +export type ImageToVideoArgs = BaseArgs & ImageToVideoInput; + +/** + * This task reads an image input and outputs a video.
+ * Recommended model: lllyasviel/sd-controlnet-depth + */ +export async function imageToVideo(args: ImageToVideoArgs, options?: Options): Promise { + const provider = await resolveProvider(args.provider, args.model, args.endpointUrl); + const providerHelper = getProviderHelper(provider, "image-to-video"); + const payload = await providerHelper.preparePayloadAsync(args); + const { data: res } = await innerRequest(payload, providerHelper, { + ...options, + task: "image-to-video", + }); + const { url, info } = await makeRequestOptions(args, providerHelper, { ...options, task: "image-to-video" }); + return providerHelper.getResponse(res, url, info.headers as Record); +} diff --git a/packages/inference/src/tasks/index.ts b/packages/inference/src/tasks/index.ts index f32d87f0f6..a39779a227 100644 --- a/packages/inference/src/tasks/index.ts +++ b/packages/inference/src/tasks/index.ts @@ -13,6 +13,7 @@ export * from "./cv/imageClassification.js"; export * from "./cv/imageSegmentation.js"; export * from "./cv/imageToImage.js"; export * from "./cv/imageToText.js"; +export * from "./cv/imageToVideo.js"; export * from "./cv/objectDetection.js"; export * from "./cv/textToImage.js"; export * from "./cv/textToVideo.js"; From 49f27a7121b7901d1974c500b78454a3613b0d7d Mon Sep 17 00:00:00 2001 From: Simon Brandeis <33657802+SBrandeis@users.noreply.github.com> Date: Tue, 1 Jul 2025 18:10:32 +0200 Subject: [PATCH 2/2] Update packages/inference/src/tasks/cv/imageToVideo.ts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: apolinário --- packages/inference/src/tasks/cv/imageToVideo.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/inference/src/tasks/cv/imageToVideo.ts b/packages/inference/src/tasks/cv/imageToVideo.ts index 9dba1c05ab..cfe3dd7b4b 100644 --- a/packages/inference/src/tasks/cv/imageToVideo.ts +++ b/packages/inference/src/tasks/cv/imageToVideo.ts @@ -9,7 +9,7 @@ export type ImageToVideoArgs
= BaseArgs & ImageToVideoInput; /** * This task reads an image input and outputs a video. - * Recommended model: lllyasviel/sd-controlnet-depth + * Recommended model: Wan-AI/Wan2.1-I2V-14B-720P */ export async function imageToVideo(args: ImageToVideoArgs, options?: Options): Promise { const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);