Skip to content

Commit cf8e494

Browse files
[Inference] Add imageToVideo inference task (#1578)
Add general support for the `image-to-video` task in `@huggingface/inference` cc @zeke (related to #1575) --------- Co-authored-by: apolinário <[email protected]>
1 parent 7bdbb0e commit cf8e494

File tree

4 files changed

+38
-1
lines changed

4 files changed

+38
-1
lines changed

packages/inference/src/lib/getProviderHelper.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import type {
2424
ImageSegmentationTaskHelper,
2525
ImageToImageTaskHelper,
2626
ImageToTextTaskHelper,
27+
ImageToVideoTaskHelper,
2728
ObjectDetectionTaskHelper,
2829
QuestionAnsweringTaskHelper,
2930
SentenceSimilarityTaskHelper,
@@ -239,6 +240,10 @@ export function getProviderHelper(
239240
provider: InferenceProviderOrPolicy,
240241
task: "image-to-image"
241242
): ImageToImageTaskHelper & TaskProviderHelper;
243+
export function getProviderHelper(
244+
provider: InferenceProviderOrPolicy,
245+
task: "image-to-video"
246+
): ImageToVideoTaskHelper & TaskProviderHelper;
242247
export function getProviderHelper(
243248
provider: InferenceProviderOrPolicy,
244249
task: "sentence-similarity"
@@ -275,7 +280,6 @@ export function getProviderHelper(
275280
provider: InferenceProviderOrPolicy,
276281
task: InferenceTask | undefined
277282
): TaskProviderHelper;
278-
279283
export function getProviderHelper(
280284
provider: InferenceProviderOrPolicy,
281285
task: InferenceTask | undefined

packages/inference/src/providers/providerHelper.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import type {
1818
ImageToImageInput,
1919
ImageToTextInput,
2020
ImageToTextOutput,
21+
ImageToVideoInput,
2122
ObjectDetectionInput,
2223
ObjectDetectionOutput,
2324
QuestionAnsweringInput,
@@ -52,6 +53,7 @@ import type { BaseArgs, BodyParams, HeaderParams, InferenceProvider, RequestArgs
5253
import { toArray } from "../utils/toArray.js";
5354
import type { ImageToImageArgs } from "../tasks/cv/imageToImage.js";
5455
import type { AutomaticSpeechRecognitionArgs } from "../tasks/audio/automaticSpeechRecognition.js";
56+
import type { ImageToVideoArgs } from "../tasks/cv/imageToVideo.js";
5557

5658
/**
5759
* Base class for task-specific provider helpers
@@ -150,6 +152,12 @@ export interface ImageToImageTaskHelper {
150152
preparePayloadAsync(args: ImageToImageArgs): Promise<RequestArgs>;
151153
}
152154

155+
export interface ImageToVideoTaskHelper {
156+
getResponse(response: unknown, url?: string, headers?: HeadersInit): Promise<Blob>;
157+
preparePayload(params: BodyParams<ImageToVideoInput & BaseArgs>): Record<string, unknown>;
158+
preparePayloadAsync(args: ImageToVideoArgs): Promise<RequestArgs>;
159+
}
160+
153161
export interface ImageSegmentationTaskHelper {
154162
getResponse(response: unknown, url?: string, headers?: HeadersInit): Promise<ImageSegmentationOutput>;
155163
preparePayload(params: BodyParams<ImageSegmentationInput & BaseArgs>): Record<string, unknown> | BodyInit;

packages/inference/src/tasks/cv/imageToVideo.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import type { ImageToVideoInput } from "@huggingface/tasks";
2+
import { resolveProvider } from "../../lib/getInferenceProviderMapping.js";
3+
import { getProviderHelper } from "../../lib/getProviderHelper.js";
4+
import type { BaseArgs, Options } from "../../types.js";
5+
import { innerRequest } from "../../utils/request.js";
6+
import { makeRequestOptions } from "../../lib/makeRequestOptions.js";
7+
8+
export type ImageToVideoArgs = BaseArgs & ImageToVideoInput;
9+
10+
/**
11+
* This task reads an image input and outputs a video.
12+
* Recommended model: Wan-AI/Wan2.1-I2V-14B-720P
13+
*/
14+
export async function imageToVideo(args: ImageToVideoArgs, options?: Options): Promise<Blob> {
15+
const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);
16+
const providerHelper = getProviderHelper(provider, "image-to-video");
17+
const payload = await providerHelper.preparePayloadAsync(args);
18+
const { data: res } = await innerRequest<Blob>(payload, providerHelper, {
19+
...options,
20+
task: "image-to-video",
21+
});
22+
const { url, info } = await makeRequestOptions(args, providerHelper, { ...options, task: "image-to-video" });
23+
return providerHelper.getResponse(res, url, info.headers as Record<string, string>);
24+
}

packages/inference/src/tasks/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ export * from "./cv/imageClassification.js";
1313
export * from "./cv/imageSegmentation.js";
1414
export * from "./cv/imageToImage.js";
1515
export * from "./cv/imageToText.js";
16+
export * from "./cv/imageToVideo.js";
1617
export * from "./cv/objectDetection.js";
1718
export * from "./cv/textToImage.js";
1819
export * from "./cv/textToVideo.js";

0 commit comments

Comments
 (0)