From d827ef75e4c9d0685920c1e491ed0b3a0491efa1 Mon Sep 17 00:00:00 2001 From: SecurityQQ Date: Thu, 23 Apr 2026 20:44:28 -0700 Subject: [PATCH] feat: add video-to-video models (Kling O3 reference, 4K, v3 motion control) Add 6 new Kling video models with video input support: - kling-v3-4k: native 4K image-to-video ($0.42/sec) - kling-v3-ref: O3 Pro reference-to-video with character consistency ($0.112-0.14/sec) - kling-v3-4k-ref: O3 4K reference-to-video ($0.42/sec) - kling-v3-v2v-ref: O3 Standard video-to-video reference preserving motion/camera ($0.126/sec) - kling-v3-motion: V3 Pro motion control - transfer motion to character (up to 30s, $0.168/sec) - kling-v3-motion-standard: V3 Standard motion control ($0.126/sec) Also: - GenerateVideoPrompt now accepts singular or array for images/audio/video fields - Video action routes video+image to motion control, video-only to v2v reference - Added v2v/vid2vid resolver aliases - Added motionControl() and videoToVideoReference() convenience methods to fal provider --- src/ai-sdk/generate-video.ts | 22 +- src/ai-sdk/providers/fal.ts | 126 ++++++++++- src/ai-sdk/providers/model-rules.ts | 10 + src/core/registry/resolver.ts | 4 + src/definitions/actions/video.ts | 28 ++- src/definitions/models/index.ts | 26 ++- src/definitions/models/kling.ts | 310 ++++++++++++++++++++++++++++ src/providers/fal.ts | 97 +++++++++ src/react/renderers/progress.ts | 6 + 9 files changed, 613 insertions(+), 16 deletions(-) diff --git a/src/ai-sdk/generate-video.ts b/src/ai-sdk/generate-video.ts index f26efdb4..0d07bb79 100644 --- a/src/ai-sdk/generate-video.ts +++ b/src/ai-sdk/generate-video.ts @@ -10,9 +10,9 @@ export type GenerateVideoPrompt = | string | { text?: string; - images?: Array; - audio?: DataContent; - video?: DataContent; + images?: DataContent | Array; + audio?: DataContent | Array; + video?: DataContent | Array; }; export interface GenerateVideoOptions { @@ -76,6 +76,12 @@ function toUint8Array(data: DataContent): Uint8Array { return data; } +/** Normalize singular or array to array */ +function toArray(value: T | T[] | undefined): T[] { + if (value == null) return []; + return Array.isArray(value) ? value : [value]; +} + function normalizePrompt(prompt: GenerateVideoPrompt): { prompt: string | undefined; files: ImageModelV3File[] | undefined; @@ -86,7 +92,7 @@ function normalizePrompt(prompt: GenerateVideoPrompt): { const files: ImageModelV3File[] = []; - for (const img of prompt.images ?? []) { + for (const img of toArray(prompt.images)) { files.push({ type: "file", mediaType: "image/png", @@ -94,19 +100,19 @@ function normalizePrompt(prompt: GenerateVideoPrompt): { }); } - if (prompt.audio) { + for (const aud of toArray(prompt.audio)) { files.push({ type: "file", mediaType: "audio/mpeg", - data: toUint8Array(prompt.audio), + data: toUint8Array(aud), }); } - if (prompt.video) { + for (const vid of toArray(prompt.video)) { files.push({ type: "file", mediaType: "video/mp4", - data: toUint8Array(prompt.video), + data: toUint8Array(vid), }); } diff --git a/src/ai-sdk/providers/fal.ts b/src/ai-sdk/providers/fal.ts index d58d6811..2206edef 100644 --- a/src/ai-sdk/providers/fal.ts +++ b/src/ai-sdk/providers/fal.ts @@ -107,6 +107,11 @@ const VIDEO_MODELS: Record = { t2v: "fal-ai/kling-video/o3/standard/text-to-video", i2v: "fal-ai/kling-video/o3/standard/image-to-video", }, + // Kling O3 4K - native 4K output (i2v only, t2v falls back to pro) + "kling-v3-4k": { + t2v: "fal-ai/kling-video/o3/pro/text-to-video", + i2v: "fal-ai/kling-video/o3/4k/image-to-video", + }, // Kling v2.6 - with native audio generation "kling-v2.6": { t2v: "fal-ai/kling-video/v2.6/pro/text-to-video", @@ -163,8 +168,21 @@ const VIDEO_EDIT_MODELS: Record = { "sora-2-remix": "fal-ai/sora-2/video-to-video/remix", }; +// Reference-to-video models - images/elements + prompt → video with character consistency +const REFERENCE_VIDEO_MODELS: Record = { + "kling-v3-ref": "fal-ai/kling-video/o3/pro/reference-to-video", + "kling-v3-4k-ref": "fal-ai/kling-video/o3/4k/reference-to-video", +}; + +// Video-to-video reference models - reference video + prompt → new video preserving motion/camera +const V2V_REFERENCE_MODELS: Record = { + "kling-v3-v2v-ref": "fal-ai/kling-video/o3/standard/video-to-video/reference", +}; + // Motion control models - video-to-video with motion transfer const MOTION_CONTROL_MODELS: Record = { + "kling-v3-motion": "fal-ai/kling-video/v3/pro/motion-control", + "kling-v3-motion-standard": "fal-ai/kling-video/v3/standard/motion-control", "kling-v2.6-motion": "fal-ai/kling-video/v2.6/pro/motion-control", "kling-v2.6-motion-standard": "fal-ai/kling-video/v2.6/standard/motion-control", @@ -520,8 +538,12 @@ class FalVideoModel implements VideoModelV3 { const isMotionControl = MOTION_CONTROL_MODELS[this.modelId] !== undefined; const isVideoEdit = VIDEO_EDIT_MODELS[this.modelId] !== undefined; const isVideoUpscale = VIDEO_UPSCALE_MODELS[this.modelId] !== undefined; + const isReferenceVideo = REFERENCE_VIDEO_MODELS[this.modelId] !== undefined; + const isV2VReference = V2V_REFERENCE_MODELS[this.modelId] !== undefined; const isKlingV3 = - this.modelId === "kling-v3" || this.modelId === "kling-v3-standard"; + this.modelId === "kling-v3" || + this.modelId === "kling-v3-standard" || + this.modelId === "kling-v3-4k"; const isKlingV26 = this.modelId === "kling-v2.6"; const isLtx2 = this.modelId === "ltx-2-19b-distilled"; const isGrokImagine = this.modelId === "grok-imagine"; @@ -537,7 +559,11 @@ class FalVideoModel implements VideoModelV3 { ? this.resolveVideoEditEndpoint() : isVideoUpscale ? this.resolveVideoUpscaleEndpoint() - : this.resolveEndpoint(hasImageInput ?? false); + : isReferenceVideo + ? this.resolveReferenceVideoEndpoint() + : isV2VReference + ? this.resolveV2VReferenceEndpoint() + : this.resolveEndpoint(hasImageInput ?? false); const input: Record = { ...(providerOptions?.fal ?? {}), @@ -625,6 +651,86 @@ class FalVideoModel implements VideoModelV3 { if (videoFile) { input.video_url = await fileToUrl(videoFile); } + } else if (isReferenceVideo) { + // Reference-to-video: prompt + optional start/end images + reference images + // Elements and multi_prompt are passed via providerOptions.fal + if (prompt) { + input.prompt = prompt; + } + + if (files) { + const imageFiles = files.filter((f) => + getMediaType(f)?.startsWith("image/"), + ); + // First image → start_image_url, second → end_image_url + if (imageFiles[0]) { + input.start_image_url = await fileToUrl(imageFiles[0]); + } + if (imageFiles[1]) { + input.end_image_url = await fileToUrl(imageFiles[1]); + } + // Additional images (3+) → image_urls for style/appearance reference + if (imageFiles.length > 2) { + const additionalUrls: string[] = []; + for (let i = 2; i < imageFiles.length; i++) { + additionalUrls.push(await fileToUrl(imageFiles[i]!)); + } + input.image_urls = additionalUrls; + } + } + + // Duration as string integer for Kling O3 + const normalized = normalizeProviderInput(this.modelId, { duration }); + input.duration = normalized.duration; + + if (!input.aspect_ratio) { + input.aspect_ratio = aspectRatio ?? "16:9"; + } + + // Default to generating audio + if (input.generate_audio === undefined) { + input.generate_audio = true; + } + } else if (isV2VReference) { + // Video-to-video reference: reference video + prompt → new video preserving motion/camera + // Elements and image_urls are passed via providerOptions.fal + if (prompt) { + input.prompt = prompt; + } + + const videoFile = files?.find((f) => + getMediaType(f)?.startsWith("video/"), + ); + if (videoFile) { + input.video_url = await fileToUrl(videoFile); + } + + // Reference images from file inputs (for style/appearance) + if (files) { + const imageFiles = files.filter((f) => + getMediaType(f)?.startsWith("image/"), + ); + if (imageFiles.length > 0) { + const imageUrls: string[] = []; + for (const imgFile of imageFiles) { + imageUrls.push(await fileToUrl(imgFile)); + } + input.image_urls = imageUrls; + } + } + + // Duration as string integer for Kling O3 + const normalized = normalizeProviderInput(this.modelId, { duration }); + input.duration = normalized.duration; + + if (!input.aspect_ratio) { + input.aspect_ratio = aspectRatio ?? "auto"; + } + + // Default to keeping original audio from reference video + if (input.keep_audio === undefined) { + input.keep_audio = true; + } } else { // Standard video generation input.prompt = prompt; @@ -825,6 +931,22 @@ class FalVideoModel implements VideoModelV3 { return VIDEO_UPSCALE_MODELS[this.modelId] ?? this.modelId; } + + private resolveReferenceVideoEndpoint(): string { + if (this.modelId.startsWith("raw:")) { + return this.modelId.slice(4); + } + + return REFERENCE_VIDEO_MODELS[this.modelId] ?? this.modelId; + } + + private resolveV2VReferenceEndpoint(): string { + if (this.modelId.startsWith("raw:")) { + return this.modelId.slice(4); + } + + return V2V_REFERENCE_MODELS[this.modelId] ?? this.modelId; + } } class FalImageModel implements ImageModelV3 { diff --git a/src/ai-sdk/providers/model-rules.ts b/src/ai-sdk/providers/model-rules.ts index 21e3f5ff..7f049cc5 100644 --- a/src/ai-sdk/providers/model-rules.ts +++ b/src/ai-sdk/providers/model-rules.ts @@ -70,6 +70,16 @@ const ModelDurationRules: Record = { "kling-v3": z.object({ duration: stringIntDuration(3, 15, 5) }), "kling-v3-standard": z.object({ duration: stringIntDuration(3, 15, 5) }), + // Kling O3 4K: same rules as v3 + "kling-v3-4k": z.object({ duration: stringIntDuration(3, 15, 5) }), + + // Kling O3 reference-to-video: same duration range + "kling-v3-ref": z.object({ duration: stringIntDuration(3, 15, 5) }), + "kling-v3-4k-ref": z.object({ duration: stringIntDuration(3, 15, 5) }), + + // Kling O3 video-to-video reference: same duration range + "kling-v3-v2v-ref": z.object({ duration: stringIntDuration(3, 15, 5) }), + // Kling v2.6: same rules as v3 "kling-v2.6": z.object({ duration: stringIntDuration(3, 15, 5) }), diff --git a/src/core/registry/resolver.ts b/src/core/registry/resolver.ts index d57db3fa..cc94ea07 100644 --- a/src/core/registry/resolver.ts +++ b/src/core/registry/resolver.ts @@ -40,6 +40,10 @@ const ALIASES: Record = { stt: "speech-to-text", voice: "text-to-speech", + // Video-to-video + v2v: "video-to-video", + vid2vid: "video-to-video", + // Video editing concat: "merge", join: "merge", diff --git a/src/definitions/actions/video.ts b/src/definitions/actions/video.ts index d34e0db0..0b73bbb0 100644 --- a/src/definitions/actions/video.ts +++ b/src/definitions/actions/video.ts @@ -19,6 +19,9 @@ const videoInputSchema = z.object({ image: filePathSchema .optional() .describe("Input image (enables image-to-video)"), + video: filePathSchema + .optional() + .describe("Input video for video-to-video (preserves motion/camera style)"), duration: videoDurationSchema .default(5) .describe("Video duration in seconds"), @@ -42,7 +45,7 @@ const schema: ZodSchema = { export const definition: ActionDefinition = { type: "action", name: "video", - description: "Generate video from text or image", + description: "Generate video from text, image, or video", schema, routes: [ { @@ -51,12 +54,29 @@ export const definition: ActionDefinition = { }, ], execute: async (inputs) => { - // inputs is now fully typed as VideoInput - no more `as` cast! - const { prompt, image, duration, aspectRatio } = inputs; + const { prompt, image, video, duration, aspectRatio } = inputs; let result: { data?: { video?: { url?: string }; duration?: number } }; - if (image) { + if (video && image) { + // Video + image → motion control (transfer motion to character) + console.log( + "[action/video] generating motion control video (image + video)", + ); + result = await falProvider.motionControl({ + prompt, + imageUrl: image, + videoUrl: video, + }); + } else if (video) { + // Video only → video-to-video reference (preserve motion/camera) + console.log("[action/video] generating video-to-video reference"); + result = await falProvider.videoToVideoReference({ + prompt, + videoUrl: video, + duration, + }); + } else if (image) { console.log("[action/video] generating video from image"); result = await falProvider.imageToVideo({ prompt, diff --git a/src/definitions/models/index.ts b/src/definitions/models/index.ts index 182fd6ad..dd9c870a 100644 --- a/src/definitions/models/index.ts +++ b/src/definitions/models/index.ts @@ -11,7 +11,15 @@ import type { export { definition as elevenlabsTts } from "./elevenlabs"; export { definition as flux } from "./flux"; export { definition as heygenAvatar } from "./heygen"; -export { definition as kling } from "./kling"; +export { + definition as kling, + kling4kDefinition as kling4k, + kling4kRefDefinition as kling4kRef, + klingRefDefinition as klingRef, + klingV2VRefDefinition as klingV2VRef, + klingV3MotionDefinition as klingV3Motion, + klingV3MotionStdDefinition as klingV3MotionStd, +} from "./kling"; export { definition as llama } from "./llama"; export { definition as ltxA2v } from "./ltx-a2v"; export { definition as nanoBanana2 } from "./nano-banana-2"; @@ -39,7 +47,15 @@ export { definition as whisper } from "./whisper"; import { definition as elevenlabsDefinition } from "./elevenlabs"; import { definition as fluxDefinition } from "./flux"; import { definition as heygenAvatarDefinition } from "./heygen"; -import { definition as klingDefinition } from "./kling"; +import { + kling4kDefinition, + kling4kRefDefinition, + definition as klingDefinition, + klingRefDefinition, + klingV2VRefDefinition, + klingV3MotionDefinition, + klingV3MotionStdDefinition, +} from "./kling"; import { definition as llamaDefinition } from "./llama"; import { definition as ltxA2vDefinition } from "./ltx-a2v"; import { definition as nanoBanana2Definition } from "./nano-banana-2"; @@ -65,6 +81,12 @@ import { definition as whisperDefinition } from "./whisper"; export const allModels = [ klingDefinition, + kling4kDefinition, + klingRefDefinition, + kling4kRefDefinition, + klingV2VRefDefinition, + klingV3MotionDefinition, + klingV3MotionStdDefinition, fluxDefinition, nanoBananaProDefinition, nanoBanana2Definition, diff --git a/src/definitions/models/kling.ts b/src/definitions/models/kling.ts index c8154821..d2facd47 100644 --- a/src/definitions/models/kling.ts +++ b/src/definitions/models/kling.ts @@ -68,4 +68,314 @@ export const definition: ModelDefinition = { }, }; +// --------------------------------------------------------------------------- +// Kling O3 4K — native 4K output (image-to-video) +// --------------------------------------------------------------------------- + +const kling4kInputSchema = z.object({ + prompt: z.string().describe("Text description guiding the video generation"), + image_url: z.string().url().describe("URL of the start frame image"), + end_image_url: z + .string() + .url() + .optional() + .describe("URL of the end frame image (optional)"), + duration: z + .number() + .int() + .min(3) + .max(15) + .default(5) + .describe("Video duration in seconds (3–15)"), + generate_audio: z + .boolean() + .default(false) + .describe("Whether to generate native audio"), +}); + +const kling4kSchema: ZodSchema< + typeof kling4kInputSchema, + typeof klingOutputSchema +> = { + input: kling4kInputSchema, + output: klingOutputSchema, +}; + +export const kling4kDefinition: ModelDefinition = { + type: "model", + name: "kling-v3-4k", + description: + "Kling O3 4K — native 4K video output from image, no upscaling needed", + providers: ["fal"], + defaultProvider: "fal", + providerModels: { + fal: "fal-ai/kling-video/o3/4k/image-to-video", + }, + schema: kling4kSchema, + pricing: { + fal: { + description: "Kling O3 4K: $0.42/sec regardless of audio", + calculate: ({ duration = 5 }) => 0.42 * duration, + minUsd: 1.26, // 3s + maxUsd: 6.3, // 15s + }, + }, +}; + +// --------------------------------------------------------------------------- +// Kling O3 Reference-to-Video (Pro) — images/elements → video with consistency +// --------------------------------------------------------------------------- + +const klingRefInputSchema = z.object({ + prompt: z + .string() + .optional() + .describe( + "Text prompt for video generation. Reference elements as @Element1, @Element2", + ), + start_image_url: z + .string() + .url() + .optional() + .describe("Image to use as the first frame"), + end_image_url: z + .string() + .url() + .optional() + .describe("Image to use as the last frame"), + image_urls: z + .array(z.string().url()) + .max(7) + .optional() + .describe( + "Reference images for style/appearance. Reference in prompt as @Image1, @Image2. Max 7 total (elements + images)", + ), + duration: z + .number() + .int() + .min(3) + .max(15) + .default(5) + .describe("Video duration in seconds (3–15)"), + aspect_ratio: aspectRatioSchema + .default("16:9") + .describe("Output aspect ratio"), + generate_audio: z + .boolean() + .default(false) + .describe("Whether to generate native audio"), +}); + +const klingRefSchema: ZodSchema< + typeof klingRefInputSchema, + typeof klingOutputSchema +> = { + input: klingRefInputSchema, + output: klingOutputSchema, +}; + +export const klingRefDefinition: ModelDefinition = { + type: "model", + name: "kling-v3-ref", + description: + "Kling O3 Pro reference-to-video — generate video with character/object consistency from reference images and elements", + providers: ["fal"], + defaultProvider: "fal", + providerModels: { + fal: "fal-ai/kling-video/o3/pro/reference-to-video", + }, + schema: klingRefSchema, + pricing: { + fal: { + description: + "Kling O3 Pro ref: $0.112/sec (audio off), $0.14/sec (audio on)", + calculate: ({ duration = 5, generateAudio = false }) => { + const rate = generateAudio ? 0.14 : 0.112; + return rate * duration; + }, + minUsd: 0.336, // 3s * $0.112 + maxUsd: 2.1, // 15s * $0.14 + }, + }, +}; + +// --------------------------------------------------------------------------- +// Kling O3 4K Reference-to-Video — 4K reference-to-video +// --------------------------------------------------------------------------- + +export const kling4kRefDefinition: ModelDefinition = { + type: "model", + name: "kling-v3-4k-ref", + description: + "Kling O3 4K reference-to-video — native 4K video with character/object consistency from reference images and elements", + providers: ["fal"], + defaultProvider: "fal", + providerModels: { + fal: "fal-ai/kling-video/o3/4k/reference-to-video", + }, + schema: klingRefSchema, + pricing: { + fal: { + description: "Kling O3 4K ref: $0.42/sec regardless of audio", + calculate: ({ duration = 5 }) => 0.42 * duration, + minUsd: 1.26, // 3s + maxUsd: 6.3, // 15s + }, + }, +}; + +// --------------------------------------------------------------------------- +// Kling O3 Video-to-Video Reference (Standard) — v2v preserving motion/camera +// --------------------------------------------------------------------------- + +const klingV2VRefInputSchema = z.object({ + prompt: z + .string() + .describe( + "Text prompt for video generation. Reference video as @Video1, elements as @Element1", + ), + video_url: z + .string() + .url() + .describe( + "Reference video URL. Only .mp4/.mov, 3–10s duration, 720–2160px resolution, max 200MB", + ), + image_urls: z + .array(z.string().url()) + .max(4) + .optional() + .describe( + "Reference images for style/appearance. Max 4 total (elements + images) when using video", + ), + keep_audio: z + .boolean() + .default(true) + .describe("Whether to keep the original audio from the reference video"), + duration: z + .number() + .int() + .min(3) + .max(15) + .optional() + .describe("Video duration in seconds (3–15)"), + aspect_ratio: z + .enum(["auto", "16:9", "9:16", "1:1"]) + .default("auto") + .describe("Output aspect ratio"), +}); + +const klingV2VRefSchema: ZodSchema< + typeof klingV2VRefInputSchema, + typeof klingOutputSchema +> = { + input: klingV2VRefInputSchema, + output: klingOutputSchema, +}; + +export const klingV2VRefDefinition: ModelDefinition = + { + type: "model", + name: "kling-v3-v2v-ref", + description: + "Kling O3 video-to-video reference — generate new video guided by reference video, preserving motion and camera style", + providers: ["fal"], + defaultProvider: "fal", + providerModels: { + fal: "fal-ai/kling-video/o3/standard/video-to-video/reference", + }, + schema: klingV2VRefSchema, + pricing: { + fal: { + description: "Kling O3 Standard v2v ref: $0.126/sec", + calculate: ({ duration = 5 }) => 0.126 * duration, + minUsd: 0.378, // 3s + maxUsd: 1.89, // 15s + }, + }, + }; + +// --------------------------------------------------------------------------- +// Kling V3 Motion Control — transfer motion from reference video to character image +// --------------------------------------------------------------------------- + +const klingV3MotionInputSchema = z.object({ + prompt: z.string().optional().describe("Text prompt for video generation"), + image_url: z + .string() + .url() + .describe( + "Reference image URL. Characters should have clear body proportions, avoid occlusion, and occupy >5% of image area", + ), + video_url: z + .string() + .url() + .describe( + "Reference video URL for motion transfer. Max 10s with character_orientation 'image', max 30s with 'video'", + ), + character_orientation: z + .enum(["image", "video"]) + .default("video") + .describe( + "'video': orientation matches reference video, better for complex motions (max 30s). 'image': orientation matches reference image, better for camera movements (max 10s)", + ), + keep_original_sound: z + .boolean() + .default(true) + .describe("Whether to keep the original sound from the reference video"), +}); + +const klingV3MotionSchema: ZodSchema< + typeof klingV3MotionInputSchema, + typeof klingOutputSchema +> = { + input: klingV3MotionInputSchema, + output: klingOutputSchema, +}; + +export const klingV3MotionDefinition: ModelDefinition< + typeof klingV3MotionSchema +> = { + type: "model", + name: "kling-v3-motion", + description: + "Kling V3 Pro motion control — transfer movements from reference video to any character image. Supports up to 30s with video orientation", + providers: ["fal"], + defaultProvider: "fal", + providerModels: { + fal: "fal-ai/kling-video/v3/pro/motion-control", + }, + schema: klingV3MotionSchema, + pricing: { + fal: { + description: "Kling V3 Pro motion control: $0.168/sec", + calculate: ({ inputDuration = 5 }) => 0.168 * inputDuration, + minUsd: 0.504, // 3s + maxUsd: 5.04, // 30s + }, + }, +}; + +export const klingV3MotionStdDefinition: ModelDefinition< + typeof klingV3MotionSchema +> = { + type: "model", + name: "kling-v3-motion-standard", + description: + "Kling V3 Standard motion control — cost-effective motion transfer from reference video to character image. Supports up to 30s with video orientation", + providers: ["fal"], + defaultProvider: "fal", + providerModels: { + fal: "fal-ai/kling-video/v3/standard/motion-control", + }, + schema: klingV3MotionSchema, + pricing: { + fal: { + description: "Kling V3 Standard motion control: $0.126/sec", + calculate: ({ inputDuration = 5 }) => 0.126 * inputDuration, + minUsd: 0.378, // 3s + maxUsd: 3.78, // 30s + }, + }, +}; + export default definition; diff --git a/src/providers/fal.ts b/src/providers/fal.ts index a1a99286..79c824e0 100644 --- a/src/providers/fal.ts +++ b/src/providers/fal.ts @@ -632,6 +632,97 @@ export class FalProvider extends BaseProvider { return result; } + // ============================================================================ + // Video-to-Video + // ============================================================================ + + /** + * Motion control — transfer motion from reference video to character image + * Uses Kling V3 Pro motion control endpoint + */ + async motionControl(args: { + prompt: string; + imageUrl: string; + videoUrl: string; + characterOrientation?: "image" | "video"; + keepOriginalSound?: boolean; + }) { + const modelId = "fal-ai/kling-video/v3/pro/motion-control"; + + console.log(`[fal] starting motion control: ${modelId}`); + console.log(`[fal] prompt: ${args.prompt}`); + + const imageUrl = await ensureUrl(args.imageUrl, (buffer) => + this.uploadFile(buffer), + ); + const videoUrl = await ensureUrl(args.videoUrl, (buffer) => + this.uploadFile(buffer), + ); + + const result = await fal.subscribe(modelId, { + input: { + prompt: args.prompt, + image_url: imageUrl, + video_url: videoUrl, + character_orientation: args.characterOrientation ?? "video", + keep_original_sound: args.keepOriginalSound ?? true, + }, + logs: true, + onQueueUpdate: (update) => { + if (update.status === "IN_PROGRESS") { + console.log( + `[fal] ${update.logs?.map((l) => l.message).join(" ") || "processing..."}`, + ); + } + }, + }); + + console.log("[fal] completed!"); + return result; + } + + /** + * Video-to-video reference — generate new video guided by reference video, + * preserving motion and camera style (Kling O3 Standard) + */ + async videoToVideoReference(args: { + prompt: string; + videoUrl: string; + duration?: number; + aspectRatio?: "auto" | "16:9" | "9:16" | "1:1"; + keepAudio?: boolean; + }) { + const modelId = "fal-ai/kling-video/o3/standard/video-to-video/reference"; + + console.log(`[fal] starting v2v reference: ${modelId}`); + console.log(`[fal] prompt: ${args.prompt}`); + + const videoUrl = await ensureUrl(args.videoUrl, (buffer) => + this.uploadFile(buffer), + ); + + const result = await fal.subscribe(modelId, { + input: { + prompt: args.prompt, + video_url: videoUrl, + duration: args.duration ? String(args.duration) : undefined, + aspect_ratio: args.aspectRatio ?? "auto", + keep_audio: args.keepAudio ?? true, + }, + logs: true, + onQueueUpdate: (update) => { + if (update.status === "IN_PROGRESS") { + console.log( + `[fal] ${update.logs?.map((l) => l.message).join(" ") || "processing..."}`, + ); + } + }, + }); + + console.log("[fal] completed!"); + return result; + } + /** * Edit video using Grok Imagine Video * Video will be resized to max 854x480 and truncated to 8 seconds @@ -761,3 +852,9 @@ export const ltx2AudioToVideo = ( ) => falProvider.ltx2AudioToVideo(args); export const textToMusic = (args: Parameters[0]) => falProvider.textToMusic(args); +export const motionControl = ( + args: Parameters[0], +) => falProvider.motionControl(args); +export const videoToVideoReference = ( + args: Parameters[0], +) => falProvider.videoToVideoReference(args); diff --git a/src/react/renderers/progress.ts b/src/react/renderers/progress.ts index be4a0ac4..60af7e4d 100644 --- a/src/react/renderers/progress.ts +++ b/src/react/renderers/progress.ts @@ -31,6 +31,12 @@ export const MODEL_TIME_ESTIMATES: Record = { "kling-v2": 180, "kling-v2.5": 180, "kling-v2.6": 180, + "kling-v3-4k": 240, + "kling-v3-ref": 180, + "kling-v3-4k-ref": 240, + "kling-v3-v2v-ref": 180, + "kling-v3-motion": 300, + "kling-v3-motion-standard": 240, "kling-v2.6-motion": 240, "kling-v2.6-motion-standard": 180, minimax: 90,