Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/ai-sdk/middleware/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ export {
type PlaceholderOptions,
type PlaceholderResult,
} from "./placeholder";
export {
type PrerenderFallbackOptions,
prerenderFallbackMiddleware,
} from "./prerender";
export {
type ImagePlaceholderFallbackOptions,
imagePlaceholderFallbackMiddleware,
Expand Down
205 changes: 205 additions & 0 deletions src/ai-sdk/middleware/prerender.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
import { unlink } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import type { ImageModelV3 } from "@ai-sdk/provider";
import type { generateImage } from "ai";
import type { VideoModelV3CallOptions } from "../video-model";
import type { VideoModelMiddleware } from "./wrap-video-model";

/**
 * Configuration for {@link prerenderFallbackMiddleware}, which swaps real
 * video generation for a cheap still-frame clip.
 */
export interface PrerenderFallbackOptions {
/**
 * Image model used to produce the still frame for text-to-video requests.
 * It is only consulted when no usable input image is found among the
 * request's files; in that case it is passed as `model` to
 * `generateImageFn` together with the request prompt.
 */
imageModel: ImageModelV3;

/**
 * The `generateImage` implementation to call (should be the cached version).
 * Must be call-compatible with `generateImage` from the `ai` package.
 */
generateImageFn: typeof generateImage;

/**
 * Optional callback invoked once per request, before the still-frame video
 * is encoded.
 *
 * `prompt` is the request prompt; on the text-to-video path an empty prompt
 * is reported as `"placeholder"`. `hasInputImage` is `true` when an input
 * image was reused as the frame and `false` when the frame is generated
 * from the prompt.
 */
onPrerender?: (prompt: string, hasInputImage: boolean) => void;
}

/**
 * Creates a still-frame MP4 from a single image using ffmpeg.
 *
 * The image is written to a temp file, looped for `duration` seconds at
 * 30 fps, encoded with libx264 (`yuv420p`, `-tune stillimage`), read back
 * into memory, and both temp files are removed afterwards (best effort).
 *
 * @param imageData - Encoded image bytes to hold on screen.
 * @param duration - Clip length in seconds; must be finite and greater than 0.
 * @param resolution - Optional `"<width>x<height>"` target. The image is
 *   scaled to fit inside it and padded to fill it. Values that do not parse
 *   as two numbers >= 2 are ignored and the image's own size is used; there
 *   is no default resolution. Odd dimensions are rounded down to the nearest
 *   even number.
 * @returns The encoded MP4 bytes.
 * @throws Error if `duration` is invalid or ffmpeg exits with a non-zero code.
 */
async function imageToStillVideo(
  imageData: Uint8Array,
  duration: number,
  resolution?: string,
): Promise<Uint8Array> {
  if (!Number.isFinite(duration) || duration <= 0) {
    throw new Error(
      `prerender: duration must be a positive number of seconds, got ${duration}`,
    );
  }

  const ts = Date.now();
  const rand = Math.random().toString(36).slice(2);
  const imgPath = join(tmpdir(), `prerender_img_${ts}_${rand}.png`);
  const outPath = join(tmpdir(), `prerender_vid_${ts}_${rand}.mp4`);

  // libx264 with yuv420p refuses frames whose width or height is odd, so
  // every path must end on even dimensions. Without a usable resolution we
  // only trim the source image down to the nearest even size.
  let videoFilter = "scale=trunc(iw/2)*2:trunc(ih/2)*2";
  if (resolution) {
    const [w, h] = resolution.split("x").map(Number);
    if (
      w !== undefined &&
      h !== undefined &&
      Number.isFinite(w) &&
      Number.isFinite(h) &&
      w >= 2 &&
      h >= 2
    ) {
      const evenW = Math.floor(w / 2) * 2;
      const evenH = Math.floor(h / 2) * 2;
      // Fit inside the target box, then pad (centered) to fill it exactly.
      videoFilter = `scale=${evenW}:${evenH}:force_original_aspect_ratio=decrease,pad=${evenW}:${evenH}:(ow-iw)/2:(oh-ih)/2`;
    }
  }

  try {
    await Bun.write(imgPath, imageData);

    const { $ } = await import("bun");

    // Each entry is passed to the shell as one escaped argument, so the
    // filter expression (parentheses, `*`) needs no manual quoting.
    const args = [
      "ffmpeg",
      "-y",
      "-loop",
      "1",
      "-i",
      imgPath,
      "-t",
      String(duration),
      "-r",
      "30",
      "-vf",
      videoFilter,
      "-c:v",
      "libx264",
      "-preset",
      "ultrafast",
      "-pix_fmt",
      "yuv420p",
      "-tune",
      "stillimage",
      outPath,
    ];

    const result = await $`${args}`.quiet().nothrow();

    if (result.exitCode !== 0) {
      const stderr = result.stderr.toString().trim();
      throw new Error(
        `ffmpeg still-frame failed (exit ${result.exitCode}): ${stderr || "unknown error"}`,
      );
    }

    // Await inside the try so the file is fully read before cleanup runs.
    return await Bun.file(outPath).bytes();
  } finally {
    // Best-effort cleanup: a missing file (e.g. ffmpeg never wrote the
    // output) must not mask the real error.
    await Promise.all([
      unlink(imgPath).catch(() => {}),
      unlink(outPath).catch(() => {}),
    ]);
  }
}

/**
 * Extracts the first usable image from VideoModelV3CallOptions.files.
 *
 * Files are examined in order and the first match wins:
 * - `type === "file"` entries whose `mediaType` starts with `image/` are
 *   returned as-is when `data` is a `Uint8Array`, or decoded with `atob`
 *   when `data` is a string.
 * - `type === "url"` entries are fetched and returned only when the response
 *   `content-type` header starts with `image/`.
 *
 * Entries matching neither rule (for example files with a non-image
 * `mediaType`) are skipped without an error.
 *
 * @param params - Call options whose `files` list is searched.
 * @returns The image bytes, or `undefined` when `files` is absent or no
 *   entry yields an image.
 */
async function extractFirstImage(
params: VideoModelV3CallOptions,
): Promise<Uint8Array | undefined> {
if (!params.files) return undefined;

for (const file of params.files) {
if (file.type === "file" && file.mediaType?.startsWith("image/")) {
if (file.data instanceof Uint8Array) {
return file.data;
}
if (typeof file.data === "string") {
// String data is treated as base64 and decoded byte-by-byte.
return Uint8Array.from(atob(file.data), (c) => c.charCodeAt(0));
}
// Any other `data` shape falls through to the next file.
}
if (file.type === "url") {
// Fetch the URL to get binary data
try {
const response = await fetch(file.url);
// NOTE(review): the HTTP status is not checked, so an error response
// served with an image content-type would still be accepted — confirm
// whether `response.ok` should gate this.
const contentType = response.headers.get("content-type") ?? "";
if (contentType.startsWith("image/")) {
return new Uint8Array(await response.arrayBuffer());
}
} catch {
// Skip URLs that can't be fetched (errors while fetching or reading
// the body are swallowed and the loop moves on to the next file).
}
}
}

// No file produced an image.
return undefined;
Comment on lines +100 to +129
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

non-image source inputs get dropped here

The public VideoPrompt already allows audio and video sources, but this path only preserves image files. When params.files contains a video or audio input, extractFirstImage() finds no image and returns undefined, so we synthesize a fresh t2v frame from the prompt instead, which changes the scene you're trying to review. At minimum, fail fast for unsupported params.files entries; ideally, extract a poster or first frame for video inputs.

Also applies to: 152-179

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/ai-sdk/middleware/prerender.ts` around lines 100 - 129, extractFirstImage
currently only preserves image files and silently falls through for audio/video
inputs; update extractFirstImage (and the same logic at the other occurrence) to
explicitly handle non-image media: detect mediaType starting with "video/" and
"audio/"; for "video/" try to extract a poster or first frame (e.g., call a new
helper like extractFirstFrameFromVideo(file.data) or use file.metadata?.poster
if present and return its Uint8Array), and for "audio/" (or when video frame
extraction fails) fail fast by throwing a clear error (or return a distinct
failure) so we do not silently synthesize a T2V frame—add/implement
extractFirstFrameFromVideo(data: Uint8Array | string): Promise<Uint8Array |
undefined> and use it inside extractFirstImage for file.type === "file" and
mediaType.startsWith("video/"), otherwise throw an error describing the
unsupported non-image input.

}

/**
 * Middleware that replaces video generation with still-frame clips.
 *
 * - For i2v (image-to-video): the first usable input image becomes the frame.
 * - For t2v (text-to-video): an image is generated from the prompt with the
 *   configured image model and becomes the frame.
 *
 * The wrapped model's own generation is never invoked. The resulting video
 * lasts `params.duration` seconds (3 when unset), making it suitable for
 * verifying visual-audio sync before expensive video generation.
 *
 * If input files were supplied but none of them yielded an image (for
 * example video/audio inputs or URLs that could not be fetched), the frame
 * is still generated from the prompt, and an additional warning is returned
 * so the substitution is not silent.
 *
 * @param options - Image model, image generation function and optional
 *   notification callback.
 * @returns A video-model middleware whose `wrapGenerate` yields one
 *   still-frame video plus descriptive warnings.
 * @throws Error (from `wrapGenerate`) when image generation returns no data
 *   or the still-frame encode fails.
 */
export function prerenderFallbackMiddleware(
  options: PrerenderFallbackOptions,
): VideoModelMiddleware {
  const { imageModel, generateImageFn, onPrerender } = options;

  return {
    // `doGenerate` is intentionally not used: the real model is bypassed.
    wrapGenerate: async ({ params, model }) => {
      const duration = params.duration ?? 3;

      // Try to extract an existing image from the input files (i2v case)
      const inputImage = await extractFirstImage(params);
      const hasInputImage = inputImage !== undefined;

      // Inputs were provided but none could serve as a frame; remember this
      // so the caller is told the frame does not come from their inputs.
      const inputsIgnored = !hasInputImage && (params.files?.length ?? 0) > 0;

      let frameImage: Uint8Array;

      if (inputImage !== undefined) {
        // i2v: use the input image directly as the still frame
        frameImage = inputImage;
        onPrerender?.(params.prompt, true);
      } else {
        // t2v: generate an image from the text prompt
        const prompt = params.prompt || "placeholder";
        onPrerender?.(prompt, false);

        const { images } = await generateImageFn({
          model: imageModel,
          prompt,
          n: 1,
          aspectRatio: params.aspectRatio,
        } as Parameters<typeof generateImage>[0]);

        const firstImage = images[0];
        if (!firstImage?.uint8Array) {
          throw new Error(
            `prerender: image generation returned no data for prompt: ${prompt.slice(0, 80)}`,
          );
        }
        frameImage = firstImage.uint8Array;
      }

      // Create still-frame video with exact duration
      const videoData = await imageToStillVideo(
        frameImage,
        duration,
        params.resolution,
      );

      const warnings = [
        {
          type: "other" as const,
          message: `prerender: still frame (${hasInputImage ? "i2v input" : "t2v generated"}, ${duration}s)`,
        },
      ];
      if (inputsIgnored) {
        warnings.push({
          type: "other" as const,
          message:
            "prerender: input files were provided but none yielded a usable image; the still frame was generated from the prompt instead",
        });
      }

      return {
        videos: [videoData],
        warnings,
        response: {
          timestamp: new Date(),
          modelId: `prerender:${model.modelId}`,
          headers: undefined,
        },
      };
    },
  };
}
2 changes: 1 addition & 1 deletion src/ai-sdk/middleware/wrap-video-model.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { VideoModelV3, VideoModelV3CallOptions } from "../video-model";
import { generatePlaceholder } from "./placeholder";

export type RenderMode = "strict" | "preview";
export type RenderMode = "strict" | "preview" | "prerender";

export interface VideoModelMiddleware {
transformParams?: (options: {
Expand Down
1 change: 1 addition & 0 deletions src/cli/commands/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export { initCmd, showInitHelp } from "./init.tsx";
export { listCmd, showListHelp } from "./list.tsx";
export { loginCmd } from "./login.tsx";
export { logoutCmd } from "./logout.ts";
export { prerenderCmd, showPrerenderHelp } from "./prerender.tsx";
export {
previewCmd,
renderCmd,
Expand Down
Loading
Loading