Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
181 changes: 159 additions & 22 deletions src/tools/imagegen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ interface ImageGenInput {
output_path?: string;
size?: string;
model?: string;
/**
* Optional reference image for image-to-image generation (style transfer,
* character consistency, edits). When set, the call is routed to
* /v1/images/image2image instead of /v1/images/generations and only
* edit-capable models may be used — currently openai/gpt-image-1 and
* openai/gpt-image-2 (see EDIT_SUPPORTED_MODELS). Accepts:
* - http(s) URL — fetched client-side and inlined as a data URI
* - data URI (data:image/...;base64,...) — passed through unchanged
* - local file path — read and base64-encoded
* URL and file inputs are capped at ~4 MB (REFERENCE_IMAGE_MAX_BYTES).
*/
image_url?: string;
/**
* Optional Content id to attach this generation to. When provided:
* (1) Budget is checked BEFORE the paid generation — refusing up-front
Expand All @@ -37,6 +48,75 @@ interface ImageGenInput {
contentId?: string;
}

/**
 * Models that accept a reference image via /v1/images/image2image. Currently
 * limited to OpenAI's edit endpoint — Gemini Nano Banana Pro and Grok Imagine
 * Image Pro need gateway-side support before they can be wired in here.
 */
export const EDIT_SUPPORTED_MODELS = new Set([
  'openai/gpt-image-1',
  'openai/gpt-image-2',
]);

/** Hard cap (~4 MB) on reference image payloads — applied to every input form. */
export const REFERENCE_IMAGE_MAX_BYTES = 4_000_000;

/**
 * Normalize a reference image into a base64 data URI for the gateway. The
 * /v1/images/image2image endpoint validates `image` against /^data:image\//,
 * so http(s) URLs and local paths both have to be inlined client-side before
 * posting. Already-formed data URIs pass through after a size check, so an
 * oversized inline payload cannot sidestep the cap enforced on the other forms.
 *
 * @param input - data URI, http(s) URL, or file path (absolute, or relative
 *   to `workingDir`).
 * @param workingDir - base directory used to resolve relative file paths.
 * @returns a `data:image/...;base64,...` string ready to post to the gateway.
 * @throws Error on fetch failure, non-image content-type, missing/unreadable
 *   file, unsupported extension, or payload above REFERENCE_IMAGE_MAX_BYTES.
 */
export async function resolveReferenceImage(input: string, workingDir: string): Promise<string> {
  if (input.startsWith('data:image/')) {
    // Estimate decoded size from the base64 payload (4 chars ≈ 3 bytes) so
    // inline data URIs honor the same cap as fetched / on-disk images.
    const comma = input.indexOf(',');
    const approxBytes = comma >= 0 ? Math.floor(((input.length - comma - 1) * 3) / 4) : 0;
    if (approxBytes > REFERENCE_IMAGE_MAX_BYTES) {
      throw new Error(
        `Reference image too large: ${(approxBytes / 1_000_000).toFixed(1)}MB > ${(REFERENCE_IMAGE_MAX_BYTES / 1_000_000).toFixed(1)}MB cap.`,
      );
    }
    return input;
  }

  if (/^https?:\/\//i.test(input)) {
    // Abort slow upstreams after 30s so a hung fetch can't stall the tool.
    const ctrl = new AbortController();
    const timeout = setTimeout(() => ctrl.abort(), 30_000);
    try {
      const resp = await fetch(input, { signal: ctrl.signal });
      if (!resp.ok) {
        throw new Error(`Reference image fetch failed: ${resp.status} ${resp.statusText}`);
      }
      // Reject non-image responses up front — cheaper than a failed paid call.
      const contentType = (resp.headers.get('content-type') || '').toLowerCase().split(';')[0].trim();
      if (!contentType.startsWith('image/')) {
        throw new Error(`Reference image URL returned non-image content-type: ${contentType || '(none)'}`);
      }
      const buf = Buffer.from(await resp.arrayBuffer());
      if (buf.byteLength > REFERENCE_IMAGE_MAX_BYTES) {
        throw new Error(
          `Reference image too large: ${(buf.byteLength / 1_000_000).toFixed(1)}MB > ${(REFERENCE_IMAGE_MAX_BYTES / 1_000_000).toFixed(1)}MB cap.`,
        );
      }
      return `data:${contentType};base64,${buf.toString('base64')}`;
    } finally {
      clearTimeout(timeout);
    }
  }

  // Treat as local file path; relative paths resolve against workingDir.
  const resolved = path.isAbsolute(input) ? input : path.resolve(workingDir, input);
  let stat: fs.Stats;
  try {
    stat = fs.statSync(resolved);
  } catch {
    // Surface a clear, actionable message instead of a raw ENOENT stack.
    throw new Error(`Reference image not found or unreadable: ${resolved}`);
  }
  if (stat.size > REFERENCE_IMAGE_MAX_BYTES) {
    throw new Error(
      `Reference image too large: ${(stat.size / 1_000_000).toFixed(1)}MB > ${(REFERENCE_IMAGE_MAX_BYTES / 1_000_000).toFixed(1)}MB cap. Resize or crop first.`,
    );
  }
  // MIME type is inferred from the extension — the gateway needs it in the URI.
  const ext = path.extname(resolved).toLowerCase();
  const mimeMap: Record<string, string> = {
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.webp': 'image/webp',
  };
  const mime = mimeMap[ext];
  if (!mime) {
    throw new Error(`Unsupported reference image extension ${ext || '(none)'}. Use .png/.jpg/.jpeg/.gif/.webp.`);
  }
  const bytes = fs.readFileSync(resolved);
  return `data:${mime};base64,${bytes.toString('base64')}`;
}

export interface ImageGenDeps {
/** Optional Content library for auto-recording generations into a piece. */
library?: ContentLibrary;
Expand All @@ -50,12 +130,24 @@ function buildExecute(deps: ImageGenDeps) {
ctx: ExecutionScope,
): Promise<CapabilityResult> {
const rawInput = input as unknown as ImageGenInput;
const { output_path, size, model, contentId } = rawInput;
const { output_path, size, model, contentId, image_url } = rawInput;

if (!rawInput.prompt) {
return { output: 'Error: prompt is required', isError: true };
}

// Resolve the reference image (if any) before any paid call so we fail
// cheaply on bad paths / oversize attachments. resolveReferenceImage always
// yields a base64 data URI, which is what gets posted to /v1/images/image2image.
let referenceImage: string | undefined;
if (image_url) {
try {
referenceImage = await resolveReferenceImage(image_url, ctx.workingDir);
} catch (err) {
return { output: `Error: ${(err as Error).message}`, isError: true };
}
}

// One-shot refinement opt-out: leading `///` tells Franklin "don't
// refine this prompt, I wrote it the way I want it." Strip the prefix
// and pass skipRefine through to the router.
Expand All @@ -72,12 +164,29 @@ function buildExecute(deps: ImageGenDeps) {
// step and use the old default. Otherwise: classifier picks a fitting
// model + rewrites the prompt, the preview goes to AskUser, user
// chooses or cancels.
let imageModel = model || 'openai/gpt-image-1';
// Reference-image mode forces an edit-capable model. If the caller named
// an unsupported one, fail loudly so we don't silently downgrade their
// request to text-only generation.
if (referenceImage && model && !EDIT_SUPPORTED_MODELS.has(model)) {
return {
output:
`Error: model ${model} does not support reference images. ` +
`Use one of: ${[...EDIT_SUPPORTED_MODELS].join(', ')}.`,
isError: true,
};
}

let imageModel = model || (referenceImage ? 'openai/gpt-image-2' : 'openai/gpt-image-1');
const imageSize = size || '1024x1024';
let chosenPrompt = prompt;

// Skip the proposal flow when a reference image is set: the media router
// doesn't know which models support image-to-image, so its suggestions
// would frequently be unusable (text-only models). Default to gpt-image-2
// for now; a future router upgrade can pick between the edit-capable
// models based on the prompt.
const autoApprove = process.env.FRANKLIN_MEDIA_AUTO_APPROVE_ALL === '1';
if (!model && !autoApprove && ctx.onAskUser) {
if (!model && !autoApprove && ctx.onAskUser && !referenceImage) {
try {
const chain = loadChain();
const client = new ModelClient({ apiUrl: API_URLS[chain], chain });
Expand Down Expand Up @@ -137,20 +246,34 @@ function buildExecute(deps: ImageGenDeps) {

const chain = loadChain();
const apiUrl = API_URLS[chain];
const endpoint = `${apiUrl}/v1/images/generations`;
// Reference-image mode hits the dedicated /v1/images/image2image endpoint;
// otherwise stay on text-to-image generations.
const endpoint = referenceImage
? `${apiUrl}/v1/images/image2image`
: `${apiUrl}/v1/images/generations`;

// Default output path
const outPath = output_path
? (path.isAbsolute(output_path) ? output_path : path.resolve(ctx.workingDir, output_path))
: path.resolve(ctx.workingDir, `generated-${Date.now()}.png`);

const body = JSON.stringify({
model: imageModel,
prompt: chosenPrompt,
n: 1,
size: imageSize,
response_format: 'b64_json',
});
const body = JSON.stringify(
referenceImage
? {
model: imageModel,
prompt: chosenPrompt,
image: referenceImage,
size: imageSize,
n: 1,
}
: {
model: imageModel,
prompt: chosenPrompt,
n: 1,
size: imageSize,
response_format: 'b64_json',
},
);

const headers: Record<string, string> = {
'Content-Type': 'application/json',
Expand All @@ -173,7 +296,7 @@ function buildExecute(deps: ImageGenDeps) {
if (response.status === 402) {
const paymentHeaders = await signPayment(response, chain, endpoint);
if (!paymentHeaders) {
return { output: 'Payment failed. Check wallet balance with: runcode balance', isError: true };
return { output: 'Payment failed. Check wallet balance with: franklin balance', isError: true };
}

response = await fetch(endpoint, {
Expand All @@ -198,11 +321,21 @@ function buildExecute(deps: ImageGenDeps) {
return { output: 'No image data returned from API', isError: true };
}

// Save image
// Save image. The /v1/images/image2image endpoint returns Gemini results
// as a data URI in `url`, so decode those locally instead of going through
// fetch — saves a network round-trip and avoids data:-URI fetch quirks.
if (imageData.b64_json) {
const buffer = Buffer.from(imageData.b64_json, 'base64');
fs.mkdirSync(path.dirname(outPath), { recursive: true });
fs.writeFileSync(outPath, buffer);
} else if (imageData.url && imageData.url.startsWith('data:')) {
const match = imageData.url.match(/^data:[^;]+;base64,(.+)$/);
if (!match) {
return { output: 'Malformed data URI in response', isError: true };
}
const buffer = Buffer.from(match[1], 'base64');
fs.mkdirSync(path.dirname(outPath), { recursive: true });
fs.writeFileSync(outPath, buffer);
} else if (imageData.url) {
// Download from URL (with 30s timeout)
const dlCtrl = new AbortController();
Expand Down Expand Up @@ -290,7 +423,7 @@ async function signPayment(
feePayer as string,
{
resourceUrl: details.resource?.url || endpoint,
resourceDescription: details.resource?.description || 'RunCode image generation',
resourceDescription: details.resource?.description || 'Franklin image generation',
maxTimeoutSeconds: details.maxTimeoutSeconds || 300,
extra: details.extra as Record<string, unknown> | undefined,
}
Expand All @@ -309,7 +442,7 @@ async function signPayment(
details.network || 'eip155:8453',
{
resourceUrl: details.resource?.url || endpoint,
resourceDescription: details.resource?.description || 'RunCode image generation',
resourceDescription: details.resource?.description || 'Franklin image generation',
maxTimeoutSeconds: details.maxTimeoutSeconds || 300,
extra: details.extra as Record<string, unknown> | undefined,
}
Expand Down Expand Up @@ -347,20 +480,24 @@ export function createImageGenCapability(deps: ImageGenDeps = {}): CapabilityHan
spec: {
name: 'ImageGen',
description:
"Generate an image from a text prompt. Costs USDC from the user's wallet " +
"— confirm before generating. Saves to a local file. Default size: " +
"1024x1024. Do NOT call repeatedly to iterate on style — ask the user " +
"first. Pass contentId to attach the result to an existing Content " +
"piece: the content's budget is checked BEFORE paying, and on success " +
"the image is recorded as an asset with its estimated cost. Skipping " +
"contentId generates a one-off image with no budget tracking.",
"Generate an image from a text prompt — optionally with a reference " +
"image for style transfer / character consistency / edits. Costs USDC " +
"from the user's wallet — confirm before generating. Saves to a local " +
"file. Default size: 1024x1024. Do NOT call repeatedly to iterate on " +
"style — ask the user first. Pass contentId to attach the result to " +
"an existing Content piece: the content's budget is checked BEFORE " +
"paying, and on success the image is recorded as an asset with its " +
"estimated cost. Skipping contentId generates a one-off image with no " +
"budget tracking. When image_url is set, only edit-capable models " +
"(openai/gpt-image-1, openai/gpt-image-2) are accepted.",
input_schema: {
type: 'object',
properties: {
prompt: { type: 'string', description: 'Text description of the image to generate' },
output_path: { type: 'string', description: 'Where to save the image. Default: generated-<timestamp>.png in working directory' },
size: { type: 'string', description: 'Image size: 1024x1024, 1792x1024, or 1024x1792. Default: 1024x1024' },
model: { type: 'string', description: 'Image model to use. Default: openai/gpt-image-1' },
image_url: { type: 'string', description: 'Optional reference image (image-to-image / style transfer). Accepts an http(s) URL, a data URI, or a local file path. Only works with edit-capable models.' },
contentId: { type: 'string', description: 'Optional Content id to attach this generation to. Pre-flight budget check + auto-record on success.' },
},
required: ['prompt'],
Expand Down
108 changes: 107 additions & 1 deletion test/local.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ test('write capability allows files under system temp directory', async () => {
test('session storage falls back to temp dir when HOME is not writable', async () => {
const originalHome = process.env.HOME;
const fakeHome = mkdtempSync(join(tmpdir(), 'rc-home-ro-'));
const fallbackDir = join(tmpdir(), 'runcode', 'sessions');
const fallbackDir = join(tmpdir(), 'franklin', 'sessions');

try {
mkdirSync(fakeHome, { recursive: true });
Expand Down Expand Up @@ -3944,3 +3944,109 @@ test('version-check: getAvailableUpdate reflects cache vs installed version', as
else if (fs.existsSync(cacheFile)) fs.unlinkSync(cacheFile);
}
});

test('imagegen: resolveReferenceImage passes data URIs through unchanged', async () => {
  const { resolveReferenceImage } = await import('../dist/tools/imagegen.js');

  // Already in the data:image/... shape the gateway requires — no conversion.
  const passthrough = 'data:image/png;base64,iVBORw0KGgo=';
  const result = await resolveReferenceImage(passthrough, '/tmp');
  assert.equal(result, passthrough);
});

test('imagegen: resolveReferenceImage fetches http(s) URLs and inlines them as data URIs', async () => {
  const { resolveReferenceImage } = await import('../dist/tools/imagegen.js');
  const onePixelPng = Buffer.from(
    'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==',
    'base64',
  );

  // Tiny fixture server: a real PNG, an HTML page, and a 404 for each branch.
  const srv = createServer((req, res) => {
    switch (req.url) {
      case '/img.png':
        res.writeHead(200, { 'Content-Type': 'image/png', 'Content-Length': onePixelPng.length });
        res.end(onePixelPng);
        break;
      case '/text.html':
        res.writeHead(200, { 'Content-Type': 'text/html' });
        res.end('<html></html>');
        break;
      case '/missing.png':
        res.writeHead(404, { 'Content-Type': 'text/plain' });
        res.end('not found');
        break;
      default:
        res.writeHead(500);
        res.end();
    }
  });
  await new Promise(resolve => srv.listen(0, '127.0.0.1', resolve));
  const base = `http://127.0.0.1:${srv.address().port}`;
  try {
    const inlined = await resolveReferenceImage(`${base}/img.png`, '/tmp');
    assert.match(inlined, /^data:image\/png;base64,/, 'url should round-trip into a data URI');
    const roundTripped = Buffer.from(inlined.split(',')[1], 'base64');
    assert.ok(roundTripped.equals(onePixelPng), 'fetched bytes must match original');

    // Non-image content-type → reject before we waste a paid call.
    await assert.rejects(
      () => resolveReferenceImage(`${base}/text.html`, '/tmp'),
      /non-image content-type/,
    );

    // Upstream errors surface clearly.
    await assert.rejects(
      () => resolveReferenceImage(`${base}/missing.png`, '/tmp'),
      /Reference image fetch failed: 404/,
    );
  } finally {
    await new Promise(resolve => srv.close(resolve));
  }
});

test('imagegen: resolveReferenceImage reads and base64-encodes a local image', async () => {
  const { resolveReferenceImage } = await import('../dist/tools/imagegen.js');
  const workDir = mkdtempSync(join(tmpdir(), 'imagegen-ref-'));
  const absolutePath = join(workDir, 'pixel.png');
  // 1x1 transparent PNG.
  const pixel = Buffer.from(
    'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==',
    'base64',
  );
  writeFileSync(absolutePath, pixel);

  try {
    const viaAbsolute = await resolveReferenceImage(absolutePath, '/tmp');
    assert.match(viaAbsolute, /^data:image\/png;base64,/);
    const recovered = Buffer.from(viaAbsolute.split(',')[1], 'base64');
    assert.ok(recovered.equals(pixel), 'round-trip should preserve bytes');

    // Relative paths resolve against workingDir.
    const viaRelative = await resolveReferenceImage('pixel.png', workDir);
    assert.equal(viaRelative, viaAbsolute);
  } finally {
    rmSync(workDir, { recursive: true, force: true });
  }
});

test('imagegen: resolveReferenceImage rejects unsupported extensions and oversized files', async () => {
  const { resolveReferenceImage, REFERENCE_IMAGE_MAX_BYTES } = await import('../dist/tools/imagegen.js');
  const scratch = mkdtempSync(join(tmpdir(), 'imagegen-ref-'));

  try {
    // Unsupported extension.
    const textFile = join(scratch, 'note.txt');
    writeFileSync(textFile, 'hello');
    await assert.rejects(
      () => resolveReferenceImage(textFile, '/tmp'),
      /Unsupported reference image extension/,
    );

    // Oversized PNG.
    const hugeFile = join(scratch, 'huge.png');
    writeFileSync(hugeFile, Buffer.alloc(REFERENCE_IMAGE_MAX_BYTES + 1, 0));
    await assert.rejects(
      () => resolveReferenceImage(hugeFile, '/tmp'),
      /Reference image too large/,
    );
  } finally {
    rmSync(scratch, { recursive: true, force: true });
  }
});

test('imagegen: EDIT_SUPPORTED_MODELS lists OpenAI image-edit models', async () => {
  const { EDIT_SUPPORTED_MODELS } = await import('../dist/tools/imagegen.js');
  for (const supported of ['openai/gpt-image-1', 'openai/gpt-image-2']) {
    assert.ok(EDIT_SUPPORTED_MODELS.has(supported));
  }
  // Other providers can be added once the gateway wires them up.
  for (const pending of ['google/nano-banana-pro', 'xai/grok-imagine-image-pro']) {
    assert.ok(!EDIT_SUPPORTED_MODELS.has(pending));
  }
});
Loading