def _build_visible_mask_reference(mask_bytes: bytes) -> bytes:
    """Render an alpha edit mask as visible black/white guidance for Gemini.

    The mask is decoded, its alpha channel is thresholded at 128, and the
    result is re-encoded as an opaque PNG: pixels whose alpha is below 128
    come out white, every other pixel comes out black.

    Args:
        mask_bytes: Encoded bytes of the edit mask image.

    Returns:
        PNG-encoded bytes of an RGB image with the same dimensions as the
        decoded mask.
    """
    # Pillow is imported at call time rather than at module import.
    from PIL import Image

    with Image.open(io.BytesIO(mask_bytes)) as source:
        # Converting to RGBA first guarantees an "A" band exists to extract.
        alpha_band = source.convert("RGBA").getchannel("A")

    # 256-entry lookup table: alpha 0..127 -> 255, alpha 128..255 -> 0.
    threshold_lut = [255] * 128 + [0] * 128
    edit_region = alpha_band.point(threshold_lut)

    # Start from a fresh black canvas and stamp white through the thresholded
    # band, so the output is built from pixel data only.
    canvas = Image.new("RGB", edit_region.size, (0, 0, 0))
    canvas.paste((255, 255, 255), mask=edit_region)

    with io.BytesIO() as encoded:
        canvas.save(encoded, format="PNG")
        return encoded.getvalue()
Repaint only " + "the white edit region. Do not create a new scene outside the masked " + "replacement area. The mask image is not part of the output: do not " + "draw the mask, do not copy its white or black shapes, and do not " + "leave mask-colored background behind." ) if tradition and tradition != "default": full_prompt += ( @@ -314,8 +334,8 @@ async def inpaint_with_mask( mime_type=_detect_mime_type(image_bytes), ), types.Part.from_bytes( - data=mask_bytes, - mime_type=_detect_mime_type(mask_bytes), + data=visible_mask_bytes, + mime_type="image/png", ), full_prompt, ] diff --git a/tests/test_gemini_image_size.py b/tests/test_gemini_image_size.py index 23ffc98a..ec187fa4 100644 --- a/tests/test_gemini_image_size.py +++ b/tests/test_gemini_image_size.py @@ -202,8 +202,17 @@ def __init__(self, api_key): assert recorded["model"] == "gemini-3.1-flash-image-preview" assert recorded["contents"][0].mime_type == "image/png" assert recorded["contents"][1].mime_type == "image/png" + visible_mask = Image.open(io.BytesIO(recorded["contents"][1].data)).convert( + "RGB" + ) + assert visible_mask.getpixel((8, 6)) == (255, 255, 255) + assert visible_mask.getpixel((0, 0)) == (0, 0, 0) assert "transparent mask pixels" in recorded["contents"][2] assert "opaque mask pixels" in recorded["contents"][2] + assert "white mask pixels" in recorded["contents"][2] + assert "black mask pixels" in recorded["contents"][2] + assert "do not draw the mask" in recorded["contents"][2].lower() + assert "not part of the output" in recorded["contents"][2].lower() assert "paint one compact yellow flower head" in recorded["contents"][2] assert result.mime == "image/png" assert base64.b64decode(result.image_b64).startswith(b"\x89PNG")