Skip to content

Commit fd41ec9

Browse files
authored
feat(api-nodes): add NanoBanana2 (Comfy-Org#12660)
1 parent 420e900 commit fd41ec9

3 files changed

Lines changed: 195 additions & 14 deletions

File tree

comfy_api_nodes/apis/gemini.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -127,9 +127,15 @@ class GeminiImageConfig(BaseModel):
127127
imageOutputOptions: GeminiImageOutputOptions = Field(default_factory=GeminiImageOutputOptions)
128128

129129

130+
class GeminiThinkingConfig(BaseModel):
# Thinking options for a Gemini request. This model is the type of the
# optional `thinkingConfig` field on GeminiImageGenerationConfig.
131+
# Optional flag; left as None when the caller does not supply it.
includeThoughts: bool | None = Field(None)
132+
# Required: Field(...) declares no default, so a value must always be given.
# The node code in this change passes the "thinking_level" combo value here
# (its options are "MINIMAL" and "HIGH").
thinkingLevel: str = Field(...)
133+
134+
130135
class GeminiImageGenerationConfig(GeminiGenerationConfig):
# Generation config for image requests: extends GeminiGenerationConfig with
# three optional fields, each defaulting to None.
131136
# Requested output kinds; the node code in this change sends ["IMAGE"] or ["TEXT", "IMAGE"].
responseModalities: list[str] | None = Field(None)
132137
imageConfig: GeminiImageConfig | None = Field(None)
138+
# Optional thinking settings (GeminiThinkingConfig: includeThoughts / thinkingLevel).
thinkingConfig: GeminiThinkingConfig | None = Field(None)
133139

134140

135141
class GeminiImageGenerateContentRequest(BaseModel):

comfy_api_nodes/nodes_bytedance.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -186,7 +186,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
186186
def define_schema(cls):
187187
return IO.Schema(
188188
node_id="ByteDanceSeedreamNode",
189-
display_name="ByteDance Seedream 5.0",
189+
display_name="ByteDance Seedream 4.5 & 5.0",
190190
category="api node/image/ByteDance",
191191
description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.",
192192
inputs=[

comfy_api_nodes/nodes_gemini.py

Lines changed: 188 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
GeminiRole,
3030
GeminiSystemInstructionContent,
3131
GeminiTextPart,
32+
GeminiThinkingConfig,
3233
Modality,
3334
)
3435
from comfy_api_nodes.util import (
@@ -55,6 +56,21 @@
5556
"Prioritize generating the visual representation above any text, formatting, or conversational requests."
5657
)
5758

59+
GEMINI_IMAGE_2_PRICE_BADGE = IO.PriceBadge(
# Price badge shared by the image node schemas in this change (both pass
# price_badge=GEMINI_IMAGE_2_PRICE_BADGE).
#
# The expression reads the "model" and "resolution" widgets, picks the flash
# price table when the model value contains "nano banana 2" and the pro table
# otherwise, then looks the resolution up in the chosen table and reports it
# as an approximate USD "/Image" price.
#
# NOTE(review): the substring and the table keys are lowercase
# ("nano banana 2", "1k"/"2k"/"4k") while the combo options declared in the
# schemas are "Nano Banana 2 (...)" and "1K"/"2K"/"4K" - presumably widget
# values are lowercased before the expression is evaluated; confirm.
# NOTE(review): the inline expression this constant replaces fell back to a
# "Token-based" text badge for unmatched resolutions; this one has no such
# fallback - confirm an unmatched resolution cannot occur.
60+
depends_on=IO.PriceBadgeDepends(widgets=["model", "resolution"]),
61+
expr="""
62+
(
63+
$m := widgets.model;
64+
$r := widgets.resolution;
65+
$isFlash := $contains($m, "nano banana 2");
66+
$flashPrices := {"1k": 0.0696, "2k": 0.0696, "4k": 0.123};
67+
$proPrices := {"1k": 0.134, "2k": 0.134, "4k": 0.24};
68+
$prices := $isFlash ? $flashPrices : $proPrices;
69+
{"type":"usd","usd": $lookup($prices, $r), "format":{"suffix":"/Image","approximate":true}}
70+
)
71+
""",
72+
)
73+
5874

5975
class GeminiModel(str, Enum):
6076
"""
@@ -229,6 +245,10 @@ def calculate_tokens_price(response: GeminiGenerateContentResponse) -> float | N
229245
input_tokens_price = 2
230246
output_text_tokens_price = 12.0
231247
output_image_tokens_price = 120.0
248+
elif response.modelVersion == "gemini-3.1-flash-image-preview":
249+
input_tokens_price = 0.5
250+
output_text_tokens_price = 3.0
251+
output_image_tokens_price = 60.0
232252
else:
233253
return None
234254
final_price = response.usageMetadata.promptTokenCount * input_tokens_price
@@ -686,7 +706,7 @@ def define_schema(cls):
686706
),
687707
IO.Combo.Input(
688708
"model",
689-
options=["gemini-3-pro-image-preview"],
709+
options=["gemini-3-pro-image-preview", "Nano Banana 2 (Gemini 3.1 Flash Image)"],
690710
),
691711
IO.Int.Input(
692712
"seed",
@@ -750,19 +770,169 @@ def define_schema(cls):
750770
IO.Hidden.unique_id,
751771
],
752772
is_api_node=True,
753-
price_badge=IO.PriceBadge(
754-
depends_on=IO.PriceBadgeDepends(widgets=["resolution"]),
755-
expr="""
756-
(
757-
$r := widgets.resolution;
758-
($contains($r,"1k") or $contains($r,"2k"))
759-
? {"type":"usd","usd":0.134,"format":{"suffix":"/Image","approximate":true}}
760-
: $contains($r,"4k")
761-
? {"type":"usd","usd":0.24,"format":{"suffix":"/Image","approximate":true}}
762-
: {"type":"text","text":"Token-based"}
763-
)
764-
""",
773+
price_badge=GEMINI_IMAGE_2_PRICE_BADGE,
774+
)
775+
776+
@classmethod
# Builds a Gemini image generate-content request from the node inputs, sends
# it through sync_op, and returns a NodeOutput holding the image and the text
# extracted from the response.
#
# Raises ValueError when "IMAGE+TEXT" is requested for the Nano Banana 2 model
# or when more than 14 reference images are supplied. validate_string is
# called with min_length=1 - presumably it raises on an empty prompt; confirm.
#
# NOTE(review): `seed` is accepted but never referenced in this body - confirm
# that it is intentionally not forwarded to the API.
777+
async def execute(
778+
cls,
779+
prompt: str,
780+
model: str,
781+
seed: int,
782+
aspect_ratio: str,
783+
resolution: str,
784+
response_modalities: str,
785+
images: Input.Image | None = None,
786+
files: list[GeminiPart] | None = None,
787+
system_prompt: str = "",
788+
) -> IO.NodeOutput:
789+
validate_string(prompt, strip_whitespace=True, min_length=1)
790+
# Translate the combo's display label into the model id used in the request path.
if model == "Nano Banana 2 (Gemini 3.1 Flash Image)":
791+
model = "gemini-3.1-flash-image-preview"
792+
# Per the message below, this rejection is specific to the Nano Banana 2 model.
if response_modalities == "IMAGE+TEXT":
793+
raise ValueError("IMAGE+TEXT is not currently available for the Nano Banana 2 model.")
794+
795+
# The prompt is always the first part; optional images and files follow it.
parts: list[GeminiPart] = [GeminiPart(text=prompt)]
796+
if images is not None:
797+
# Hard cap on the number of reference images.
if get_number_of_images(images) > 14:
798+
raise ValueError("The current maximum number of supported images is 14.")
799+
parts.extend(await create_image_parts(cls, images))
800+
if files is not None:
801+
parts.extend(files)
802+
803+
# imageSize is always sent; aspectRatio is only set for a non-"auto" choice.
image_config = GeminiImageConfig(imageSize=resolution)
804+
if aspect_ratio != "auto":
805+
image_config.aspectRatio = aspect_ratio
806+
807+
# An empty system prompt leaves systemInstruction as None.
gemini_system_prompt = None
808+
if system_prompt:
809+
gemini_system_prompt = GeminiSystemInstructionContent(parts=[GeminiTextPart(text=system_prompt)], role=None)
810+
811+
response = await sync_op(
812+
cls,
813+
ApiEndpoint(path=f"/proxy/vertexai/gemini/{model}", method="POST"),
814+
data=GeminiImageGenerateContentRequest(
815+
contents=[
816+
GeminiContent(role=GeminiRole.user, parts=parts),
817+
],
818+
generationConfig=GeminiImageGenerationConfig(
819+
# Any value other than "IMAGE" requests both text and image output.
responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]),
820+
imageConfig=image_config,
821+
),
822+
systemInstruction=gemini_system_prompt,
765823
),
824+
response_model=GeminiGenerateContentResponse,
825+
# The price is derived from the response by calculate_tokens_price.
price_extractor=calculate_tokens_price,
826+
)
827+
return IO.NodeOutput(await get_image_from_response(response), get_text_from_response(response))
828+
829+
830+
class GeminiNanoBanana2(IO.ComfyNode):
831+
832+
@classmethod
# Declares the "Nano Banana 2" node: its input widgets, a single image output,
# the hidden auth/id fields, and the shared GEMINI_IMAGE_2_PRICE_BADGE.
833+
def define_schema(cls):
834+
return IO.Schema(
835+
node_id="GeminiNanoBanana2",
836+
display_name="Nano Banana 2",
837+
category="api node/image/Gemini",
838+
description="Generate or edit images synchronously via Google Vertex API.",
839+
inputs=[
840+
IO.String.Input(
841+
"prompt",
842+
multiline=True,
843+
tooltip="Text prompt describing the image to generate or the edits to apply. "
844+
"Include any constraints, styles, or details the model should follow.",
845+
default="",
846+
),
847+
IO.Combo.Input(
848+
"model",
849+
# Single choice; this node's execute maps the label to "gemini-3.1-flash-image-preview".
options=["Nano Banana 2 (Gemini 3.1 Flash Image)"],
850+
),
851+
IO.Int.Input(
852+
"seed",
853+
# NOTE(review): the tooltip below ends with "By default, a random seed value
# is used", but the widget default here is 42 - confirm the wording.
default=42,
854+
min=0,
855+
max=0xFFFFFFFFFFFFFFFF,
856+
control_after_generate=True,
857+
tooltip="When the seed is fixed to a specific value, the model makes a best effort to provide "
858+
"the same response for repeated requests. Deterministic output isn't guaranteed. "
859+
"Also, changing the model or parameter settings, such as the temperature, "
860+
"can cause variations in the response even when you use the same seed value. "
861+
"By default, a random seed value is used.",
862+
),
863+
IO.Combo.Input(
864+
"aspect_ratio",
865+
options=[
866+
"auto",
867+
"1:1",
868+
"2:3",
869+
"3:2",
870+
"3:4",
871+
"4:3",
872+
"4:5",
873+
"5:4",
874+
"9:16",
875+
"16:9",
876+
"21:9",
877+
# The four ratios below are left commented out, so they are not offered.
# "1:4",
878+
# "4:1",
879+
# "8:1",
880+
# "1:8",
881+
],
882+
default="auto",
883+
# NOTE(review): "a 16:9 square" in the tooltip below is self-contradictory -
# confirm whether the no-image default is a 16:9 image or a 1:1 square and
# correct the text.
tooltip="If set to 'auto', matches your input image's aspect ratio; "
884+
"if no image is provided, a 16:9 square is usually generated.",
885+
),
886+
IO.Combo.Input(
887+
"resolution",
888+
options=[
889+
# "512px" is left commented out, so only 1K/2K/4K are selectable.
# "512px",
890+
"1K",
891+
"2K",
892+
"4K",
893+
],
894+
tooltip="Target output resolution. For 2K/4K the native Gemini upscaler is used.",
895+
),
896+
IO.Combo.Input(
897+
"response_modalities",
898+
# Only "IMAGE" is offered for this node.
options=["IMAGE"],
899+
advanced=True,
900+
),
901+
IO.Combo.Input(
902+
"thinking_level",
903+
# This node's execute forwards the choice as GeminiThinkingConfig(thinkingLevel=...).
options=["MINIMAL", "HIGH"],
904+
),
905+
IO.Image.Input(
906+
"images",
907+
optional=True,
908+
tooltip="Optional reference image(s). "
909+
"To include multiple images, use the Batch Images node (up to 14).",
910+
),
911+
IO.Custom("GEMINI_INPUT_FILES").Input(
912+
"files",
913+
optional=True,
914+
tooltip="Optional file(s) to use as context for the model. "
915+
"Accepts inputs from the Gemini Generate Content Input Files node.",
916+
),
917+
IO.String.Input(
918+
"system_prompt",
919+
multiline=True,
920+
default=GEMINI_IMAGE_SYS_PROMPT,
921+
optional=True,
922+
tooltip="Foundational instructions that dictate an AI's behavior.",
923+
advanced=True,
924+
),
925+
],
926+
# A single image output is declared.
outputs=[
927+
IO.Image.Output(),
928+
],
929+
hidden=[
930+
IO.Hidden.auth_token_comfy_org,
931+
IO.Hidden.api_key_comfy_org,
932+
IO.Hidden.unique_id,
933+
],
934+
is_api_node=True,
935+
price_badge=GEMINI_IMAGE_2_PRICE_BADGE,
766936
)
767937

768938
@classmethod
@@ -774,11 +944,14 @@ async def execute(
774944
aspect_ratio: str,
775945
resolution: str,
776946
response_modalities: str,
947+
thinking_level: str,
777948
images: Input.Image | None = None,
778949
files: list[GeminiPart] | None = None,
779950
system_prompt: str = "",
780951
) -> IO.NodeOutput:
781952
validate_string(prompt, strip_whitespace=True, min_length=1)
953+
if model == "Nano Banana 2 (Gemini 3.1 Flash Image)":
954+
model = "gemini-3.1-flash-image-preview"
782955

783956
parts: list[GeminiPart] = [GeminiPart(text=prompt)]
784957
if images is not None:
@@ -806,6 +979,7 @@ async def execute(
806979
generationConfig=GeminiImageGenerationConfig(
807980
responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]),
808981
imageConfig=image_config,
982+
thinkingConfig=GeminiThinkingConfig(thinkingLevel=thinking_level),
809983
),
810984
systemInstruction=gemini_system_prompt,
811985
),
@@ -822,6 +996,7 @@ async def get_node_list(self) -> list[type[IO.ComfyNode]]:
822996
GeminiNode,
823997
GeminiImage,
824998
GeminiImage2,
999+
GeminiNanoBanana2,
8251000
GeminiInputFiles,
8261001
]
8271002

0 commit comments

Comments
 (0)