diff --git a/js/plugins/anthropic/README.md b/js/plugins/anthropic/README.md index ec9c9115c0..acc80eec68 100644 --- a/js/plugins/anthropic/README.md +++ b/js/plugins/anthropic/README.md @@ -56,11 +56,6 @@ const response = await ai.generate({ { text: 'What animal is in the photo?' }, { media: { url: imageUrl } }, ], - config: { - // control of the level of visual detail when processing image embeddings - // Low detail level also decreases the token usage - visualDetailLevel: 'low', - }, }); console.log(response.text); ``` diff --git a/js/testapps/anthropic/README.md b/js/testapps/anthropic/README.md index 3343b90ea7..b209958e68 100644 --- a/js/testapps/anthropic/README.md +++ b/js/testapps/anthropic/README.md @@ -11,7 +11,9 @@ src/ text-plain.ts - Text/plain error handling demonstration webp.ts - WEBP image handling demonstration pdf.ts - PDF document processing examples + vision.ts - Image/vision analysis examples attention-first-page.pdf - Sample PDF file for testing + sample-image.png - Sample image file for vision demo beta/ basic.ts - Basic beta API examples ``` @@ -39,6 +41,7 @@ src/ - `pnpm run dev:stable:text-plain` – Start Dev UI for text/plain error handling demo. - `pnpm run dev:stable:webp` – Start Dev UI for WEBP image handling demo. - `pnpm run dev:stable:pdf` – Start Dev UI for PDF document processing demo. +- `pnpm run dev:stable:vision` – Start Dev UI for image/vision analysis demo. 
## Flows @@ -64,4 +67,9 @@ Each source file defines flows that can be invoked from the Dev UI or the Genkit - `stable-pdf-url` – Process a PDF from a publicly accessible URL - `stable-pdf-analysis` – Analyze a PDF document for key topics, concepts, and visual elements +### Vision/Image Analysis +- `stable-vision-url` – Analyze an image from a public URL +- `stable-vision-base64` – Analyze an image from a local file (base64 encoded) +- `stable-vision-conversation` – Multi-turn conversation about an image + Example: `genkit flow:run anthropic-stable-hello` diff --git a/js/testapps/anthropic/package.json b/js/testapps/anthropic/package.json index 08e1a0d2fd..7ecb0e2a62 100644 --- a/js/testapps/anthropic/package.json +++ b/js/testapps/anthropic/package.json @@ -13,6 +13,7 @@ "dev:stable:text-plain": "genkit start -- npx tsx --watch src/stable/text-plain.ts", "dev:stable:webp": "genkit start -- npx tsx --watch src/stable/webp.ts", "dev:stable:pdf": "genkit start -- npx tsx --watch src/stable/pdf.ts", + "dev:stable:vision": "genkit start -- npx tsx --watch src/stable/vision.ts", "genkit:dev": "cross-env GENKIT_ENV=dev npm run dev:stable", "genkit:start": "cross-env GENKIT_ENV=dev genkit start -- tsx --watch src/stable/basic.ts", "dev": "export GENKIT_RUNTIME_ID=$(openssl rand -hex 8) && node lib/stable/basic.js 2>&1" diff --git a/js/testapps/anthropic/src/stable/sample-image.png b/js/testapps/anthropic/src/stable/sample-image.png new file mode 100644 index 0000000000..2a273c0b76 Binary files /dev/null and b/js/testapps/anthropic/src/stable/sample-image.png differ diff --git a/js/testapps/anthropic/src/stable/vision.ts b/js/testapps/anthropic/src/stable/vision.ts new file mode 100644 index 0000000000..0a53e7b657 --- /dev/null +++ b/js/testapps/anthropic/src/stable/vision.ts @@ -0,0 +1,132 @@ +/** + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { anthropic } from '@genkit-ai/anthropic'; +import * as fs from 'fs'; +import { genkit } from 'genkit'; +import * as path from 'path'; + +const ai = genkit({ + plugins: [anthropic()], +}); + +/** + * Flow `stable-vision-url`: sends one user message holding a text prompt plus a media part whose `url` is a remote image link. + * Generates with `claude-sonnet-4-5` and returns the response text. + */ +ai.defineFlow('stable-vision-url', async () => { + // Using a Wikimedia Commons image (stated to be public domain; NOTE(review): confirm the license) + const imageUrl = + 'https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/1200px-Cat03.jpg'; + + const { text } = await ai.generate({ + model: anthropic.model('claude-sonnet-4-5'), + messages: [ + { + role: 'user', + content: [ + { text: 'What do you see in this image? Describe it in detail.' }, + { + media: { + url: imageUrl, + }, + }, + ], + }, + ], + }); + + return text; +}); + +/** + * Flow `stable-vision-base64`: reads `sample-image.png` synchronously from disk and base64-encodes its bytes. + * The image is sent as a `data:image/png;base64,...` URL with `contentType: 'image/png'`; the response text is returned. + */ +ai.defineFlow('stable-vision-base64', async () => { + // Resolve sample-image.png against __dirname so the lookup does not depend on the process working directory + const imagePath = path.join(__dirname, 'sample-image.png'); + const imageBuffer = fs.readFileSync(imagePath); + const imageBase64 = imageBuffer.toString('base64'); + + const { text } = await ai.generate({ + model: anthropic.model('claude-sonnet-4-5'), + messages: [ + { + role: 'user', + content: [ + { + text: 'Describe this image. 
What objects, colors, and scenes do you observe?', + }, + { + media: { + url: `data:image/png;base64,${imageBase64}`, + contentType: 'image/png', + }, + }, + ], + }, + ], + }); + + return text; +}); + +/** + * Flow `stable-vision-conversation`: replays a three-message history (user image question, `model` reply, user follow-up) and returns the text generated for the follow-up. + * The `model` turn is hard-coded text, not a generated reply; NOTE(review): its landscape description presumably matches sample-image.png, confirm. + */ +ai.defineFlow('stable-vision-conversation', async () => { + const imagePath = path.join(__dirname, 'sample-image.png'); + const imageBuffer = fs.readFileSync(imagePath); + const imageBase64 = imageBuffer.toString('base64'); + + const { text } = await ai.generate({ + model: anthropic.model('claude-sonnet-4-5'), + messages: [ + { + role: 'user', + content: [ + { text: 'What do you see in this image?' }, + { + media: { + url: `data:image/png;base64,${imageBase64}`, + contentType: 'image/png', + }, + }, + ], + }, + { + role: 'model', + content: [ + { + text: 'I see a beautiful mountain landscape with a fjord or lake, green hills, and dramatic peaks under a blue sky with wispy clouds.', + }, + ], + }, + { + role: 'user', + content: [ + { + text: 'What time of day do you think this photo was taken, and what season might it be?', + }, + ], + }, + ], + }); + + return text; +});