refactor(image-gen): switch to gemini-3-pro-image-preview

Uses the generateContent API (replacing the :predict endpoint) and requests the IMAGE and TEXT response modalities.
This commit is contained in:
Nicholai Vogel 2026-01-24 02:06:13 -07:00
parent f9a0ec68b2
commit f739508c8c

View File

@ -1,12 +1,12 @@
/** /**
* Image generation tool using Google AI (Imagen 3). * Image generation tool using Google AI (Gemini 3 Pro Image Preview).
* Usage: bun run src/tools/image-gen.ts "prompt here" * Usage: bun run src/tools/image-gen.ts "prompt here"
* Outputs the path to the generated image. * Outputs the path to the generated image.
*/ */
const API_KEY = process.env.GOOGLE_AI_API_KEY; const API_KEY = process.env.GOOGLE_AI_API_KEY;
const OUTPUT_DIR = '/mnt/work/tmp/image-gen'; const OUTPUT_DIR = '/mnt/work/tmp/image-gen';
const MODEL = 'imagen-4.0-generate-001'; const MODEL = 'gemini-3-pro-image-preview';
async function generateImage(prompt: string): Promise<string> { async function generateImage(prompt: string): Promise<string> {
if (!API_KEY) throw new Error('Missing GOOGLE_AI_API_KEY in .env'); if (!API_KEY) throw new Error('Missing GOOGLE_AI_API_KEY in .env');
@ -14,16 +14,17 @@ async function generateImage(prompt: string): Promise<string> {
console.error(`Generating: "${prompt}"...`); console.error(`Generating: "${prompt}"...`);
const res = await fetch( const res = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/${MODEL}:predict?key=${API_KEY}`, `https://generativelanguage.googleapis.com/v1beta/models/${MODEL}:generateContent?key=${API_KEY}`,
{ {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ body: JSON.stringify({
instances: [{ prompt }], contents: [{
parameters: { parts: [{ text: `Generate an image: ${prompt}` }]
sampleCount: 1, }],
aspectRatio: '1:1', generationConfig: {
safetyFilterLevel: 'block_few', responseModalities: ['IMAGE', 'TEXT'],
responseMimeType: 'text/plain',
}, },
}), }),
signal: AbortSignal.timeout(60_000), signal: AbortSignal.timeout(60_000),
@ -36,16 +37,24 @@ async function generateImage(prompt: string): Promise<string> {
} }
const data = await res.json(); const data = await res.json();
const b64 = data.predictions?.[0]?.bytesBase64Encoded;
if (!b64) { // Find the image part in the response
throw new Error(`No image in response: ${JSON.stringify(data).slice(0, 500)}`); const parts = data.candidates?.[0]?.content?.parts || [];
const imagePart = parts.find((p: any) => p.inlineData?.mimeType?.startsWith('image/'));
if (!imagePart) {
const textParts = parts.filter((p: any) => p.text).map((p: any) => p.text).join('\n');
throw new Error(`No image in response. Text: ${textParts || JSON.stringify(data).slice(0, 500)}`);
} }
const b64 = imagePart.inlineData.data;
const mime = imagePart.inlineData.mimeType;
const ext = mime.includes('png') ? 'png' : 'jpg';
const { mkdirSync, writeFileSync } = await import('fs'); const { mkdirSync, writeFileSync } = await import('fs');
mkdirSync(OUTPUT_DIR, { recursive: true }); mkdirSync(OUTPUT_DIR, { recursive: true });
const filename = `img_${Date.now()}.png`; const filename = `img_${Date.now()}.${ext}`;
const outPath = `${OUTPUT_DIR}/${filename}`; const outPath = `${OUTPUT_DIR}/${filename}`;
writeFileSync(outPath, Buffer.from(b64, 'base64')); writeFileSync(outPath, Buffer.from(b64, 'base64'));