diff --git a/src/tools/image-gen.ts b/src/tools/image-gen.ts index 4dc12f0..f1b8e2c 100644 --- a/src/tools/image-gen.ts +++ b/src/tools/image-gen.ts @@ -1,12 +1,12 @@ /** - * Image generation tool using Google AI (Imagen 3). + * Image generation tool using Google AI (Gemini 3 Pro Image Preview). * Usage: bun run src/tools/image-gen.ts "prompt here" * Outputs the path to the generated image. */ const API_KEY = process.env.GOOGLE_AI_API_KEY; const OUTPUT_DIR = '/mnt/work/tmp/image-gen'; -const MODEL = 'imagen-4.0-generate-001'; +const MODEL = 'gemini-3-pro-image-preview'; async function generateImage(prompt: string): Promise { if (!API_KEY) throw new Error('Missing GOOGLE_AI_API_KEY in .env'); @@ -14,16 +14,17 @@ async function generateImage(prompt: string): Promise { console.error(`Generating: "${prompt}"...`); const res = await fetch( - `https://generativelanguage.googleapis.com/v1beta/models/${MODEL}:predict?key=${API_KEY}`, + `https://generativelanguage.googleapis.com/v1beta/models/${MODEL}:generateContent?key=${API_KEY}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ - instances: [{ prompt }], - parameters: { - sampleCount: 1, - aspectRatio: '1:1', - safetyFilterLevel: 'block_few', + contents: [{ + parts: [{ text: `Generate an image: ${prompt}` }] + }], + generationConfig: { + responseModalities: ['IMAGE', 'TEXT'], + responseMimeType: 'text/plain', }, }), signal: AbortSignal.timeout(60_000), @@ -36,16 +37,24 @@ async function generateImage(prompt: string): Promise { } const data = await res.json(); - const b64 = data.predictions?.[0]?.bytesBase64Encoded; - if (!b64) { - throw new Error(`No image in response: ${JSON.stringify(data).slice(0, 500)}`); + // Find the image part in the response + const parts = data.candidates?.[0]?.content?.parts || []; + const imagePart = parts.find((p: any) => p.inlineData?.mimeType?.startsWith('image/')); + + if (!imagePart) { + const textParts = parts.filter((p: any) => p.text).map((p: any) => p.text).join('\n'); + throw new Error(`No image in response. Text: ${textParts || JSON.stringify(data).slice(0, 500)}`); } + const b64 = imagePart.inlineData.data; + const mime = imagePart.inlineData.mimeType; + const ext = mime.includes('png') ? 'png' : 'jpg'; + const { mkdirSync, writeFileSync } = await import('fs'); mkdirSync(OUTPUT_DIR, { recursive: true }); - const filename = `img_${Date.now()}.png`; + const filename = `img_${Date.now()}.${ext}`; const outPath = `${OUTPUT_DIR}/${filename}`; writeFileSync(outPath, Buffer.from(b64, 'base64'));