refactor(image-gen): switch to gemini-3-pro-image-preview

Uses the generateContent API with the IMAGE and TEXT response modalities.
This commit is contained in:
Nicholai Vogel 2026-01-24 02:06:13 -07:00
parent f9a0ec68b2
commit f739508c8c

View File

@ -1,12 +1,12 @@
/**
* Image generation tool using Google AI (Imagen 3).
* Image generation tool using Google AI (Gemini 3 Pro Image Preview).
* Usage: bun run src/tools/image-gen.ts "prompt here"
* Outputs the path to the generated image.
*/
const API_KEY = process.env.GOOGLE_AI_API_KEY;
const OUTPUT_DIR = '/mnt/work/tmp/image-gen';
const MODEL = 'imagen-4.0-generate-001';
const MODEL = 'gemini-3-pro-image-preview';
async function generateImage(prompt: string): Promise<string> {
if (!API_KEY) throw new Error('Missing GOOGLE_AI_API_KEY in .env');
@ -14,16 +14,17 @@ async function generateImage(prompt: string): Promise<string> {
console.error(`Generating: "${prompt}"...`);
const res = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/${MODEL}:predict?key=${API_KEY}`,
`https://generativelanguage.googleapis.com/v1beta/models/${MODEL}:generateContent?key=${API_KEY}`,
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
instances: [{ prompt }],
parameters: {
sampleCount: 1,
aspectRatio: '1:1',
safetyFilterLevel: 'block_few',
contents: [{
parts: [{ text: `Generate an image: ${prompt}` }]
}],
generationConfig: {
responseModalities: ['IMAGE', 'TEXT'],
responseMimeType: 'text/plain',
},
}),
signal: AbortSignal.timeout(60_000),
@ -36,16 +37,24 @@ async function generateImage(prompt: string): Promise<string> {
}
const data = await res.json();
const b64 = data.predictions?.[0]?.bytesBase64Encoded;
if (!b64) {
throw new Error(`No image in response: ${JSON.stringify(data).slice(0, 500)}`);
// Find the image part in the response
const parts = data.candidates?.[0]?.content?.parts || [];
const imagePart = parts.find((p: any) => p.inlineData?.mimeType?.startsWith('image/'));
if (!imagePart) {
const textParts = parts.filter((p: any) => p.text).map((p: any) => p.text).join('\n');
throw new Error(`No image in response. Text: ${textParts || JSON.stringify(data).slice(0, 500)}`);
}
const b64 = imagePart.inlineData.data;
const mime = imagePart.inlineData.mimeType;
const ext = mime.includes('png') ? 'png' : 'jpg';
const { mkdirSync, writeFileSync } = await import('fs');
mkdirSync(OUTPUT_DIR, { recursive: true });
const filename = `img_${Date.now()}.png`;
const filename = `img_${Date.now()}.${ext}`;
const outPath = `${OUTPUT_DIR}/${filename}`;
writeFileSync(outPath, Buffer.from(b64, 'base64'));