index.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. import { tool } from "@opencode-ai/plugin/tool"
  2. import { mkdir } from "fs/promises"
  3. import { join, dirname, basename, extname, resolve } from "path"
  4. import { getApiKey } from "../env"
  /**
   * Reports whether the module is running in test mode.
   *
   * Test mode is strictly opt-in: it is active only when the
   * `GEMINI_TEST_MODE` environment variable holds the exact string `'true'`.
   *
   * @returns `true` when test mode is enabled, `false` otherwise.
   */
  function isTestMode(): boolean {
    const flag = process.env.GEMINI_TEST_MODE
    return flag === 'true'
  }
  /**
   * Resolves the API key sent with Gemini requests.
   *
   * In test mode a fixed placeholder key is returned so that no real
   * credential is needed. Otherwise the key named `GEMINI_API_KEY` is
   * requested from `getApiKey` (imported from `../env`).
   *
   * @returns The API key string.
   */
  async function getGeminiApiKey(): Promise<string> {
    return isTestMode() ? 'test-api-key' : getApiKey('GEMINI_API_KEY')
  }
  /** Options controlling where a generated or edited image is written. */
  interface ImageConfig {
    /** Base output directory; when omitted a date-based default is used. */
    outputDir?: string
    /** Not read by any code visible in this file. */
    useTimestamp?: boolean
    /** Not read by any code visible in this file. */
    preserveOriginal?: boolean
    /** Base file name for the output; a trailing image extension is stripped by the callers. */
    customName?: string
  }
  /**
   * Converts an image reference into the `{ mime, base64 }` pair that is
   * embedded in request bodies as inline data.
   *
   * Two input forms are accepted:
   * - a base64 data URL (`data:image/png;base64,....`), which is split into
   *   its media type and payload without touching the file system;
   * - anything else, which is treated as a file path and read via `Bun.file`.
   *
   * @param input Data URL or file path of the image.
   * @returns The media type and the base64-encoded image bytes. The media
   *   type falls back to `image/png` when none can be determined.
   * @throws Error when a data URL has no `,` separator, is not flagged as
   *   base64, or carries an empty payload.
   */
  async function parseImageInput(input: string): Promise<{ mime: string; base64: string }> {
    const dataPrefix = "data:"
    if (input.startsWith(dataPrefix)) {
      const commaIndex = input.indexOf(",")
      if (commaIndex === -1) {
        throw new Error("Invalid data URL: missing ',' separator before the payload")
      }
      // The header has the shape "<mime>[;param=value]*[;base64]".
      const [mimePart = "", ...params] = input.slice(dataPrefix.length, commaIndex).split(";")
      const isBase64 = params.some((param) => param.trim().toLowerCase() === "base64")
      if (!isBase64) {
        // A percent-encoded payload must not be forwarded as if it were base64.
        throw new Error("Invalid data URL: only base64-encoded data URLs are supported")
      }
      const base64 = input.slice(commaIndex + 1)
      if (base64.length === 0) {
        throw new Error("Invalid data URL: empty payload")
      }
      // Same best-effort default as the file-path branch below.
      return { mime: mimePart.trim() || "image/png", base64 }
    }

    // Treat as file path
    const file = Bun.file(input)
    const bytes = await file.arrayBuffer()
    // Best-effort mime
    return { mime: file.type || "image/png", base64: Buffer.from(bytes).toString("base64") }
  }
  /**
   * Creates `dirPath` together with any missing parent directories.
   *
   * `mkdir` with `recursive: true` already resolves when the directory is
   * present, so nothing is suppressed here: a rejection signals a real
   * problem (for example a path component that is a regular file, or missing
   * permissions) and is passed on to the caller instead of being hidden.
   *
   * @param dirPath Directory that must exist after the call.
   * @throws Whatever error `mkdir` rejects with.
   */
  async function ensureDirectoryExists(dirPath: string): Promise<void> {
    await mkdir(dirPath, { recursive: true })
  }
  /**
   * Builds the dated output directory `<base>/<YYYY-MM-DD>`.
   *
   * The date is taken from the ISO (UTC) representation of the current time.
   *
   * @param baseDir Directory to place the dated folder in. When omitted or
   *   empty, `assets/images` two levels above the current working directory
   *   is used.
   * @returns The base directory joined with today's date.
   */
  function getDateBasedPath(baseDir?: string): string {
    // Fall back to assets/images, reached by going up two levels from the cwd.
    const root = baseDir || resolve(process.cwd(), "../../assets/images")
    // Everything before the "T" of the ISO timestamp is the YYYY-MM-DD date.
    const iso = new Date().toISOString()
    const dateStamp = iso.slice(0, iso.indexOf('T'))
    return join(root, dateStamp)
  }
  /**
   * Picks a path inside `directory` that does not exist yet, creating the
   * directory first.
   *
   * Naming scheme:
   * - generations (`isEdit` false): `<baseName><extension>` when free,
   *   otherwise `<baseName>_<timestamp><extension>`. If that name is taken as
   *   well (two files within the same second), `_2`, `_3`, ... is appended
   *   after the timestamp so an existing file is never selected.
   * - edits (`isEdit` true): `<baseName>_edit_001<extension>`, `_edit_002`,
   *   ... using the first number that is free.
   *
   * The existence checks and the later write are separate steps, so
   * concurrent callers can still race for the same name.
   *
   * @param directory Target directory (created if missing).
   * @param baseName File name without extension.
   * @param extension Extension including the leading dot.
   * @param isEdit Selects the edit numbering scheme instead of the generation scheme.
   * @returns Full path of a file that did not exist when it was checked.
   */
  async function getUniqueFilename(directory: string, baseName: string, extension: string, isEdit: boolean = false): Promise<string> {
    await ensureDirectoryExists(directory)

    const isTaken = (candidate: string): Promise<boolean> => Bun.file(candidate).exists()

    if (!isEdit) {
      const plainPath = join(directory, `${baseName}${extension}`)
      if (!(await isTaken(plainPath))) {
        return plainPath
      }

      // ISO time with ":" and "." replaced by "-", minus the trailing "-mmmZ".
      const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5)
      const stampedPath = join(directory, `${baseName}_${timestamp}${extension}`)
      if (!(await isTaken(stampedPath))) {
        return stampedPath
      }

      // The timestamp has one-second resolution; disambiguate further.
      for (let attempt = 2; ; attempt++) {
        const candidate = join(directory, `${baseName}_${timestamp}_${attempt}${extension}`)
        if (!(await isTaken(candidate))) {
          return candidate
        }
      }
    }

    // For edits, use incremental numbering.
    for (let counter = 1; ; counter++) {
      const editSuffix = `_edit_${counter.toString().padStart(3, '0')}`
      const candidate = join(directory, `${baseName}${editSuffix}${extension}`)
      if (!(await isTaken(candidate))) {
        return candidate
      }
    }
  }
  /**
   * Generates an image from a text prompt and stores it as a PNG file.
   *
   * The image is written to `<outputDir or dated default>/generations/`. In
   * test mode no request is sent; the output path is still resolved and a
   * `[TEST MODE]` description is returned instead.
   *
   * @param prompt Text description of the image to generate.
   * @param config Output options; `outputDir` and `customName` are honoured.
   *   A trailing `.png`, `.jpg` or `.jpeg` on `customName` (any letter case)
   *   is removed before `.png` is appended.
   * @returns A message containing the saved path and its size in bytes, or
   *   the test-mode description.
   * @throws Error when the HTTP response is not ok, when the response holds
   *   no candidates, no parts or no inline image data, or when the written
   *   file cannot be found afterwards.
   */
  export async function generateImage(prompt: string, config: ImageConfig = {}): Promise<string> {
    type ResponsePart = { inlineData?: { data?: string } }
    type GenerateContentResponse = { candidates?: Array<{ content?: { parts?: ResponsePart[] } }> }

    const apiKey = await getGeminiApiKey()

    // Used by both the test-mode and the live path so they resolve names identically.
    const resolveOutputPath = (): Promise<string> => {
      const generationsDir = join(config.outputDir || getDateBasedPath(), "generations")
      // Strip a known image extension regardless of case; never end up with an empty name.
      const baseName = (config.customName || "generated").replace(/\.(png|jpe?g)$/i, "") || "generated"
      return getUniqueFilename(generationsDir, baseName, ".png", false)
    }

    // Test mode - return mock response without API call
    if (isTestMode()) {
      const mockPath = await resolveOutputPath()
      return `[TEST MODE] Would generate image: ${mockPath} for prompt: "${prompt.substring(0, 50)}..."`
    }

    // NOTE(review): the model id below is a preview name; confirm it is still served.
    const res = await fetch(
      "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image-preview:generateContent",
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-goog-api-key": apiKey,
        },
        body: JSON.stringify({
          contents: [{
            parts: [{ text: prompt }]
          }],
        }),
      }
    )

    if (!res.ok) {
      const errorText = await res.text()
      throw new Error(`API error (${res.status}): ${errorText}`)
    }

    const json = (await res.json()) as GenerateContentResponse

    const candidates = json?.candidates
    if (!candidates || candidates.length === 0) {
      throw new Error("No candidates in response")
    }

    const parts = candidates[0]?.content?.parts
    if (!parts || parts.length === 0) {
      throw new Error("No parts in response")
    }

    // The first part that carries inline data holds the image.
    const b64 = parts.find((part) => part.inlineData?.data)?.inlineData?.data
    if (!b64) {
      throw new Error("No image data returned from Nano Banana model")
    }

    const outputPath = await resolveOutputPath()
    console.log(`Saving generated image to: ${outputPath}`)
    await Bun.write(outputPath, Buffer.from(b64, "base64"))

    // Confirm the write actually produced a file before reporting success.
    const savedFile = Bun.file(outputPath)
    if (!(await savedFile.exists())) {
      throw new Error(`Failed to save file to ${outputPath}`)
    }

    const stats = await savedFile.stat()
    return `Generated image saved: ${outputPath} (${stats.size} bytes)`
  }
  /**
   * Edits an existing image according to a text instruction and stores the
   * result as a PNG file.
   *
   * The result is written to `<outputDir or dated default>/edits/` using the
   * `_edit_NNN` numbering scheme. In test mode no request is sent; the output
   * path is still resolved and a `[TEST MODE]` description is returned.
   *
   * @param imagePath File path or data URL of the image to edit.
   * @param prompt Edit instruction.
   * @param config Output options; `outputDir` and `customName` are honoured.
   *   Without `customName` the input file's name (minus extension) is used,
   *   or `image` when the input is a data URL.
   * @returns A message containing the saved path and its size in bytes, or
   *   the test-mode description.
   * @throws Error when the HTTP response is not ok, when the response holds
   *   no candidates, no parts or no inline image data, or when the written
   *   file cannot be found afterwards. Errors from reading the input image
   *   propagate as well.
   */
  export async function editImage(imagePath: string, prompt: string, config: ImageConfig = {}): Promise<string> {
    type ResponsePart = { inlineData?: { data?: string } }
    type GenerateContentResponse = { candidates?: Array<{ content?: { parts?: ResponsePart[] } }> }

    const apiKey = await getGeminiApiKey()

    // Used by both the test-mode and the live path so they resolve names identically.
    const resolveOutputPath = (): Promise<string> => {
      const editsDir = join(config.outputDir || getDateBasedPath(), "edits")
      // A data URL has no file name; basename() would return a slice of the base64 payload.
      const originalName = imagePath.startsWith("data:")
        ? "image"
        : basename(imagePath, extname(imagePath))
      // Strip a known image extension regardless of case; never end up with an empty name.
      const baseName = (config.customName || originalName).replace(/\.(png|jpe?g)$/i, "") || "image"
      return getUniqueFilename(editsDir, baseName, ".png", true)
    }

    // Test mode - return mock response without API call
    if (isTestMode()) {
      const mockPath = await resolveOutputPath()
      return `[TEST MODE] Would edit image: ${imagePath} -> ${mockPath} with prompt: "${prompt.substring(0, 50)}..."`
    }

    // Parse the input image
    const { mime, base64 } = await parseImageInput(imagePath)

    // NOTE(review): the model id below is a preview name; confirm it is still served.
    const res = await fetch(
      "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image-preview:generateContent",
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-goog-api-key": apiKey,
        },
        body: JSON.stringify({
          contents: [{
            parts: [
              { text: prompt },
              { inlineData: { mimeType: mime, data: base64 } }
            ]
          }],
        }),
      }
    )

    if (!res.ok) {
      const errorText = await res.text()
      throw new Error(`API error (${res.status}): ${errorText}`)
    }

    const json = (await res.json()) as GenerateContentResponse

    const candidates = json?.candidates
    if (!candidates || candidates.length === 0) {
      throw new Error("No candidates in response")
    }

    const parts = candidates[0]?.content?.parts
    if (!parts || parts.length === 0) {
      throw new Error("No parts in response")
    }

    // The first part that carries inline data holds the edited image.
    const b64 = parts.find((part) => part.inlineData?.data)?.inlineData?.data
    if (!b64) {
      throw new Error("No image data returned from Nano Banana model")
    }

    const outputPath = await resolveOutputPath()
    console.log(`Saving edited image to: ${outputPath}`)
    await Bun.write(outputPath, Buffer.from(b64, "base64"))

    // Confirm the write actually produced a file before reporting success.
    const savedFile = Bun.file(outputPath)
    if (!(await savedFile.exists())) {
      throw new Error(`Failed to save file to ${outputPath}`)
    }

    const stats = await savedFile.stat()
    return `Edited image saved: ${outputPath} (${stats.size} bytes)`
  }
  /**
   * Asks a Gemini text model a question about an image and returns its answer.
   *
   * In test mode no request is sent and a fixed `[TEST MODE]` description is
   * returned instead.
   *
   * @param imagePath File path or data URL of the image to analyze.
   * @param question What to analyze about the image.
   * @returns The answer text. When the first candidate spreads its answer
   *   over several parts, their text is concatenated in order.
   * @throws Error when the HTTP response is not ok or when the first
   *   candidate contains no text. Errors from reading the input image
   *   propagate as well.
   */
  export async function analyzeImage(imagePath: string, question: string): Promise<string> {
    type ResponsePart = { text?: string }
    type GenerateContentResponse = { candidates?: Array<{ content?: { parts?: ResponsePart[] } }> }

    const apiKey = await getGeminiApiKey()

    // Test mode - return mock response without API call
    if (isTestMode()) {
      return `[TEST MODE] Would analyze image: ${imagePath} with question: "${question.substring(0, 50)}..." - Mock analysis: This is a test image analysis response.`
    }

    const { mime, base64 } = await parseImageInput(imagePath)

    // NOTE(review): confirm this model id is still served by the API.
    const res = await fetch(
      "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent",
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-goog-api-key": apiKey,
        },
        body: JSON.stringify({
          contents: [{
            parts: [
              { text: question },
              { inlineData: { mimeType: mime, data: base64 } }
            ]
          }],
        }),
      }
    )

    if (!res.ok) {
      const errorText = await res.text()
      throw new Error(`API error (${res.status}): ${errorText}`)
    }

    const json = (await res.json()) as GenerateContentResponse

    // Reading only parts[0] would drop text that arrives in later parts.
    const parts = json?.candidates?.[0]?.content?.parts ?? []
    const text = parts.map((part) => part?.text ?? "").join("")
    if (!text) {
      throw new Error("No analysis returned")
    }

    return text
  }
  /**
   * Tool: generate an image from a text prompt.
   *
   * Delegates to `generateImage`. Failures are not thrown to the tool host;
   * they are returned as an `Error: ...` string.
   */
  export const generate = tool({
    description: "Generate an image using Gemini Nano Banana from text prompt",
    args: {
      prompt: tool.schema.string().describe("Text description of the image to generate"),
      // The default stated here mirrors what the code does when outputDir is omitted.
      outputDir: tool.schema.string().optional().describe("Custom output directory; images are saved in its generations/ subfolder (default: ../../assets/images/YYYY-MM-DD/ relative to the working directory)"),
      filename: tool.schema.string().optional().describe("Custom filename (default: generated)"),
    },
    async execute(args, context) {
      try {
        const config: ImageConfig = {
          outputDir: args.outputDir,
          customName: args.filename,
        }
        return await generateImage(args.prompt, config)
      } catch (error: unknown) {
        // Anything can be thrown; only Error instances carry a message.
        const message = error instanceof Error ? error.message : String(error)
        return `Error: ${message}`
      }
    },
  })
  /**
   * Tool: edit an existing image according to a text instruction.
   *
   * Delegates to `editImage`. Failures are not thrown to the tool host; they
   * are returned as an `Error: ...` string.
   */
  export const edit = tool({
    description: "Edit an existing image using Gemini Nano Banana",
    args: {
      image: tool.schema.string().describe("File path or data URL of image to edit"),
      prompt: tool.schema.string().describe("Edit instruction"),
      // The default stated here mirrors what the code does when outputDir is omitted.
      outputDir: tool.schema.string().optional().describe("Custom output directory; images are saved in its edits/ subfolder (default: ../../assets/images/YYYY-MM-DD/ relative to the working directory)"),
      filename: tool.schema.string().optional().describe("Custom filename (default: original name with _edit_XXX)"),
    },
    async execute(args, context) {
      try {
        const config: ImageConfig = {
          outputDir: args.outputDir,
          customName: args.filename,
        }
        return await editImage(args.image, args.prompt, config)
      } catch (error: unknown) {
        // Anything can be thrown; only Error instances carry a message.
        const message = error instanceof Error ? error.message : String(error)
        return `Error: ${message}`
      }
    },
  })
  /**
   * Tool: answer a question about an image (text output only).
   *
   * Delegates to `analyzeImage`. Failures are not thrown to the tool host;
   * they are returned as an `Error: ...` string.
   */
  export const analyze = tool({
    description: "Analyze an image using Gemini (text analysis only)",
    args: {
      image: tool.schema.string().describe("File path or data URL of image to analyze"),
      question: tool.schema.string().describe("What to analyze about the image"),
    },
    async execute(args, context) {
      try {
        return await analyzeImage(args.image, args.question)
      } catch (error: unknown) {
        // Anything can be thrown; only Error instances carry a message.
        const message = error instanceof Error ? error.message : String(error)
        return `Error: ${message}`
      }
    },
  })
  // Backward compatibility: the edit tool doubles as this module's default export.
  export { edit as default }