gemini-multiple.ts 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. import { tool } from "@opencode-ai/plugin"
  /**
   * Resolve an image argument into a MIME type plus base64-encoded payload.
   *
   * Two input forms are accepted:
   *  - a base64 data URL, e.g. "data:image/png;base64,....";
   *  - anything else is treated as a file path and read through `Bun.file`.
   *
   * @param input - File path ("./img.png") or data URL ("data:image/png;base64,...").
   * @returns The MIME type and the base64 payload. When the data URL header or
   *   the file carries no MIME type, "image/png" is used as a best-effort default.
   * @throws Error when `input` is a data URL without a "," separator, without a
   *   trailing ";base64" marker, or with an empty payload. Errors raised while
   *   reading the file propagate unchanged.
   */
  async function parseImageInput(input: string): Promise<{ mime: string; base64: string }> {
    // Best-effort MIME type used when none can be determined.
    const fallbackMime = "image/png"
    const scheme = "data:"

    if (input.startsWith(scheme)) {
      const comma = input.indexOf(",")
      if (comma === -1) {
        throw new Error("Malformed data URL: missing ',' separator")
      }

      // Header layout: "<mime>[;param=value ...][;base64]". Only the part before
      // the first comma is inspected, so a ';' inside the payload cannot confuse it.
      const [mimePart = "", ...params] = input.slice(scheme.length, comma).split(";")
      const encoding = params[params.length - 1] ?? ""
      if (encoding.trim().toLowerCase() !== "base64") {
        throw new Error("Unsupported data URL: only base64-encoded data URLs are accepted")
      }

      const base64 = input.slice(comma + 1).trim()
      if (base64 === "") {
        throw new Error("Malformed data URL: empty payload")
      }

      return { mime: mimePart.trim() || fallbackMime, base64 }
    }

    // Treat as file path
    const file = Bun.file(input)
    const bytes = await file.arrayBuffer()
    const base64 = Buffer.from(bytes).toString("base64")
    return { mime: file.type || fallbackMime, base64 }
  }
  /**
   * Send an image and an edit instruction to the Gemini image model and return
   * the resulting image as a base64 string.
   *
   * The image travels as an `inlineData` part next to the text prompt inside
   * `contents[0].parts`, the same request shape the `analyze` tool in this file
   * uses.
   *
   * @param mime - MIME type of the source image (e.g. "image/png").
   * @param base64 - Base64-encoded bytes of the source image.
   * @param prompt - Natural-language edit instruction.
   * @returns Base64 data of the first image part found in the first candidate.
   * @throws Error when GEMINI_API_KEY is unset, when the HTTP response is not OK
   *   (the response body is included in the message), or when no part of the
   *   first candidate carries image data.
   */
  async function callGeminiAPI(mime: string, base64: string, prompt: string): Promise<string> {
    // Minimal view of the response fields this function reads.
    type ResponsePart = { text?: string; inlineData?: { mimeType?: string; data?: string } }
    type GenerateContentResponse = {
      candidates?: Array<{ content?: { parts?: ResponsePart[] } }>
    }

    const apiKey = process.env.GEMINI_API_KEY
    if (!apiKey) throw new Error("Set GEMINI_API_KEY in your environment")

    const endpoint =
      "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image-preview:generateContent"
    const res = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-goog-api-key": apiKey,
      },
      body: JSON.stringify({
        contents: [
          {
            parts: [
              { text: prompt },
              { inlineData: { mimeType: mime, data: base64 } },
            ],
          },
        ],
      }),
    })
    if (!res.ok) throw new Error(`API error: ${await res.text()}`)

    const json = (await res.json()) as GenerateContentResponse | null
    const parts = json?.candidates?.[0]?.content?.parts
    // The image is not guaranteed to be the first part (a text part may precede
    // it), so scan every part for the first one that carries inline data.
    const imagePart = Array.isArray(parts)
      ? parts.find((part) => Boolean(part?.inlineData?.data))
      : undefined
    const b64 = imagePart?.inlineData?.data
    if (!b64) throw new Error("No image data returned")
    return b64
  }
  /**
   * Tool: edit an image with Gemini and write the result to disk.
   *
   * Reads the source image (file path or data URL), sends it with the edit
   * instruction to the image model, decodes the returned base64 payload and
   * writes it to `output` (default "edited.png").
   *
   * The tool never throws: on success it returns a confirmation string naming
   * the output file, on failure it returns a string of the form "Error: <reason>".
   */
  export const edit = tool({
    description: "Edit an image using Gemini with file path or data URL",
    args: {
      image: tool.schema.string().describe("File path or data URL"),
      prompt: tool.schema.string().describe("Edit instruction"),
      output: tool.schema.string().optional().describe("Output filename (default edited.png)"),
    },
    async execute(args, context) {
      try {
        const { mime, base64 } = await parseImageInput(args.image)
        const resultBase64 = await callGeminiAPI(mime, base64, args.prompt)
        // `||` rather than `??` on purpose: an empty string is not a usable
        // filename, so it falls back to the default as well.
        const out = args.output || "edited.png"
        await Bun.write(out, Buffer.from(resultBase64, "base64"))
        return `Saved edited image to ${out}`
      } catch (error) {
        // The caught value is not guaranteed to be an Error instance; narrow it
        // before reading `.message` so non-Error throwables still produce text.
        const reason = error instanceof Error ? error.message : String(error)
        return `Error: ${reason}`
      }
    },
  })
  /**
   * Tool: ask Gemini a question about an image without modifying it.
   *
   * Reads the image (file path or data URL), sends it together with the
   * question to the text model, and returns the model's textual answer.
   *
   * The tool never throws: configuration problems, non-OK HTTP responses and an
   * empty answer are each reported as a plain message string, and any exception
   * is reported as "Error: <reason>".
   */
  export const analyze = tool({
    description: "Analyze an image using Gemini without editing",
    args: {
      image: tool.schema.string().describe("File path or data URL"),
      question: tool.schema.string().describe("What to analyze about the image"),
    },
    async execute(args, context) {
      // Minimal view of the response fields this tool reads.
      type GenerateContentResponse = {
        candidates?: Array<{ content?: { parts?: Array<{ text?: string }> } }>
      }

      try {
        const { mime, base64 } = await parseImageInput(args.image)
        const apiKey = process.env.GEMINI_API_KEY
        if (!apiKey) return "Set GEMINI_API_KEY in your environment"

        const endpoint =
          "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent"
        const res = await fetch(endpoint, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "x-goog-api-key": apiKey,
          },
          body: JSON.stringify({
            contents: [
              {
                parts: [
                  { text: args.question },
                  { inlineData: { mimeType: mime, data: base64 } },
                ],
              },
            ],
          }),
        })
        if (!res.ok) return `API error: ${await res.text()}`

        const json = (await res.json()) as GenerateContentResponse | null
        const parts = json?.candidates?.[0]?.content?.parts
        // Join every text part of the first candidate instead of reading only
        // the first one, so an answer split across parts is returned in full.
        const text = Array.isArray(parts)
          ? parts
              .map((part) => (typeof part?.text === "string" ? part.text : ""))
              .join("")
          : ""
        if (!text) return "No analysis returned"
        return text
      } catch (error) {
        // The caught value is not guaranteed to be an Error instance; narrow it
        // before reading `.message` so non-Error throwables still produce text.
        const reason = error instanceof Error ? error.message : String(error)
        return `Error: ${reason}`
      }
    },
  })