Implement dynamic vision capability enablement ericc-ch#36

Eason0729 · Eason0729 · commit 67d2fed79a52 · 2025-06-07T17:05:59.000+08:00
diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts
@@ -15,7 +15,7 @@ const API_VERSION = "2025-04-01"
 
 export const copilotBaseUrl = (state: State) =>
   `https://api.${state.accountType}.githubcopilot.com`
-export const copilotHeaders = (state: State) => {
+export const copilotHeaders = (state: State, vision: boolean = false) => {
   const headers: Record<string, string> = {
     Authorization: `Bearer ${state.copilotToken}`,
     "content-type": standardHeaders()["content-type"],
@@ -29,9 +29,7 @@ export const copilotHeaders = (state: State) => {
     "x-vscode-user-agent-library-version": "electron-fetch",
   }
 
-  if (state.visionEnabled) {
-    headers["copilot-vision-request"] = "true"
-  }
+  if (vision) headers["copilot-vision-request"] = "true"
 
   return headers
 }
diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts
@@ -9,9 +9,19 @@ export const createChatCompletions = async (
 ) => {
   if (!state.copilotToken) throw new Error("Copilot token not found")
 
+  for (const message of payload.messages) {
+    intoCopilotMessage(message)
+  }
+
+  const visionEnable = payload.messages.some(
+    (x) =>
+      typeof x.content !== "string"
+      && x.content.some((x) => x.type === "image_url"),
+  )
+
   const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
     method: "POST",
-    headers: copilotHeaders(state),
+    headers: copilotHeaders(state, visionEnable),
     body: JSON.stringify(payload),
   })
 
@@ -25,6 +35,14 @@ export const createChatCompletions = async (
   return (await response.json()) as ChatCompletionResponse
 }
 
+const intoCopilotMessage = (message: Message) => { // Rewrites "input_image" content parts to "image_url" on the given message, in place.
+  if (typeof message.content === "string") return false // plain-string content has no parts to rewrite
+
+  for (const part of message.content) {
+    if (part.type === "input_image") part.type = "image_url" // mutates the part object owned by the caller's message
+  }
+} // NOTE(review): this path returns undefined while the string path returns false; the call site in createChatCompletions ignores the result
+
 // Streaming types
 
 export interface ChatCompletionChunk {
@@ -79,7 +97,15 @@ export interface ChatCompletionsPayload {
 
 export interface Message {
   role: "user" | "assistant" | "system"
-  content: string
+  content: string | Array<ContentPart> // plain text, or an array of typed parts; createChatCompletions scans the parts for "image_url" to decide on the vision header
 }
 
 // https://platform.openai.com/docs/api-reference
+
+export interface ContentPart { // One element of an array-valued Message.content.
+  type: "input_image" | "input_text" | "image_url" // "input_image" is rewritten to "image_url" by intoCopilotMessage before the request is sent
+  text?: string // NOTE(review): presumably populated for text parts only; confirm against callers
+  image_url?: string // NOTE(review): presumably the image reference for image parts; verify whether Copilot expects a string or a { url } object
+}
+// https://platform.openai.com/docs/guides/images-vision#giving-a-model-images-as-input
+// Note: Copilot uses "image_url", whereas OpenAI uses "input_image".