From c83a9f8365654bb91da4036f8d1c5608f2a3d2be Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 01:49:16 +0800 Subject: [PATCH 01/17] feat: VS Code header simulation accuracy (#37) (#47) * docs: add PRD for vscode-header-simulation (#37) * feat(version-detection): add VS Code + Copilot Chat version auto-detection (#37) - Replace AUR scrape in getVSCodeVersion() with official update.code.visualstudio.com API; keep AUR as secondary fallback; retain hardcoded "1.104.3" tertiary fallback - Add getCopilotChatVersion() querying VS Code Marketplace API for GitHub.copilot-chat; fallback "0.26.7" - Both functions use module-level 24h in-memory TTL cache - Add copilotChatVersion?: string to State interface and initial state - Add tests covering: successful fetch, network failure fallback, and cache hit within TTL Co-Authored-By: Claude Sonnet 4.6 * feat(api-config): wire dynamic versions into headers + startup log (#37) - Add cacheCopilotChatVersion() in utils.ts; call it alongside cacheVSCodeVersion() at startup - Log resolved versions at startup: "VS Code: <version> Copilot Chat: <version>" - Replace hardcoded COPILOT_VERSION constant in api-config.ts with state.copilotChatVersion ?? 
"0.26.7" in both copilotHeaders() and githubHeaders() - Remove dead EDITOR_PLUGIN_VERSION and USER_AGENT module-level constants - Add JSDoc comment block on copilotHeaders() documenting every header's source/purpose - Add tests/utils.test.ts covering cacheCopilotChatVersion and cacheVSCodeVersion with mocked services Co-Authored-By: Claude Sonnet 4.6 * fix: address review round 1 feedback (#37) - C1: validate version strings against /^\d+\.\d+\.\d+$/ before use in HTTP headers (CRLF injection prevention) - C2: skip cache write when version equals FALLBACK constant, allowing retry on next call - C3/I2: run getVSCodeVersion + getCopilotChatVersion in parallel via Promise.all in start.ts - I1: delete cacheCopilotChatVersion and cacheVSCodeVersion wrapper functions from utils.ts; call service fns directly from start.ts - I3: add consola.warn() before every FALLBACK usage (fetch failures and format check) - I4: remove four Marketplace response interfaces; use eslint-disabled any + unknown typed chain access - I5: remove pointless const cached = cache alias; reference cache directly - I6: tighten AUR regex from /pkgver=([0-9.]+)/ to /pkgver=(\d+\.\d+\.\d+)/ - T1-T7: add missing test cases (malformed JSON, missing pkgver, HTTP 503, empty version, TTL expiry, undefined state fallback, CRLF injection) Co-Authored-By: Claude Sonnet 4.6 * fix: address review round 2 feedback (#37) Co-Authored-By: Claude Sonnet 4.6 --------- Co-authored-by: Claude Sonnet 4.6 --- .gitignore | 2 +- bun.lock | 1 + docs/prd/vscode-header-simulation.md | 67 +++++ src/lib/api-config.ts | 50 +++- src/lib/state.ts | 1 + src/lib/utils.ts | 10 - src/services/get-copilot-chat-version.ts | 82 ++++++ src/services/get-vscode-version.ts | 81 +++++- src/services/version-cache.ts | 6 + src/start.ts | 12 +- tests/utils.test.ts | 45 +++ tests/version-detection.test.ts | 349 +++++++++++++++++++++++ 12 files changed, 667 insertions(+), 39 deletions(-) create mode 100644 docs/prd/vscode-header-simulation.md create 
mode 100644 src/services/get-copilot-chat-version.ts create mode 100644 src/services/version-cache.ts create mode 100644 tests/utils.test.ts create mode 100644 tests/version-detection.test.ts diff --git a/.gitignore b/.gitignore index 577a4f199..9650fd579 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,4 @@ node_modules/ .eslintcache # build output -dist/ \ No newline at end of file +dist/.crew/ diff --git a/bun.lock b/bun.lock index 20e895e7f..9ece87578 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "copilot-api", diff --git a/docs/prd/vscode-header-simulation.md b/docs/prd/vscode-header-simulation.md new file mode 100644 index 000000000..30ae0a91a --- /dev/null +++ b/docs/prd/vscode-header-simulation.md @@ -0,0 +1,67 @@ +# VS Code Header Simulation Accuracy + +## Status +Approved + +## Overview +Auto-detect and keep current the VS Code + Copilot Chat version strings used in every upstream request header, so traffic looks indistinguishable from a real VS Code editor session. + +## Motivation +copilot-api impersonates VS Code Copilot Chat toward GitHub's upstream. Hardcoded version strings become stale as VS Code releases new versions every month. Stale strings increase the distinguishability of copilot-api traffic from legitimate editor traffic. The fix: query live version sources at startup, cache them, and fall back to hardcoded values on failure — so headers always reflect the latest shipping release. + +## Requirements + +1. **VS Code version auto-detect** — On startup, query `https://update.code.visualstudio.com/api/releases/stable` (JSON array, first element is latest stable version). Use the result for `editor-version: vscode/`. +2. **Copilot Chat extension version auto-detect** — On startup, query the VS Code Marketplace API for `GitHub.copilot-chat` and extract the latest version. Use it for `editor-plugin-version: copilot-chat/` and `user-agent: GitHubCopilotChat/`. 
+3. **24-hour TTL in-memory cache** — Cache both versions for 24 h so repeated token refreshes don't re-query external APIs unnecessarily. +4. **Graceful fallback** — If either fetch fails (network error, timeout, unexpected shape), log a warning and continue with the existing hardcoded fallback values. Never crash startup. +5. **Startup log** — At `consola.info` level, print the resolved version strings (`VSCode: X.Y.Z`, `Copilot Chat: A.B.C`) so the user can verify what's being used. +6. **`x-request-id`** — Confirm it is already generated per-request via `crypto.randomUUID()` (it is — no change needed). +7. **Header documentation** — Add a comment block in `src/lib/api-config.ts` explaining each header's source and how to update it. + +## Acceptance Criteria + +- On a clean startup with network access, printed versions match the latest stable VS Code release visible at `https://code.visualstudio.com/updates/`. +- On startup with network blocked, a warning is logged and the server still starts with fallback values. +- No new CLI flags required — version detection is automatic. +- All existing tests pass. + +## Technical Approach + +### VS Code version +`GET https://update.code.visualstudio.com/api/releases/stable` returns a JSON array of version strings. Take `[0]`. + +### Copilot Chat version +VS Code Marketplace API: +``` +GET https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery +Content-Type: application/json +Accept: application/json;api-version=3.0-preview.1 + +Body: { + "filters": [{ "criteria": [{ "filterType": 7, "value": "GitHub.copilot-chat" }] }], + "flags": 529 +} +``` +Response path: `results[0].extensions[0].versions[0].version` + +### Caching +Simple module-level `{ version: string, fetchedAt: number }` objects. If `Date.now() - fetchedAt < 24 * 60 * 60 * 1000`, return cached value. + +### File changes +- `src/services/get-vscode-version.ts` — extend with VS Code stable API; keep AUR fallback as secondary fallback. 
+- `src/services/get-copilot-chat-version.ts` — new file for Copilot Chat extension version. +- `src/lib/utils.ts` — `cacheVSCodeVersion()` also calls `cacheCopilotChatVersion()`. +- `src/lib/state.ts` — add `copilotChatVersion?: string`. +- `src/lib/api-config.ts` — use `state.copilotChatVersion` for `editor-plugin-version` and `user-agent`; add header documentation comment. + +## Testing Strategy +- Unit test `get-vscode-version.ts`: mock fetch → returns parsed version; mock fail → returns fallback. +- Unit test `get-copilot-chat-version.ts`: mock fetch → returns parsed version; mock fail → returns fallback. +- Existing translation tests must continue to pass. + +## Out of Scope +- `OpenAI-Organization` header (not confirmed in VS Code traffic). +- `X-Vscode-User-Agent-Library-Comment` (not confirmed). +- Persistent disk cache (in-memory TTL is sufficient for a single server process). +- Auto-restart on version change. diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts index 83bce92ad..294959285 100644 --- a/src/lib/api-config.ts +++ b/src/lib/api-config.ts @@ -1,5 +1,7 @@ import { randomUUID } from "node:crypto" +import { FALLBACK as COPILOT_CHAT_VERSION_FALLBACK } from "~/services/get-copilot-chat-version" + import type { State } from "./state" export const standardHeaders = () => ({ @@ -7,24 +9,38 @@ export const standardHeaders = () => ({ accept: "application/json", }) -const COPILOT_VERSION = "0.26.7" -const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}` -const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}` - const API_VERSION = "2025-04-01" export const copilotBaseUrl = (state: State) => state.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${state.accountType}.githubcopilot.com` + +/** + * Headers sent with every upstream request to mimic VS Code Copilot Chat traffic. 
+ * + * Header sources: + * - Authorization — Copilot token from GitHub OAuth flow + * - editor-version — Auto-detected VS Code stable release (update.code.visualstudio.com) + * - editor-plugin-version — Auto-detected GitHub.copilot-chat Marketplace version + * - user-agent — Same as editor-plugin-version, GitHubCopilotChat/ + * - copilot-integration-id — Fixed "vscode-chat" + * - openai-intent — Fixed "conversation-panel" + * - x-github-api-version — Fixed "2025-04-01" (verify periodically against VS Code source) + * - x-request-id — Per-request UUID via crypto.randomUUID() + * - x-vscode-user-agent-library-version — Fixed "electron-fetch" + * - copilot-vision-request — Added when request includes image content + */ export const copilotHeaders = (state: State, vision: boolean = false) => { + const copilotVersion = + state.copilotChatVersion ?? COPILOT_CHAT_VERSION_FALLBACK const headers: Record = { Authorization: `Bearer ${state.copilotToken}`, "content-type": standardHeaders()["content-type"], "copilot-integration-id": "vscode-chat", "editor-version": `vscode/${state.vsCodeVersion}`, - "editor-plugin-version": EDITOR_PLUGIN_VERSION, - "user-agent": USER_AGENT, + "editor-plugin-version": `copilot-chat/${copilotVersion}`, + "user-agent": `GitHubCopilotChat/${copilotVersion}`, "openai-intent": "conversation-panel", "x-github-api-version": API_VERSION, "x-request-id": randomUUID(), @@ -37,15 +53,19 @@ export const copilotHeaders = (state: State, vision: boolean = false) => { } export const GITHUB_API_BASE_URL = "https://api.github.com" -export const githubHeaders = (state: State) => ({ - ...standardHeaders(), - authorization: `token ${state.githubToken}`, - "editor-version": `vscode/${state.vsCodeVersion}`, - "editor-plugin-version": EDITOR_PLUGIN_VERSION, - "user-agent": USER_AGENT, - "x-github-api-version": API_VERSION, - "x-vscode-user-agent-library-version": "electron-fetch", -}) +export const githubHeaders = (state: State) => { + const copilotVersion = + 
state.copilotChatVersion ?? COPILOT_CHAT_VERSION_FALLBACK + return { + ...standardHeaders(), + authorization: `token ${state.githubToken}`, + "editor-version": `vscode/${state.vsCodeVersion}`, + "editor-plugin-version": `copilot-chat/${copilotVersion}`, + "user-agent": `GitHubCopilotChat/${copilotVersion}`, + "x-github-api-version": API_VERSION, + "x-vscode-user-agent-library-version": "electron-fetch", + } +} export const GITHUB_BASE_URL = "https://github.com" export const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98" diff --git a/src/lib/state.ts b/src/lib/state.ts index 5ba4dc1d1..01f491c35 100644 --- a/src/lib/state.ts +++ b/src/lib/state.ts @@ -7,6 +7,7 @@ export interface State { accountType: string models?: ModelsResponse vsCodeVersion?: string + copilotChatVersion?: string manualApprove: boolean rateLimitWait: boolean diff --git a/src/lib/utils.ts b/src/lib/utils.ts index cc80be667..892c61d74 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -1,7 +1,4 @@ -import consola from "consola" - import { getModels } from "~/services/copilot/get-models" -import { getVSCodeVersion } from "~/services/get-vscode-version" import { state } from "./state" @@ -17,10 +14,3 @@ export async function cacheModels(): Promise { const models = await getModels() state.models = models } - -export const cacheVSCodeVersion = async () => { - const response = await getVSCodeVersion() - state.vsCodeVersion = response - - consola.info(`Using VSCode version: ${response}`) -} diff --git a/src/services/get-copilot-chat-version.ts b/src/services/get-copilot-chat-version.ts new file mode 100644 index 000000000..b4278b669 --- /dev/null +++ b/src/services/get-copilot-chat-version.ts @@ -0,0 +1,82 @@ +import consola from "consola" + +import { VERSION_CACHE_TTL_MS, type VersionCache } from "./version-cache" + +export const FALLBACK = "0.26.7" + +let cache: VersionCache | undefined + +async function fetchFromMarketplace(): Promise { + const controller = new AbortController() + const timeout = 
setTimeout(() => { + controller.abort() + }, 5000) + + try { + const response = await fetch( + "https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery", + { + method: "POST", + headers: { + "Content-Type": "application/json", + Accept: "application/json;api-version=3.0-preview.1", + }, + body: JSON.stringify({ + filters: [ + { + criteria: [{ filterType: 7, value: "GitHub.copilot-chat" }], + }, + ], + flags: 529, + }), + signal: controller.signal, + }, + ) + + /* eslint-disable @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-member-access */ + const data = (await response.json()) as any + const parsed: unknown = + data?.results?.[0]?.extensions?.[0]?.versions?.[0]?.version + /* eslint-enable @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-member-access */ + + if (typeof parsed !== "string" || !parsed) { + throw new Error("Unexpected response shape") + } + + return parsed + } finally { + clearTimeout(timeout) + } +} + +export async function getCopilotChatVersion(): Promise { + if (cache && Date.now() - cache.fetchedAt < VERSION_CACHE_TTL_MS) { + return cache.version + } + + let fetched: string | null = null + + try { + fetched = await fetchFromMarketplace() + } catch { + consola.warn( + "Failed to fetch Copilot Chat version from Marketplace, using fallback", + ) + } + + const version = + fetched !== null && /^\d+\.\d+\.\d+$/.test(fetched) ? 
fetched : FALLBACK + + if (fetched !== null && version !== FALLBACK) { + // eslint-disable-next-line require-atomic-updates + cache = { version, fetchedAt: Date.now() } + } else if (fetched !== null) { + // Format validation rejected the fetched value + const safeVersion = fetched.slice(0, 40).replaceAll(/[^\x20-\x7E]/g, "?") + consola.warn( + `Invalid version format received: ${safeVersion}, using fallback`, + ) + } + + return version +} diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts index 6078f09b5..f9732bfed 100644 --- a/src/services/get-vscode-version.ts +++ b/src/services/get-vscode-version.ts @@ -1,6 +1,36 @@ +import consola from "consola" + +import { VERSION_CACHE_TTL_MS, type VersionCache } from "./version-cache" + const FALLBACK = "1.104.3" -export async function getVSCodeVersion() { +let cache: VersionCache | undefined + +async function fetchFromOfficialApi(): Promise { + const controller = new AbortController() + const timeout = setTimeout(() => { + controller.abort() + }, 5000) + + try { + const response = await fetch( + "https://update.code.visualstudio.com/api/releases/stable", + { signal: controller.signal }, + ) + + const versions = (await response.json()) as Array + + if (Array.isArray(versions) && versions.length > 0 && versions[0]) { + return versions[0] + } + + throw new Error("Unexpected response shape") + } finally { + clearTimeout(timeout) + } +} + +async function fetchFromAur(): Promise { const controller = new AbortController() const timeout = setTimeout(() => { controller.abort() @@ -9,25 +39,54 @@ export async function getVSCodeVersion() { try { const response = await fetch( "https://aur.archlinux.org/cgit/aur.git/plain/PKGBUILD?h=visual-studio-code-bin", - { - signal: controller.signal, - }, + { signal: controller.signal }, ) const pkgbuild = await response.text() - const pkgverRegex = /pkgver=([0-9.]+)/ - const match = pkgbuild.match(pkgverRegex) + const match = 
pkgbuild.match(/pkgver=(\d+\.\d+\.\d+)/) - if (match) { + if (match?.[1]) { return match[1] } - return FALLBACK - } catch { - return FALLBACK + throw new Error("Version not found in PKGBUILD") } finally { clearTimeout(timeout) } } -await getVSCodeVersion() +export async function getVSCodeVersion(): Promise { + if (cache && Date.now() - cache.fetchedAt < VERSION_CACHE_TTL_MS) { + return cache.version + } + + let fetched: string | null = null + + try { + fetched = await fetchFromOfficialApi() + } catch { + try { + fetched = await fetchFromAur() + } catch { + consola.warn( + "Failed to fetch VS Code version from all sources, using fallback", + ) + } + } + + const version = + fetched !== null && /^\d+\.\d+\.\d+$/.test(fetched) ? fetched : FALLBACK + + if (fetched !== null && version !== FALLBACK) { + // eslint-disable-next-line require-atomic-updates + cache = { version, fetchedAt: Date.now() } + } else if (fetched !== null) { + // Format validation rejected the fetched value + const safeVersion = fetched.slice(0, 40).replaceAll(/[^\x20-\x7E]/g, "?") + consola.warn( + `Invalid version format received: ${safeVersion}, using fallback`, + ) + } + + return version +} diff --git a/src/services/version-cache.ts b/src/services/version-cache.ts new file mode 100644 index 000000000..839d29438 --- /dev/null +++ b/src/services/version-cache.ts @@ -0,0 +1,6 @@ +export const VERSION_CACHE_TTL_MS = 24 * 60 * 60 * 1000 + +export interface VersionCache { + version: string + fetchedAt: number +} diff --git a/src/start.ts b/src/start.ts index 14abbbdff..9fca3b37f 100644 --- a/src/start.ts +++ b/src/start.ts @@ -11,8 +11,10 @@ import { initProxyFromEnv } from "./lib/proxy" import { generateEnvScript } from "./lib/shell" import { state } from "./lib/state" import { setupCopilotToken, setupGitHubToken } from "./lib/token" -import { cacheModels, cacheVSCodeVersion } from "./lib/utils" +import { cacheModels } from "./lib/utils" import { server } from "./server" +import { 
getCopilotChatVersion } from "./services/get-copilot-chat-version" +import { getVSCodeVersion } from "./services/get-vscode-version" interface RunServerOptions { port: number @@ -48,7 +50,13 @@ export async function runServer(options: RunServerOptions): Promise { state.showToken = options.showToken await ensurePaths() - await cacheVSCodeVersion() + ;[state.vsCodeVersion, state.copilotChatVersion] = await Promise.all([ + getVSCodeVersion(), + getCopilotChatVersion(), + ]) + consola.info( + `VS Code: ${state.vsCodeVersion} Copilot Chat: ${state.copilotChatVersion}`, + ) if (options.githubToken) { state.githubToken = options.githubToken diff --git a/tests/utils.test.ts b/tests/utils.test.ts new file mode 100644 index 000000000..923d15545 --- /dev/null +++ b/tests/utils.test.ts @@ -0,0 +1,45 @@ +import { describe, test, expect, mock, beforeEach } from "bun:test" + +import type { ModelsResponse } from "../src/services/copilot/get-models" + +// --------------------------------------------------------------------------- +// cacheModels — integration test against the real state singleton, +// with the service function mocked. 
+// --------------------------------------------------------------------------- + +const fakeModels: ModelsResponse = { + object: "list", + data: [], +} + +const mockGetModels = mock(() => Promise.resolve(fakeModels)) + +void mock.module("../src/services/copilot/get-models", () => ({ + getModels: mockGetModels, +})) + +// Import after mocking so the mocks are active +import { state } from "../src/lib/state" +import { cacheModels } from "../src/lib/utils" + +describe("cacheModels", () => { + beforeEach(() => { + state.models = undefined + mockGetModels.mockReset() + }) + + test("sets state.models with value from service", async () => { + mockGetModels.mockResolvedValue(fakeModels) + + expect(state.models).toBeUndefined() + await cacheModels() + expect(state.models).toEqual(fakeModels) + }) + + test("calls getModels exactly once", async () => { + mockGetModels.mockResolvedValue(fakeModels) + + await cacheModels() + expect(mockGetModels).toHaveBeenCalledTimes(1) + }) +}) diff --git a/tests/version-detection.test.ts b/tests/version-detection.test.ts new file mode 100644 index 000000000..a0a4b96e8 --- /dev/null +++ b/tests/version-detection.test.ts @@ -0,0 +1,349 @@ +import { describe, test, expect, mock, beforeEach } from "bun:test" +import { setSystemTime } from "bun:test" + +import type { state as StateType } from "../src/lib/state" +import type { getCopilotChatVersion as GetCopilotChatVersion } from "../src/services/get-copilot-chat-version" +import type { getVSCodeVersion as GetVSCodeVersion } from "../src/services/get-vscode-version" + +// --------------------------------------------------------------------------- +// We test the modules by mocking global `fetch` before importing them. +// Each describe block re-imports after resetting the module registry so we +// get fresh module-level caches for every suite. 
+// --------------------------------------------------------------------------- + +// Helpers ---------------------------------------------------------------- + +function makeFetchMock(handler: (url: string, init?: RequestInit) => Response) { + return mock((url: string | URL | Request, init?: RequestInit) => { + const urlStr = url instanceof Request ? url.url : url.toString() + return Promise.resolve(handler(urlStr, init)) + }) as unknown as typeof fetch +} + +function jsonResponse(body: unknown, status = 200): Response { + return new Response(JSON.stringify(body), { + status, + headers: { "Content-Type": "application/json" }, + }) +} + +function textResponse(body: string, status = 200): Response { + return new Response(body, { + status, + headers: { "Content-Type": "text/plain" }, + }) +} + +// Module type aliases for properly typed dynamic imports +type VSCodeVersionModule = { getVSCodeVersion: typeof GetVSCodeVersion } +type CopilotChatVersionModule = { + getCopilotChatVersion: typeof GetCopilotChatVersion +} +type StateModule = { state: typeof StateType } + +// --------------------------------------------------------------------------- +// getVSCodeVersion tests +// --------------------------------------------------------------------------- + +describe("getVSCodeVersion", () => { + let callCount = 0 + + beforeEach(() => { + callCount = 0 + }) + + test("returns version from official VS Code API (primary path)", async () => { + globalThis.fetch = makeFetchMock((_url) => { + callCount++ + return jsonResponse(["1.99.0", "1.98.0"]) + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now()}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.99.0") + expect(callCount).toBe(1) + }) + + test("falls back to AUR when official API fails", async () => { + let requestIndex = 0 + globalThis.fetch = makeFetchMock((_url) => { + const i = requestIndex++ + if (i === 0) throw new Error("network 
error") + // AUR PKGBUILD response + return textResponse("pkgver=1.88.0\narch=(x86_64)") + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 1}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.88.0") + }) + + test("returns hardcoded fallback when both official API and AUR fail", async () => { + globalThis.fetch = makeFetchMock((_url) => { + throw new Error("offline") + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 2}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.104.3") + }) + + test("cache prevents second fetch within TTL", async () => { + let fetchCallCount = 0 + globalThis.fetch = makeFetchMock((_url) => { + fetchCallCount++ + return jsonResponse(["1.99.5"]) + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 3}` + )) as VSCodeVersionModule + + const v1 = await mod.getVSCodeVersion() + const v2 = await mod.getVSCodeVersion() + + expect(v1).toBe("1.99.5") + expect(v2).toBe("1.99.5") + // fetch should only have been called once + expect(fetchCallCount).toBe(1) + }) + + // T1 — VS Code API returns malformed JSON (non-array body {}): falls back to AUR + test("T1: falls back to AUR when official API returns non-array body", async () => { + let requestIndex = 0 + globalThis.fetch = makeFetchMock((_url) => { + const i = requestIndex++ + if (i === 0) return jsonResponse({}) // non-array — triggers "Unexpected response shape" + // AUR PKGBUILD response + return textResponse("pkgver=1.90.0\narch=(x86_64)") + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 10}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.90.0") + expect(requestIndex).toBe(2) + }) + + // T2 — AUR PKGBUILD missing pkgver= line: returns hardcoded fallback + test("T2: returns 
hardcoded fallback when AUR PKGBUILD has no pkgver line", async () => { + let requestIndex = 0 + globalThis.fetch = makeFetchMock((_url) => { + const i = requestIndex++ + if (i === 0) throw new Error("network error") + // AUR response missing pkgver= + return textResponse("pkgdesc='VSCode'\npkgrel=1\n") + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 11}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.104.3") + }) + + // T5 — TTL expiry triggers refetch + test("T5: TTL expiry triggers a new fetch", async () => { + const CACHE_TTL = 24 * 60 * 60 * 1000 + let fetchCount = 0 + globalThis.fetch = makeFetchMock((_url) => { + fetchCount++ + return jsonResponse(["1.99.0"]) + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 12}` + )) as VSCodeVersionModule + + // First call — populates cache + await mod.getVSCodeVersion() + expect(fetchCount).toBe(1) + + // Advance clock past TTL + setSystemTime(new Date(Date.now() + CACHE_TTL + 1)) + + try { + // Second call — cache expired, should fetch again + await mod.getVSCodeVersion() + expect(fetchCount).toBe(2) + } finally { + // Always reset system time + setSystemTime() + } + }) +}) + +// --------------------------------------------------------------------------- +// getCopilotChatVersion tests +// --------------------------------------------------------------------------- + +describe("getCopilotChatVersion", () => { + const MARKETPLACE_URL = + "https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery" + + const validMarketplaceResponse = { + results: [ + { + extensions: [ + { + versions: [{ version: "0.30.1" }], + }, + ], + }, + ], + } + + test("returns version from Marketplace API", async () => { + globalThis.fetch = makeFetchMock((url) => { + expect(url).toBe(MARKETPLACE_URL) + return jsonResponse(validMarketplaceResponse) + }) + + const mod = (await import( + 
`../src/services/get-copilot-chat-version.ts?t=${Date.now()}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.30.1") + }) + + test("returns hardcoded fallback on network error", async () => { + globalThis.fetch = makeFetchMock((_url) => { + throw new Error("connection refused") + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 1}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) + + test("returns hardcoded fallback when API response has unexpected shape", async () => { + globalThis.fetch = makeFetchMock((_url) => { + return jsonResponse({ results: [] }) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 2}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) + + test("cache prevents second fetch within TTL", async () => { + let fetchCallCount = 0 + globalThis.fetch = makeFetchMock((_url) => { + fetchCallCount++ + return jsonResponse(validMarketplaceResponse) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 3}` + )) as CopilotChatVersionModule + + const v1 = await mod.getCopilotChatVersion() + const v2 = await mod.getCopilotChatVersion() + + expect(v1).toBe("0.30.1") + expect(v2).toBe("0.30.1") + expect(fetchCallCount).toBe(1) + }) + + // T3 — Marketplace returns HTTP 503 + test("T3: returns fallback when Marketplace returns HTTP 503", async () => { + globalThis.fetch = makeFetchMock((_url) => { + return new Response("Service Unavailable", { status: 503 }) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 10}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) + + // T4 — Marketplace returns 
version: "" (empty string) + test("T4: returns fallback when Marketplace version is empty string", async () => { + globalThis.fetch = makeFetchMock((_url) => { + return jsonResponse({ + results: [{ extensions: [{ versions: [{ version: "" }] }] }], + }) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 11}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) + + // T7 — Format validation rejects CRLF-injected version + test("T7: rejects version with CRLF injection and returns fallback", async () => { + globalThis.fetch = makeFetchMock((_url) => { + return jsonResponse({ + results: [ + { + extensions: [{ versions: [{ version: "1.0.0\r\nEvil: header" }] }], + }, + ], + }) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 12}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) +}) + +// --------------------------------------------------------------------------- +// State interface test — shape check +// --------------------------------------------------------------------------- + +describe("State type includes copilotChatVersion", () => { + test("state object accepts copilotChatVersion field", async () => { + const { state } = (await import( + `../src/lib/state.ts?t=${Date.now()}` + )) as StateModule + // Field must be optionally present (undefined by default) + expect(state.copilotChatVersion).toBeUndefined() + + // Should be assignable without TS errors (runtime check) + state.copilotChatVersion = "0.26.7" + expect(state.copilotChatVersion).toBe("0.26.7") + }) +}) + +// --------------------------------------------------------------------------- +// T6 — api-config header uses fallback when copilotChatVersion is undefined +// --------------------------------------------------------------------------- + +describe("copilotHeaders 
fallback", () => { + test("T6: editor-plugin-version uses fallback string when state.copilotChatVersion is undefined", async () => { + const { copilotHeaders } = await import("../src/lib/api-config") + const minimalState = { + accountType: "individual", + manualApprove: false, + rateLimitWait: false, + showToken: false, + copilotToken: "tok", + vsCodeVersion: "1.99.0", + copilotChatVersion: undefined, + } + + const headers = copilotHeaders( + minimalState as Parameters[0], + false, + ) + expect(headers["editor-plugin-version"]).not.toBe("copilot-chat/undefined") + expect(headers["editor-plugin-version"]).toBe("copilot-chat/0.26.7") + }) +}) From cc11c1d22b3f6e4c805bb314699ba00cc4790708 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 01:50:28 +0800 Subject: [PATCH 02/17] feat(native-anthropic): add pass-through service, dispatch, type fixes (#38-#45) --- docs/prd/native-anthropic-passthrough.md | 74 ++++++++ src/routes/messages/anthropic-types.ts | 39 ++++- src/routes/messages/handler.ts | 75 +++++++- src/routes/messages/non-stream-translation.ts | 26 ++- .../copilot/create-messages-native.ts | 161 ++++++++++++++++++ src/services/copilot/native-models.ts | 57 +++++++ 6 files changed, 412 insertions(+), 20 deletions(-) create mode 100644 docs/prd/native-anthropic-passthrough.md create mode 100644 src/services/copilot/create-messages-native.ts create mode 100644 src/services/copilot/native-models.ts diff --git a/docs/prd/native-anthropic-passthrough.md b/docs/prd/native-anthropic-passthrough.md new file mode 100644 index 000000000..606c98ca8 --- /dev/null +++ b/docs/prd/native-anthropic-passthrough.md @@ -0,0 +1,74 @@ +# Native Anthropic Pass-Through for Claude Models + +## Status +Approved + +## Overview +Route Anthropic `/v1/messages` requests for Claude models directly to the GitHub Copilot upstream's native Anthropic endpoint, bypassing the existing OpenAI translation layer. 
This preserves thinking blocks with `signature` field, `top_k`, `cache_control`, and richer usage stats — none of which survive the current translation round-trip. + +## Motivation +GitHub Copilot's upstream (`api.enterprise.githubcopilot.com`) natively speaks the Anthropic Messages API for all Claude 4.5+ models. The current code path translates Anthropic → OpenAI → sends → translates back, losing: +- `thinking` blocks (completely dropped) +- `signature` field on thinking blocks (required for multi-turn reasoning) +- `cache_creation_input_tokens` in usage +- `top_k` parameter +- `cache_control` on system/user blocks + +The fix: detect Claude models by `vendor === "Anthropic"` from the `/models` endpoint, and forward requests verbatim to `/v1/messages` upstream. + +## Requirements + +1. **`create-messages-native.ts`** — Service client that POSTs Anthropic payloads directly to `${copilotBaseUrl}/v1/messages` with correct headers (`anthropic-version`, `anthropic-beta`). +2. **Route dispatch** — `handler.ts` checks `isNativeAnthropicModel(model)` and branches to native path for Claude, translation path for everything else. +3. **`native-models.ts`** — `isNativeAnthropicModel(modelId)` checks `state.models` vendor field; falls back to `claude-` prefix heuristic before models load. +4. **Type fixes** — `anthropic-types.ts`: `signature?` on `AnthropicThinkingBlock`; union `thinking` type for adaptive (opus-4.7+); `output_config`; `AnthropicImageBlock` URL source; `AnthropicToolResultBlock.content` widened. +5. **Adaptive thinking upgrade** — `create-messages-native.ts` auto-upgrades `{ type: "enabled" }` → `{ type: "adaptive" }` + `output_config.effort` for `claude-opus-4.7+` models. +6. **SSE proxy** — Streaming responses from native path forwarded verbatim to client (no re-translation needed). + +## Acceptance Criteria + +- Claude models (`vendor === "Anthropic"`) route to native path; non-Claude models route to translation path. 
+- Thinking blocks with `signature` field returned to client in both streaming and non-streaming. +- Multi-turn conversations with thinking blocks (echoing `signature`) work correctly. +- `claude-opus-4.7+` with `{ type: "enabled" }` thinking auto-upgrades to adaptive format; no HTTP 400. +- All existing tests pass; new tests cover native vs. translation dispatch. + +## Technical Approach + +### Model detection +`state.models.data` from `/models` endpoint has `vendor: "Anthropic"` for all Claude models. `isNativeAnthropicModel()` checks this first, falls back to `startsWith("claude-")` heuristic. + +### Headers for native path +``` +anthropic-version: 2023-06-01 +anthropic-beta: interleaved-thinking-2025-05-14,prompt-caching-2024-07-31 +``` +Plus all standard Copilot headers (auth, editor-version, etc.). + +### Streaming proxy +Native upstream sends proper Anthropic SSE events. Parse `event.type` for logging; forward `rawEvent.data` verbatim. No translation needed. + +### Adaptive thinking (opus-4.7+) +If model matches `/^claude-opus-4[.-](\d+)/` with minor ≥ 7, auto-upgrade `{ type: "enabled", budget_tokens: N }` → `{ type: "adaptive" }` + `output_config: { effort: "medium" }`. 
+ +## File Changes + +**New:** +- `src/services/copilot/create-messages-native.ts` +- `src/services/copilot/native-models.ts` + +**Modified:** +- `src/routes/messages/anthropic-types.ts` — type fixes +- `src/routes/messages/handler.ts` — dispatch logic +- `src/routes/messages/non-stream-translation.ts` — remove stale comment; fix image source narrowing + +## Testing Strategy +- Unit: `isNativeAnthropicModel()` with populated vs empty `state.models` +- Unit: `buildUpstreamPayload()` adaptive thinking upgrade +- Integration: handler routes Claude models to native, GPT models to translation +- Existing translation tests must still pass + +## Out of Scope +- Persistent caching of native responses +- URL image sources (rejected by upstream; type kept for fidelity) +- Responses API (#1 epic) diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 881fffcc8..dda7657b3 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -18,9 +18,16 @@ export interface AnthropicMessagesPayload { type: "auto" | "any" | "tool" | "none" name?: string } - thinking?: { - type: "enabled" - budget_tokens?: number + /** + * Thinking config. + * - Legacy (claude-3.7 / claude-4.5): `{ type: "enabled", budget_tokens: N }` + * - New adaptive (claude-opus-4.7+): `{ type: "adaptive" }` paired with + * `output_config.effort` in the request body. + */ + thinking?: { type: "enabled"; budget_tokens?: number } | { type: "adaptive" } + /** Used together with `thinking: { type: "adaptive" }` on opus-4.7+. 
*/ + output_config?: { + effort?: "low" | "medium" | "high" } service_tier?: "auto" | "standard_only" } @@ -32,17 +39,24 @@ export interface AnthropicTextBlock { export interface AnthropicImageBlock { type: "image" - source: { - type: "base64" - media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" - data: string - } + source: + | { + type: "base64" + media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" + data: string + } + | { + /** URL images are rejected by Copilot upstream — kept for type fidelity only. */ + type: "url" + url: string + } } export interface AnthropicToolResultBlock { type: "tool_result" tool_use_id: string - content: string + /** May be a plain string or an array of content blocks. */ + content: string | Array is_error?: boolean } @@ -56,6 +70,12 @@ export interface AnthropicToolUseBlock { export interface AnthropicThinkingBlock { type: "thinking" thinking: string + /** + * Opaque signature returned by the upstream for extended thinking blocks. + * Must be echoed back in subsequent turns to enable multi-turn reasoning. + * Present on native pass-through responses; absent on translated responses. + */ + signature?: string } export type AnthropicUserContentBlock = @@ -106,6 +126,7 @@ export interface AnthropicResponse { output_tokens: number cache_creation_input_tokens?: number cache_read_input_tokens?: number + /** Present on native pass-through responses. 
*/ service_tier?: "standard" | "priority" | "batch" } } diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf6243..cf691ffd7 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -11,9 +11,12 @@ import { type ChatCompletionChunk, type ChatCompletionResponse, } from "~/services/copilot/create-chat-completions" +import { createMessagesNative } from "~/services/copilot/create-messages-native" +import { isNativeAnthropicModel } from "~/services/copilot/native-models" import { type AnthropicMessagesPayload, + type AnthropicStreamEventData, type AnthropicStreamState, } from "./anthropic-types" import { @@ -28,16 +31,80 @@ export async function handleCompletion(c: Context) { const anthropicPayload = await c.req.json() consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) + if (state.manualApprove) { + await awaitApproval() + } + + // Route to native Anthropic pass-through for Claude models to preserve + // thinking blocks (with signature), top_k, cache_control, and richer usage. 
+ if (isNativeAnthropicModel(anthropicPayload.model)) { + return handleNative(c, anthropicPayload) + } + + return handleTranslated(c, anthropicPayload) +} + +// --------------------------------------------------------------------------- +// Native Anthropic pass-through (Claude 4.5+ models) +// --------------------------------------------------------------------------- + +async function handleNative( + c: Context, + payload: AnthropicMessagesPayload, +): Promise { + consola.debug("Using native Anthropic pass-through for", payload.model) + + const response = await createMessagesNative(payload) + + if (!payload.stream) { + // Non-streaming: upstream already returned a complete Anthropic response + consola.debug( + "Native non-streaming response:", + JSON.stringify(response).slice(0, 400), + ) + return c.json(response) + } + + // Streaming: proxy the SSE events directly to the client + consola.debug("Native streaming response — proxying SSE events") + return streamSSE(c, async (stream) => { + for await (const rawEvent of response as AsyncIterable<{ + data?: string + event?: string + }>) { + if (rawEvent.data === "[DONE]") break + if (!rawEvent.data) continue + + // Parse to log but forward the original JSON verbatim + try { + const parsed = JSON.parse(rawEvent.data) as AnthropicStreamEventData + consola.debug("Native SSE event:", parsed.type) + await stream.writeSSE({ + event: parsed.type, + data: rawEvent.data, + }) + } catch { + // Malformed chunk — skip + consola.warn("Could not parse native SSE chunk:", rawEvent.data) + } + } + }) +} + +// --------------------------------------------------------------------------- +// Translation path (non-Claude models via /chat/completions) +// --------------------------------------------------------------------------- + +async function handleTranslated( + c: Context, + anthropicPayload: AnthropicMessagesPayload, +): Promise { const openAIPayload = translateToOpenAI(anthropicPayload) consola.debug( "Translated OpenAI request 
payload:", JSON.stringify(openAIPayload), ) - if (state.manualApprove) { - await awaitApproval() - } - const response = await createChatCompletions(openAIPayload) if (isNonStreaming(response)) { diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index dc41e6382..e154c3714 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -1,3 +1,5 @@ +import consola from "consola" + import { type ChatCompletionResponse, type ChatCompletionsPayload, @@ -213,12 +215,20 @@ function mapContent( break } case "image": { - contentParts.push({ - type: "image_url", - image_url: { - url: `data:${block.source.media_type};base64,${block.source.data}`, - }, - }) + if (block.source.type === "base64") { + contentParts.push({ + type: "image_url", + image_url: { + url: `data:${block.source.media_type};base64,${block.source.data}`, + }, + }) + } else { + // URL images are rejected by Copilot upstream — skip silently + // (type kept for fidelity when round-tripping through native path) + consola.warn( + "URL image source not supported in translation path — skipping", + ) + } break } @@ -302,7 +312,9 @@ export function translateToAnthropic( } } - // Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses + // Note: the translation path routes Claude models via /chat/completions which + // does not return thinking blocks. For thinking block support use the native + // Anthropic pass-through path (create-messages-native.ts). return { id: response.id, diff --git a/src/services/copilot/create-messages-native.ts b/src/services/copilot/create-messages-native.ts new file mode 100644 index 000000000..0fef793bf --- /dev/null +++ b/src/services/copilot/create-messages-native.ts @@ -0,0 +1,161 @@ +/** + * Native Anthropic pass-through service. 
+ * + * The GitHub Copilot upstream (`api.enterprise.githubcopilot.com`) natively + * speaks the Anthropic Messages API for all Claude 4.5+ models. Routing + * requests directly to `/v1/messages` instead of translating them through + * `/chat/completions` gives us: + * + * - Real thinking blocks with `signature` field (multi-turn reasoning) + * - `cache_creation_input_tokens` in usage + * - `top_k` support + * - No lossy translation round-trip + * + * See research notes: ~/copilot-models-litellm/copilot_models.py + */ + +import consola from "consola" +import { events } from "fetch-event-stream" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" + +import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config" +import { HTTPError } from "~/lib/error" +import { state } from "~/lib/state" + +/** + * Forward an Anthropic-format request directly to Copilot's native `/v1/messages` + * endpoint, preserving all fields (thinking, signature, top_k, cache_control, …). 
+ * + * Returns: + * - For non-streaming: the raw Anthropic JSON response object + * - For streaming: an async iterable of SSE events (fetch-event-stream) + */ +export const createMessagesNative = async ( + payload: AnthropicMessagesPayload, +) => { + if (!state.copilotToken) throw new Error("Copilot token not found") + + const hasVision = messageHasImages(payload) + const headers = buildNativeHeaders(hasVision) + + const upstream = `${copilotBaseUrl(state)}/v1/messages` + consola.debug("Native Anthropic upstream:", upstream) + + // Strip fields that are Copilot-API–specific or unsupported by upstream + const body = buildUpstreamPayload(payload) + + const response = await fetch(upstream, { + method: "POST", + headers, + body: JSON.stringify(body), + }) + + if (!response.ok) { + consola.error("Native Anthropic upstream error", response.status) + throw new HTTPError("Native Anthropic upstream error", response) + } + + if (payload.stream) { + return events(response) + } + + return response.json() +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Build headers for the Anthropic native endpoint. + * + * The upstream requires `anthropic-version` and does NOT want an `openai-intent` + * header. We reuse `copilotHeaders()` for auth/agent headers and then layer the + * Anthropic-specific ones on top. 
+ */ +function buildNativeHeaders(vision: boolean): Record { + const base = copilotHeaders(state, vision) + + // The native /v1/messages endpoint expects these Anthropic headers + return { + ...base, + "anthropic-version": "2023-06-01", + // Enable beta features: extended thinking + prompt caching + "anthropic-beta": + "interleaved-thinking-2025-05-14,prompt-caching-2024-07-31", + // Accept Anthropic streaming format + accept: "text/event-stream", + // The upstream doesn't use openai-intent for the messages path + // but leaving it does no harm; keep for header consistency + } +} + +/** + * Produce the payload forwarded to upstream. + * + * We pass through almost everything verbatim. The only transformation is that + * `claude-opus-4.7+` requires the new adaptive thinking format + * (`thinking: { type: "adaptive" }` + `output_config.effort`) rather than the + * legacy `{ type: "enabled", budget_tokens: N }`. If the caller already sent + * the correct format we leave it alone; if they sent the old format and the + * model requires adaptive, we upgrade automatically. + */ +function buildUpstreamPayload( + payload: AnthropicMessagesPayload, +): AnthropicMessagesPayload { + const { thinking, output_config, ...rest } = payload + + if (!thinking) { + return payload + } + + if (isAdaptiveThinkingModel(payload.model)) { + // Upgrade legacy enabled → adaptive if needed + if (thinking.type === "enabled") { + consola.debug( + `Upgrading thinking format to adaptive for model ${payload.model}`, + ) + return { + ...rest, + thinking: { type: "adaptive" }, + output_config: output_config ?? { effort: "medium" }, + } + } + // Already adaptive — forward as-is + return { ...rest, thinking, output_config } + } + + // Non-adaptive model — forward legacy format, drop output_config + return { ...rest, thinking } +} + +/** + * Models that require the new adaptive thinking API. + * Populated dynamically at dispatch time via `isNativeAnthropicModel()`. 
+ * This hard-coded check is the fallback. + */ +function isAdaptiveThinkingModel(model: string): boolean { + // claude-opus-4.7 and above use adaptive thinking + const match = model.match(/^claude-opus-4[.-](\d+)/) + if (match) { + const minor = Number.parseInt(match[1], 10) + return minor >= 7 + } + return false +} + +/** + * Check whether the request contains any image blocks (to set vision headers). + */ +function messageHasImages(payload: AnthropicMessagesPayload): boolean { + for (const msg of payload.messages) { + if (typeof msg.content === "string") continue + if (Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block.type === "image") return true + } + } + } + return false +} diff --git a/src/services/copilot/native-models.ts b/src/services/copilot/native-models.ts new file mode 100644 index 000000000..e411b34e4 --- /dev/null +++ b/src/services/copilot/native-models.ts @@ -0,0 +1,57 @@ +/** + * Dynamic detection of which models support native Anthropic pass-through. + * + * The Copilot `/models` endpoint returns a `vendor` field for each model. + * Any model with `vendor === "Anthropic"` is served natively via the + * `/v1/messages` path at `api.enterprise.githubcopilot.com`. + * + * We cache the set of native model IDs after the first `/models` call and + * keep it in sync with `state.models` (which is refreshed periodically by + * the token-rotation logic). + */ + +import { state } from "~/lib/state" + +/** + * Returns true if the given model ID should be routed to the native + * Anthropic pass-through service instead of the OpenAI chat-completions + * translation layer. + * + * Resolution order: + * 1. If `state.models` is populated, check whether the model's vendor is + * "Anthropic" (live, always up-to-date). + * 2. Fall back to a static prefix list for resilience at startup before + * the models list is fetched. 
+ */ +export function isNativeAnthropicModel(modelId: string): boolean { + if (state.models?.data) { + const entry = state.models.data.find((m) => m.id === modelId) + if (entry) { + return entry.vendor === "Anthropic" + } + // Model not found in list — fall through to prefix heuristic + } + + return matchesAnthropicPrefix(modelId) +} + +/** + * Static prefix heuristic used before `state.models` is populated. + * Covers all current Claude variants served by Copilot. + */ +function matchesAnthropicPrefix(modelId: string): boolean { + return modelId.startsWith("claude-") || modelId.startsWith("claude_") +} + +/** + * Return the full list of model IDs that support native Anthropic pass-through, + * derived from `state.models`. Useful for logging / diagnostics. + * + * Falls back to an empty array if the models list has not been fetched yet. + */ +export function nativeAnthropicModelIds(): ReadonlyArray { + if (!state.models?.data) return [] + return state.models.data + .filter((m) => m.vendor === "Anthropic") + .map((m) => m.id) +} From dce9e6c3355e686ea28ddbecb5d6891adb255ce8 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 01:57:22 +0800 Subject: [PATCH 03/17] fix: address review round 1 feedback for native pass-through (#38) - H1: Remove dead [DONE] sentinel from native SSE loop (Anthropic terminates via connection close) - H3: Conditionally set accept: text/event-stream only when streaming - M1: buildUpstreamPayload returns rest (not payload) when thinking absent, stripping output_config - M2: Truncate raw SSE data to 200 chars in warn log to prevent log injection - L2: Remove claude_ underscore prefix heuristic (no known Anthropic model uses it) - L3: Document >= 7 threshold comment in isAdaptiveThinkingModel - L4: Replace verbose JSDoc on nativeAnthropicModelIds with concise standard form - Export buildUpstreamPayload for direct unit testing - Add tests/native-passthrough.test.ts with T1-T9 covering 
payload transform and model routing Co-Authored-By: Claude Sonnet 4.6 --- src/routes/messages/handler.ts | 6 +- .../copilot/create-messages-native.ts | 19 ++- src/services/copilot/native-models.ts | 8 +- tests/native-passthrough.test.ts | 160 ++++++++++++++++++ 4 files changed, 178 insertions(+), 15 deletions(-) create mode 100644 tests/native-passthrough.test.ts diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index cf691ffd7..2bf1005ea 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -72,7 +72,6 @@ async function handleNative( data?: string event?: string }>) { - if (rawEvent.data === "[DONE]") break if (!rawEvent.data) continue // Parse to log but forward the original JSON verbatim @@ -85,7 +84,10 @@ async function handleNative( }) } catch { // Malformed chunk — skip - consola.warn("Could not parse native SSE chunk:", rawEvent.data) + consola.warn( + "Could not parse native SSE chunk:", + rawEvent.data.slice(0, 200), + ) } } }) diff --git a/src/services/copilot/create-messages-native.ts b/src/services/copilot/create-messages-native.ts index 0fef793bf..e6c41cb72 100644 --- a/src/services/copilot/create-messages-native.ts +++ b/src/services/copilot/create-messages-native.ts @@ -37,7 +37,7 @@ export const createMessagesNative = async ( if (!state.copilotToken) throw new Error("Copilot token not found") const hasVision = messageHasImages(payload) - const headers = buildNativeHeaders(hasVision) + const headers = buildNativeHeaders(hasVision, Boolean(payload.stream)) const upstream = `${copilotBaseUrl(state)}/v1/messages` consola.debug("Native Anthropic upstream:", upstream) @@ -74,7 +74,10 @@ export const createMessagesNative = async ( * header. We reuse `copilotHeaders()` for auth/agent headers and then layer the * Anthropic-specific ones on top. 
*/ -function buildNativeHeaders(vision: boolean): Record { +function buildNativeHeaders( + vision: boolean, + stream: boolean, +): Record { const base = copilotHeaders(state, vision) // The native /v1/messages endpoint expects these Anthropic headers @@ -84,10 +87,9 @@ function buildNativeHeaders(vision: boolean): Record { // Enable beta features: extended thinking + prompt caching "anthropic-beta": "interleaved-thinking-2025-05-14,prompt-caching-2024-07-31", - // Accept Anthropic streaming format - accept: "text/event-stream", - // The upstream doesn't use openai-intent for the messages path - // but leaving it does no harm; keep for header consistency + // Only request SSE streaming format when the caller is streaming; + // non-streaming calls should use the default application/json accept + ...(stream ? { accept: "text/event-stream" } : {}), } } @@ -101,13 +103,13 @@ function buildNativeHeaders(vision: boolean): Record { * the correct format we leave it alone; if they sent the old format and the * model requires adaptive, we upgrade automatically. 
*/ -function buildUpstreamPayload( +export function buildUpstreamPayload( payload: AnthropicMessagesPayload, ): AnthropicMessagesPayload { const { thinking, output_config, ...rest } = payload if (!thinking) { - return payload + return rest // safe: output_config only valid alongside thinking } if (isAdaptiveThinkingModel(payload.model)) { @@ -140,6 +142,7 @@ function isAdaptiveThinkingModel(model: string): boolean { const match = model.match(/^claude-opus-4[.-](\d+)/) if (match) { const minor = Number.parseInt(match[1], 10) + // claude-opus-4.7 and later use the new adaptive thinking API (not legacy budget_tokens) return minor >= 7 } return false diff --git a/src/services/copilot/native-models.ts b/src/services/copilot/native-models.ts index e411b34e4..7c3d4f59b 100644 --- a/src/services/copilot/native-models.ts +++ b/src/services/copilot/native-models.ts @@ -40,14 +40,12 @@ export function isNativeAnthropicModel(modelId: string): boolean { * Covers all current Claude variants served by Copilot. */ function matchesAnthropicPrefix(modelId: string): boolean { - return modelId.startsWith("claude-") || modelId.startsWith("claude_") + return modelId.startsWith("claude-") } /** - * Return the full list of model IDs that support native Anthropic pass-through, - * derived from `state.models`. Useful for logging / diagnostics. - * - * Falls back to an empty array if the models list has not been fetched yet. + * Returns all model IDs that support native Anthropic pass-through. + * Used for diagnostics and startup logging. 
*/ export function nativeAnthropicModelIds(): ReadonlyArray { if (!state.models?.data) return [] diff --git a/tests/native-passthrough.test.ts b/tests/native-passthrough.test.ts new file mode 100644 index 000000000..759df813d --- /dev/null +++ b/tests/native-passthrough.test.ts @@ -0,0 +1,160 @@ +import { describe, test, expect, afterEach } from "bun:test" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" + +import { state } from "~/lib/state" +import { buildUpstreamPayload } from "~/services/copilot/create-messages-native" +import { isNativeAnthropicModel } from "~/services/copilot/native-models" + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Minimal valid payload base — only the fields required by the type. */ +function basePayload( + overrides: Partial, +): AnthropicMessagesPayload { + return { + model: "claude-sonnet-4-5", + messages: [{ role: "user", content: "hi" }], + max_tokens: 1024, + ...overrides, + } +} + +// --------------------------------------------------------------------------- +// buildUpstreamPayload tests +// --------------------------------------------------------------------------- + +describe("buildUpstreamPayload", () => { + // T1 — output_config present but thinking absent → output_config stripped + test("T1: strips output_config when thinking is absent", () => { + const payload = basePayload({ + output_config: { effort: "high" }, + }) + const result = buildUpstreamPayload(payload) + expect(result).not.toHaveProperty("output_config") + expect(result).not.toHaveProperty("thinking") + }) + + // T2 — adaptive upgrade preserves explicit output_config: { effort: "high" } + test("T2: adaptive upgrade preserves explicit output_config effort", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled" }, + output_config: { effort: "high" }, + 
} as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + // Should keep caller's "high", not override to "medium" + expect(result.output_config).toEqual({ effort: "high" }) + }) + + // T3 — already adaptive → forwarded as-is + test("T3: already-adaptive thinking forwarded as-is", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "adaptive" }, + output_config: { effort: "low" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + expect(result.output_config).toEqual({ effort: "low" }) + }) + + // T4 — legacy model with enabled thinking → kept as-is, no adaptive upgrade + test("T4: legacy model with enabled thinking kept as-is", () => { + const payload = basePayload({ + model: "claude-sonnet-4-5", + thinking: { type: "enabled", budget_tokens: 1024 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 }) + expect(result).not.toHaveProperty("output_config") + }) +}) + +// --------------------------------------------------------------------------- +// isNativeAnthropicModel tests +// --------------------------------------------------------------------------- + +// Save original models state and restore after each test +const originalModels = state.models + +afterEach(() => { + state.models = originalModels +}) + +describe("isNativeAnthropicModel", () => { + // T5 — model in loaded list with vendor "Anthropic" → true + test("T5: model with vendor Anthropic in loaded list → true", () => { + state.models = { + object: "list", + data: [ + { + id: "claude-sonnet-4-5", + vendor: "Anthropic", + name: "Claude Sonnet 4.5", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "claude", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", 
+ type: "chat", + }, + }, + ], + } + expect(isNativeAnthropicModel("claude-sonnet-4-5")).toBe(true) + }) + + // T6 — model in loaded list with vendor "OpenAI" → false + test("T6: model with vendor OpenAI in loaded list → false", () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-4o", + vendor: "OpenAI", + name: "GPT-4o", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "chat", + }, + }, + ], + } + expect(isNativeAnthropicModel("gpt-4o")).toBe(false) + }) + + // T7 — model NOT in loaded list, starts with "claude-" → true (heuristic) + test("T7: model not in loaded list but starts with claude- → true", () => { + state.models = { object: "list", data: [] } + expect(isNativeAnthropicModel("claude-future-1")).toBe(true) + }) + + // T8 — model NOT in loaded list, starts with "gpt-" → false + test("T8: model not in loaded list and starts with gpt- → false", () => { + state.models = { object: "list", data: [] } + expect(isNativeAnthropicModel("gpt-5")).toBe(false) + }) + + // T9 — state.models undefined → heuristic + test("T9: state.models undefined → heuristic (claude- prefix → true)", () => { + state.models = undefined + expect(isNativeAnthropicModel("claude-something")).toBe(true) + }) +}) From 6c92355168144fbcdc60cb11ec3dc56464aeab49 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:11:37 +0800 Subject: [PATCH 04/17] fix: address review round 2 feedback for native pass-through (#38) --- src/routes/messages/handler.ts | 15 +-- .../copilot/create-messages-native.ts | 19 ++-- src/services/copilot/native-models.ts | 19 ---- tests/native-passthrough.test.ts | 105 +++++++++++++++++- 4 files changed, 120 insertions(+), 38 deletions(-) diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 
2bf1005ea..e383b2dc0 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -74,18 +74,19 @@ async function handleNative( }>) { if (!rawEvent.data) continue - // Parse to log but forward the original JSON verbatim + // Forward verbatim — never block on parse failure + await stream.writeSSE({ + event: rawEvent.event, + data: rawEvent.data, + }) + + // Parse only for debug logging try { const parsed = JSON.parse(rawEvent.data) as AnthropicStreamEventData consola.debug("Native SSE event:", parsed.type) - await stream.writeSSE({ - event: parsed.type, - data: rawEvent.data, - }) } catch { - // Malformed chunk — skip consola.warn( - "Could not parse native SSE chunk:", + "Could not parse native SSE chunk for logging:", rawEvent.data.slice(0, 200), ) } diff --git a/src/services/copilot/create-messages-native.ts b/src/services/copilot/create-messages-native.ts index e6c41cb72..8b9fb54fd 100644 --- a/src/services/copilot/create-messages-native.ts +++ b/src/services/copilot/create-messages-native.ts @@ -80,15 +80,16 @@ function buildNativeHeaders( ): Record { const base = copilotHeaders(state, vision) - // The native /v1/messages endpoint expects these Anthropic headers + // Remove headers that are OpenAI-specific and not expected by Anthropic endpoint + const { "openai-intent": _dropped, ...anthropicBase } = base + return { - ...base, + ...anthropicBase, "anthropic-version": "2023-06-01", // Enable beta features: extended thinking + prompt caching "anthropic-beta": "interleaved-thinking-2025-05-14,prompt-caching-2024-07-31", - // Only request SSE streaming format when the caller is streaming; - // non-streaming calls should use the default application/json accept + // Only request SSE streaming format when the caller is streaming ...(stream ? { accept: "text/event-stream" } : {}), } } @@ -121,7 +122,8 @@ export function buildUpstreamPayload( return { ...rest, thinking: { type: "adaptive" }, - output_config: output_config ?? 
{ effort: "medium" }, + output_config: + output_config?.effort ? output_config : { effort: "medium" }, } } // Already adaptive — forward as-is @@ -133,9 +135,10 @@ export function buildUpstreamPayload( } /** - * Models that require the new adaptive thinking API. - * Populated dynamically at dispatch time via `isNativeAnthropicModel()`. - * This hard-coded check is the fallback. + * Returns true for models that require the adaptive thinking API + * (`{ type: "adaptive" }` + `output_config.effort`) rather than the + * legacy `{ type: "enabled", budget_tokens: N }`. + * Currently: claude-opus-4.7 and later. */ function isAdaptiveThinkingModel(model: string): boolean { // claude-opus-4.7 and above use adaptive thinking diff --git a/src/services/copilot/native-models.ts b/src/services/copilot/native-models.ts index 7c3d4f59b..7d731d01b 100644 --- a/src/services/copilot/native-models.ts +++ b/src/services/copilot/native-models.ts @@ -32,24 +32,5 @@ export function isNativeAnthropicModel(modelId: string): boolean { // Model not found in list — fall through to prefix heuristic } - return matchesAnthropicPrefix(modelId) -} - -/** - * Static prefix heuristic used before `state.models` is populated. - * Covers all current Claude variants served by Copilot. - */ -function matchesAnthropicPrefix(modelId: string): boolean { return modelId.startsWith("claude-") } - -/** - * Returns all model IDs that support native Anthropic pass-through. - * Used for diagnostics and startup logging. 
- */ -export function nativeAnthropicModelIds(): ReadonlyArray { - if (!state.models?.data) return [] - return state.models.data - .filter((m) => m.vendor === "Anthropic") - .map((m) => m.id) -} diff --git a/tests/native-passthrough.test.ts b/tests/native-passthrough.test.ts index 759df813d..6e4f09c72 100644 --- a/tests/native-passthrough.test.ts +++ b/tests/native-passthrough.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, afterEach } from "bun:test" +import { describe, test, expect, beforeEach, afterEach } from "bun:test" import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" @@ -72,17 +72,45 @@ describe("buildUpstreamPayload", () => { expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 }) expect(result).not.toHaveProperty("output_config") }) + + // T5 — adaptive upgrade with no output_config → defaults to effort:medium + test("T5: adaptive upgrade with no output_config defaults to effort:medium", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled", budget_tokens: 1024 }, + // output_config intentionally absent + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + expect(result.output_config).toEqual({ effort: "medium" }) + }) + + // T6 — output_config: {} also triggers default (not bypassed) + test("T6: empty output_config triggers medium effort default", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled" }, + output_config: {}, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + expect(result.output_config).toEqual({ effort: "medium" }) + }) }) // --------------------------------------------------------------------------- // isNativeAnthropicModel tests // --------------------------------------------------------------------------- -// Save original models state and restore after 
each test -const originalModels = state.models +// Per-test state isolation +let savedModels: typeof state.models + +beforeEach(() => { + savedModels = state.models +}) afterEach(() => { - state.models = originalModels + state.models = savedModels }) describe("isNativeAnthropicModel", () => { @@ -158,3 +186,72 @@ describe("isNativeAnthropicModel", () => { expect(isNativeAnthropicModel("claude-something")).toBe(true) }) }) + +// --------------------------------------------------------------------------- +// isAdaptiveThinkingModel boundary tests (via buildUpstreamPayload) +// --------------------------------------------------------------------------- + +describe("isAdaptiveThinkingModel boundaries (via buildUpstreamPayload)", () => { + // B1 — claude-opus-4.6 is NOT upgraded (one below threshold) + test("B1: claude-opus-4.6 does NOT get adaptive upgrade", () => { + const payload = basePayload({ + model: "claude-opus-4.6", + thinking: { type: "enabled", budget_tokens: 2048 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 2048 }) + expect(result).not.toHaveProperty("output_config") + }) + + // B2 — claude-opus-4.7 IS upgraded (exact threshold) + test("B2: claude-opus-4.7 (dot separator) IS upgraded to adaptive", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + }) + + // B3 — claude-opus-4-7 (dash separator) IS upgraded + test("B3: claude-opus-4-7 (dash separator) IS upgraded to adaptive", () => { + const payload = basePayload({ + model: "claude-opus-4-7", + thinking: { type: "enabled" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + }) + + // B4 — claude-opus-4-6 (dash separator) is NOT upgraded + test("B4: claude-opus-4-6 (dash separator) 
NOT upgraded", () => { + const payload = basePayload({ + model: "claude-opus-4-6", + thinking: { type: "enabled", budget_tokens: 512 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 512 }) + expect(result).not.toHaveProperty("output_config") + }) + + // B5 — claude-opus-4.8 (one above threshold) IS upgraded + test("B5: claude-opus-4.8 (one above threshold) IS upgraded", () => { + const payload = basePayload({ + model: "claude-opus-4.8", + thinking: { type: "enabled" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + }) + + // B6 — claude-sonnet-4.7 (non-opus) is NOT upgraded + test("B6: claude-sonnet-4.7 (non-opus) NOT upgraded to adaptive", () => { + const payload = basePayload({ + model: "claude-sonnet-4.7", + thinking: { type: "enabled", budget_tokens: 1024 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 }) + expect(result).not.toHaveProperty("output_config") + }) +}) From 9409034979a4ad4f9ef680cfc81d56c8f3e7521d Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:22:14 +0800 Subject: [PATCH 05/17] fix: add vsCodeVersion fallback guard in api-config.ts (#46) Export FALLBACK from get-vscode-version.ts and apply `state.vsCodeVersion ?? VSCODE_VERSION_FALLBACK` in both copilotHeaders calls, matching the existing copilotChatVersion pattern. 
Co-Authored-By: Claude Sonnet 4.6 --- src/lib/api-config.ts | 5 +++-- src/services/get-vscode-version.ts | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts index 294959285..09d235bf7 100644 --- a/src/lib/api-config.ts +++ b/src/lib/api-config.ts @@ -1,6 +1,7 @@ import { randomUUID } from "node:crypto" import { FALLBACK as COPILOT_CHAT_VERSION_FALLBACK } from "~/services/get-copilot-chat-version" +import { FALLBACK as VSCODE_VERSION_FALLBACK } from "~/services/get-vscode-version" import type { State } from "./state" @@ -38,7 +39,7 @@ export const copilotHeaders = (state: State, vision: boolean = false) => { Authorization: `Bearer ${state.copilotToken}`, "content-type": standardHeaders()["content-type"], "copilot-integration-id": "vscode-chat", - "editor-version": `vscode/${state.vsCodeVersion}`, + "editor-version": `vscode/${state.vsCodeVersion ?? VSCODE_VERSION_FALLBACK}`, "editor-plugin-version": `copilot-chat/${copilotVersion}`, "user-agent": `GitHubCopilotChat/${copilotVersion}`, "openai-intent": "conversation-panel", @@ -59,7 +60,7 @@ export const githubHeaders = (state: State) => { return { ...standardHeaders(), authorization: `token ${state.githubToken}`, - "editor-version": `vscode/${state.vsCodeVersion}`, + "editor-version": `vscode/${state.vsCodeVersion ?? 
VSCODE_VERSION_FALLBACK}`, "editor-plugin-version": `copilot-chat/${copilotVersion}`, "user-agent": `GitHubCopilotChat/${copilotVersion}`, "x-github-api-version": API_VERSION, diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts index f9732bfed..bfb92de5e 100644 --- a/src/services/get-vscode-version.ts +++ b/src/services/get-vscode-version.ts @@ -2,7 +2,7 @@ import consola from "consola" import { VERSION_CACHE_TTL_MS, type VersionCache } from "./version-cache" -const FALLBACK = "1.104.3" +export const FALLBACK = "1.104.3" let cache: VersionCache | undefined From a36fa096004985394f21a7f9017977b4c2d4c7e5 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:26:39 +0800 Subject: [PATCH 06/17] feat(responses): route scaffolding + reasoning_effort types (#2, #7) Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 27 ++++ src/routes/responses/route.ts | 9 ++ src/routes/responses/types.ts | 125 ++++++++++++++++++ src/server.ts | 5 + .../copilot/create-chat-completions.ts | 3 + 5 files changed, 169 insertions(+) create mode 100644 src/routes/responses/handler.ts create mode 100644 src/routes/responses/route.ts create mode 100644 src/routes/responses/types.ts diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts new file mode 100644 index 000000000..0f32dcb18 --- /dev/null +++ b/src/routes/responses/handler.ts @@ -0,0 +1,27 @@ +import type { Context } from "hono" + +import consola from "consola" + +import type { ResponsesPayload } from "./types" + +export async function handleResponses(c: Context): Promise { + const payload = await c.req.json() + consola.debug("Responses API request payload:", JSON.stringify(payload)) + + // TODO(#4): wire up createResponses() service client + // For now return a structured 501 so the route is exercisable + consola.warn( + "POST /v1/responses is not yet implemented — service client pending (#4)", + ) + return 
c.json( + { + error: { + message: + "Responses API service client not yet implemented. See issue #4.", + type: "not_implemented", + code: "responses_not_implemented", + }, + }, + 501, + ) +} diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts new file mode 100644 index 000000000..ac6aa20f1 --- /dev/null +++ b/src/routes/responses/route.ts @@ -0,0 +1,9 @@ +import { Hono } from "hono" + +import { handleResponses } from "./handler" + +const responses = new Hono() + +responses.post("/", handleResponses) + +export default responses diff --git a/src/routes/responses/types.ts b/src/routes/responses/types.ts new file mode 100644 index 000000000..8953e55de --- /dev/null +++ b/src/routes/responses/types.ts @@ -0,0 +1,125 @@ +// Request types +export interface ResponsesPayload { + model: string + input: Array + instructions?: string + tools?: Array + tool_choice?: + | "auto" + | "none" + | "required" + | { type: "function"; name: string } + temperature?: number | null + top_p?: number | null + max_output_tokens?: number | null + reasoning?: { + effort?: "minimal" | "low" | "medium" | "high" + summary?: "auto" | "concise" | "detailed" + } | null + previous_response_id?: string | null + store?: boolean | null + include?: Array | null + stream?: boolean | null + metadata?: Record | null + parallel_tool_calls?: boolean | null + service_tier?: "auto" | "default" | null + truncation?: "auto" | "disabled" | null + user?: string | null +} + +// Input item types (union) +export type ResponsesInputItem = + | ResponsesInputMessage + | ResponsesFunctionCallOutput + | ResponsesReasoningItem + +export interface ResponsesInputMessage { + type: "message" + role: "user" | "assistant" | "system" | "developer" + content: string | Array + id?: string + status?: "completed" | "incomplete" | null +} + +export interface ResponsesFunctionCallOutput { + type: "function_call_output" + call_id: string + output: string +} + +export interface ResponsesReasoningItem { + type: 
"reasoning" + id: string + encrypted_content?: string + summary?: Array<{ type: "summary_text"; text: string }> + status?: "completed" | "in_progress" | "incomplete" | null +} + +export type ResponsesContentPart = + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string; detail?: "low" | "high" | "auto" } + +// Tool types +export interface ResponsesTool { + type: "function" + name: string + description?: string + parameters?: Record + strict?: boolean +} + +// Output item types (response) +export type ResponsesOutputItem = + | ResponsesOutputMessage + | ResponsesOutputFunctionCall + | ResponsesOutputReasoning + +export interface ResponsesOutputMessage { + type: "message" + id: string + role: "assistant" + content: Array + status: "completed" | "incomplete" | "in_progress" +} + +export interface ResponsesOutputFunctionCall { + type: "function_call" + id: string + call_id: string + name: string + arguments: string + status: "completed" | "incomplete" | "in_progress" +} + +export interface ResponsesOutputReasoning { + type: "reasoning" + id: string + encrypted_content?: string + summary?: Array<{ type: "summary_text"; text: string }> + status: "completed" | "incomplete" | "in_progress" +} + +export type ResponsesOutputContentPart = + | { type: "output_text"; text: string; annotations?: Array } + | { type: "refusal"; refusal: string } + +// Response type +export interface ResponsesResponse { + id: string + object: "response" + created_at: number + model: string + status: "completed" | "incomplete" | "in_progress" | "failed" + output: Array + usage?: { + input_tokens: number + output_tokens: number + total_tokens: number + input_tokens_details?: { cached_tokens?: number } + output_tokens_details?: { reasoning_tokens?: number } + } + error?: { code: string; message: string } | null + incomplete_details?: { reason: string } | null + metadata?: Record | null + service_tier?: string +} diff --git a/src/server.ts b/src/server.ts index 
462a278f3..6e6b6a878 100644 --- a/src/server.ts +++ b/src/server.ts @@ -6,6 +6,7 @@ import { completionRoutes } from "./routes/chat-completions/route" import { embeddingRoutes } from "./routes/embeddings/route" import { messageRoutes } from "./routes/messages/route" import { modelRoutes } from "./routes/models/route" +import responses from "./routes/responses/route" import { tokenRoute } from "./routes/token/route" import { usageRoute } from "./routes/usage/route" @@ -29,3 +30,7 @@ server.route("/v1/embeddings", embeddingRoutes) // Anthropic compatible endpoints server.route("/v1/messages", messageRoutes) + +// OpenAI Responses API +server.route("/responses", responses) +server.route("/v1/responses", responses) diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 8534151da..fc96517e4 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -71,6 +71,7 @@ export interface ChatCompletionChunk { interface Delta { content?: string | null + reasoning_content?: string | null role?: "user" | "assistant" | "system" | "tool" tool_calls?: Array<{ index: number @@ -112,6 +113,7 @@ export interface ChatCompletionResponse { interface ResponseMessage { role: "assistant" content: string | null + reasoning_content?: string | null tool_calls?: Array } @@ -148,6 +150,7 @@ export interface ChatCompletionsPayload { | { type: "function"; function: { name: string } } | null user?: string | null + reasoning_effort?: "minimal" | "low" | "medium" | "high" | null } export interface Tool { From d417a7c53c1c271b392fd9d578d8c38c1d97d3b1 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:31:50 +0800 Subject: [PATCH 07/17] fix(responses): type fixes + error handling + route tests (#2, #7) Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 9 +++-- src/routes/responses/types.ts | 19 ++++++++- 
.../copilot/create-chat-completions.ts | 2 +- tests/responses-route.test.ts | 39 +++++++++++++++++++ 4 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 tests/responses-route.test.ts diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index 0f32dcb18..e976252be 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -5,11 +5,14 @@ import consola from "consola" import type { ResponsesPayload } from "./types" export async function handleResponses(c: Context): Promise { - const payload = await c.req.json() - consola.debug("Responses API request payload:", JSON.stringify(payload)) + try { + const payload = await c.req.json() + consola.debug("Responses API request payload:", JSON.stringify(payload)) + } catch { + consola.debug("Responses API request received (could not parse body)") + } // TODO(#4): wire up createResponses() service client - // For now return a structured 501 so the route is exercisable consola.warn( "POST /v1/responses is not yet implemented — service client pending (#4)", ) diff --git a/src/routes/responses/types.ts b/src/routes/responses/types.ts index 8953e55de..a5241129b 100644 --- a/src/routes/responses/types.ts +++ b/src/routes/responses/types.ts @@ -13,7 +13,7 @@ export interface ResponsesPayload { top_p?: number | null max_output_tokens?: number | null reasoning?: { - effort?: "minimal" | "low" | "medium" | "high" + effort?: "low" | "medium" | "high" summary?: "auto" | "concise" | "detailed" } | null previous_response_id?: string | null @@ -30,6 +30,7 @@ export interface ResponsesPayload { // Input item types (union) export type ResponsesInputItem = | ResponsesInputMessage + | ResponsesInputFunctionCall | ResponsesFunctionCallOutput | ResponsesReasoningItem @@ -41,6 +42,15 @@ export interface ResponsesInputMessage { status?: "completed" | "incomplete" | null } +export interface ResponsesInputFunctionCall { + type: "function_call" + id?: string + call_id: string + name: 
string + arguments: string + status?: "completed" | "in_progress" | "incomplete" | null +} + export interface ResponsesFunctionCallOutput { type: "function_call_output" call_id: string @@ -57,7 +67,12 @@ export interface ResponsesReasoningItem { export type ResponsesContentPart = | { type: "input_text"; text: string } - | { type: "input_image"; image_url: string; detail?: "low" | "high" | "auto" } + | { + type: "input_image" + image_url?: string | null + file_id?: string | null + detail?: "low" | "high" | "auto" + } // Tool types export interface ResponsesTool { diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index fc96517e4..ea4eb5d55 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -150,7 +150,7 @@ export interface ChatCompletionsPayload { | { type: "function"; function: { name: string } } | null user?: string | null - reasoning_effort?: "minimal" | "low" | "medium" | "high" | null + reasoning_effort?: "low" | "medium" | "high" | null } export interface Tool { diff --git a/tests/responses-route.test.ts b/tests/responses-route.test.ts new file mode 100644 index 000000000..8f5b496c3 --- /dev/null +++ b/tests/responses-route.test.ts @@ -0,0 +1,39 @@ +import { describe, test, expect } from "bun:test" + +import { server } from "../src/server" + +describe("POST /v1/responses stub", () => { + test("returns 501 with structured error body", async () => { + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [] }), + }) + expect(res.status).toBe(501) + const body = (await res.json()) as { + error: { type: string; code: string; message: string } + } + expect(body.error.type).toBe("not_implemented") + expect(body.error.code).toBe("responses_not_implemented") + expect(typeof body.error.message).toBe("string") + }) + + test("bare 
/responses path also returns 501", async () => { + const res = await server.request("/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [] }), + }) + expect(res.status).toBe(501) + const body = (await res.json()) as { error: { code: string } } + expect(body.error.code).toBe("responses_not_implemented") + }) + + test("empty body returns 501 (not 500)", async () => { + const res = await server.request("/v1/responses", { + method: "POST", + // no body + }) + expect(res.status).toBe(501) + }) +}) From 977a30ff0f553498f58ba75b2251492cbba40968 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:37:25 +0800 Subject: [PATCH 08/17] fix(responses): complete type coverage for Responses API (#2) - input: accept string | Array (string shorthand) - ResponsesContentPart: add input_file variant (file_id / file_data) - ResponsesResponse.status: add "cancelled" - ResponsesFunctionCallOutput: add optional id field - service_tier: narrow to "default" | "flex" | string - handler: downgrade warn to info (stub log not a health signal) Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 2 +- src/routes/responses/types.ts | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index e976252be..6e0a257b0 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -13,7 +13,7 @@ export async function handleResponses(c: Context): Promise { } // TODO(#4): wire up createResponses() service client - consola.warn( + consola.info( "POST /v1/responses is not yet implemented — service client pending (#4)", ) return c.json( diff --git a/src/routes/responses/types.ts b/src/routes/responses/types.ts index a5241129b..642df7719 100644 --- a/src/routes/responses/types.ts +++ b/src/routes/responses/types.ts @@ -1,7 +1,7 @@ // 
Request types export interface ResponsesPayload { model: string - input: Array + input: string | Array instructions?: string tools?: Array tool_choice?: @@ -53,6 +53,7 @@ export interface ResponsesInputFunctionCall { export interface ResponsesFunctionCallOutput { type: "function_call_output" + id?: string call_id: string output: string } @@ -73,6 +74,14 @@ export type ResponsesContentPart = file_id?: string | null detail?: "low" | "high" | "auto" } + | { + type: "input_file" + /** Pre-uploaded file via Files API */ + file_id?: string + /** Base64-encoded inline file content */ + file_data?: string + filename?: string + } // Tool types export interface ResponsesTool { @@ -124,7 +133,7 @@ export interface ResponsesResponse { object: "response" created_at: number model: string - status: "completed" | "incomplete" | "in_progress" | "failed" + status: "completed" | "incomplete" | "in_progress" | "failed" | "cancelled" output: Array usage?: { input_tokens: number @@ -136,5 +145,5 @@ export interface ResponsesResponse { error?: { code: string; message: string } | null incomplete_details?: { reason: string } | null metadata?: Record | null - service_tier?: string + service_tier?: "default" | "flex" | (string & {}) } From 65a4522425053061c99708a20e263a62138f1104 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:47:38 +0800 Subject: [PATCH 09/17] feat(responses): add upstream service client + wire handler (#4) Implements createResponses() service client modelled on createChatCompletions, with inputHasImages/isAgentCall helpers and X-Initiator header logic; wires the /responses handler to call the real service client with manualApprove gate and full streaming/non-streaming SSE proxy; updates route tests to cover the live handler behaviour instead of the old 501 stub. 
Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 78 +++++++++++++++----- src/services/copilot/create-responses.ts | 93 ++++++++++++++++++++++++ tests/responses-route.test.ts | 92 ++++++++++++++++++----- 3 files changed, 226 insertions(+), 37 deletions(-) create mode 100644 src/services/copilot/create-responses.ts diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index 6e0a257b0..4a490fe38 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -1,30 +1,72 @@ import type { Context } from "hono" import consola from "consola" +import { streamSSE } from "hono/streaming" + +import { awaitApproval } from "~/lib/approval" +import { state } from "~/lib/state" +import { createResponses } from "~/services/copilot/create-responses" import type { ResponsesPayload } from "./types" export async function handleResponses(c: Context): Promise { + let payload: ResponsesPayload try { - const payload = await c.req.json() - consola.debug("Responses API request payload:", JSON.stringify(payload)) + payload = await c.req.json() } catch { - consola.debug("Responses API request received (could not parse body)") + return c.json( + { + error: { + message: "Invalid JSON body", + type: "invalid_request_error", + code: "invalid_json", + }, + }, + 400, + ) } - // TODO(#4): wire up createResponses() service client - consola.info( - "POST /v1/responses is not yet implemented — service client pending (#4)", - ) - return c.json( - { - error: { - message: - "Responses API service client not yet implemented. 
See issue #4.", - type: "not_implemented", - code: "responses_not_implemented", - }, - }, - 501, - ) + consola.debug("Responses API request payload:", JSON.stringify(payload)) + + if (state.manualApprove) { + await awaitApproval() + } + + const response = await createResponses(payload) + + if (!payload.stream) { + consola.debug( + "Responses non-streaming response:", + JSON.stringify(response).slice(0, 400), + ) + return c.json(response) + } + + // Streaming: proxy SSE events verbatim (same pattern as native Anthropic pass-through) + consola.debug("Responses streaming response — proxying SSE events") + return streamSSE(c, async (stream) => { + for await (const rawEvent of response as AsyncIterable<{ + data?: string + event?: string + }>) { + if (!rawEvent.data) continue + + // Forward verbatim first + await stream.writeSSE({ + event: rawEvent.event, + data: rawEvent.data, + }) + + // Parse only for debug logging + try { + const parsed = JSON.parse(rawEvent.data) as { type: string } + consola.debug("Responses SSE event:", parsed.type) + } catch { + consola.warn( + "Could not parse Responses SSE chunk for logging:", + rawEvent.data.slice(0, 200), + ) + } + } + }) } diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts new file mode 100644 index 000000000..3163182ec --- /dev/null +++ b/src/services/copilot/create-responses.ts @@ -0,0 +1,93 @@ +import consola from "consola" +import { events } from "fetch-event-stream" + +import type { + ResponsesContentPart, + ResponsesInputItem, + ResponsesPayload, + ResponsesResponse, +} from "~/routes/responses/types" + +import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config" +import { HTTPError } from "~/lib/error" +import { state } from "~/lib/state" + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Returns true if any input item contains an 
`input_image` content part. + * Handles both a top-level string input and an array of input items. + */ +export function inputHasImages(payload: ResponsesPayload): boolean { + if (typeof payload.input === "string") return false + + return payload.input.some((item) => { + if (item.type !== "message") return false + if (typeof item.content === "string") return false + return item.content.some( + (part: ResponsesContentPart) => part.type === "input_image", + ) + }) +} + +/** + * Returns true if this looks like an agent/multi-turn call: + * - any input item has role "assistant", OR + * - any item has type "function_call_output" or "function_call" + */ +export function isAgentCall(payload: ResponsesPayload): boolean { + if (typeof payload.input === "string") return false + + return payload.input.some( + (item: ResponsesInputItem) => + ("role" in item && item.role === "assistant") + || item.type === "function_call_output" + || item.type === "function_call", + ) +} + +// --------------------------------------------------------------------------- +// Service client +// --------------------------------------------------------------------------- + +export const createResponses = async (payload: ResponsesPayload) => { + if (!state.copilotToken) throw new Error("Copilot token not found") + + const enableVision = inputHasImages(payload) + + const initiator = isAgentCall(payload) ? 
"agent" : "user" + + // TODO(#11): add Copilot-Vision-Request header when vision detected + const headers: Record = { + ...copilotHeaders(state, enableVision), + "X-Initiator": initiator, + } + + const response = await fetch(`${copilotBaseUrl(state)}/responses`, { + method: "POST", + headers, + body: JSON.stringify(payload), + }) + + if (!response.ok) { + consola.error("Failed to create responses", response) + throw new HTTPError("Failed to create responses", response) + } + + if (payload.stream) { + return events(response) + } + + return (await response.json()) as ResponsesResponse +} + +// --------------------------------------------------------------------------- +// Streaming event types for Responses API SSE +// --------------------------------------------------------------------------- + +export interface ResponseStreamEvent { + type: string // "response.created" | "response.output_text.delta" | etc. + [key: string]: unknown +} diff --git a/tests/responses-route.test.ts b/tests/responses-route.test.ts index 8f5b496c3..c5e396224 100644 --- a/tests/responses-route.test.ts +++ b/tests/responses-route.test.ts @@ -1,39 +1,93 @@ -import { describe, test, expect } from "bun:test" +import { describe, test, expect, mock, beforeAll } from "bun:test" +import { state } from "../src/lib/state" import { server } from "../src/server" -describe("POST /v1/responses stub", () => { - test("returns 501 with structured error body", async () => { +// --------------------------------------------------------------------------- +// Global fetch mock — returns a minimal non-streaming Responses API response +// --------------------------------------------------------------------------- + +const mockResponseBody = { + id: "resp_test", + object: "response", + created_at: 1_700_000_000, + model: "gpt-4o", + status: "completed", + output: [], +} + +const fetchMock = mock(() => + Promise.resolve({ + ok: true, + json: () => Promise.resolve(mockResponseBody), + }), +) + +// @ts-expect-error – 
mock doesn't implement full fetch signature +;(globalThis as unknown as { fetch: typeof fetch }).fetch = fetchMock + +// Set up copilot token so createResponses doesn't throw +beforeAll(() => { + state.copilotToken = "test-token" + state.vsCodeVersion = "1.99.0" + state.accountType = "individual" + state.manualApprove = false +}) + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("POST /v1/responses — wired handler", () => { + test("non-streaming request returns upstream JSON", async () => { const res = await server.request("/v1/responses", { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ model: "gpt-4o", input: [] }), + body: JSON.stringify({ model: "gpt-4o", input: [], stream: false }), }) - expect(res.status).toBe(501) - const body = (await res.json()) as { - error: { type: string; code: string; message: string } - } - expect(body.error.type).toBe("not_implemented") - expect(body.error.code).toBe("responses_not_implemented") - expect(typeof body.error.message).toBe("string") + expect(res.status).toBe(200) + const body = (await res.json()) as typeof mockResponseBody + expect(body.object).toBe("response") + expect(body.id).toBe("resp_test") }) - test("bare /responses path also returns 501", async () => { + test("same endpoint reachable at bare /responses path", async () => { const res = await server.request("/responses", { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ model: "gpt-4o", input: [] }), + body: JSON.stringify({ model: "gpt-4o", input: [], stream: false }), }) - expect(res.status).toBe(501) - const body = (await res.json()) as { error: { code: string } } - expect(body.error.code).toBe("responses_not_implemented") + expect(res.status).toBe(200) }) - test("empty body returns 501 (not 500)", async () => { + test("invalid JSON body returns 
400", async () => { const res = await server.request("/v1/responses", { method: "POST", - // no body + headers: { "Content-Type": "application/json" }, + body: "not-json{{{", }) - expect(res.status).toBe(501) + expect(res.status).toBe(400) + const body = (await res.json()) as { + error: { type: string; code: string } + } + expect(body.error.type).toBe("invalid_request_error") + expect(body.error.code).toBe("invalid_json") + }) + + test("missing copilot token returns 500", async () => { + // Temporarily clear the token via a describe-level wrapper so the + // assignment happens synchronously (no await between read and write). + const tokenBackup = state.copilotToken + state.copilotToken = undefined // synchronous — no race condition + + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [] }), + }) + expect(res.status).toBe(500) + + // eslint-disable-next-line require-atomic-updates + state.copilotToken = tokenBackup }) }) From b134dfa8f0df3f40f94efc1c980d57b50cb4e4fa Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 14:10:26 +0800 Subject: [PATCH 10/17] fix(responses): dead export, explicit types, X-Initiator and error tests (#4) - Delete unused `ResponseStreamEvent` interface from create-responses.ts - Add explicit `Promise>` return type to `createResponses` - Wire `forwardError` into responses route so upstream 4xx/5xx propagates correctly - Expand tests/responses-route.test.ts: new "createResponses behavior" describe block with X-Initiator=agent (assistant message), X-Initiator=user (pure user), X-Initiator=agent (function_call_output), and upstream 429 error path - Remove spurious `// eslint-disable-next-line require-atomic-updates` comment; fix underlying lint issue Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/route.ts | 10 +- src/services/copilot/create-responses.ts | 17 
++- tests/responses-route.test.ts | 150 +++++++++++++++++++++-- 3 files changed, 159 insertions(+), 18 deletions(-) diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts index ac6aa20f1..4be774e59 100644 --- a/src/routes/responses/route.ts +++ b/src/routes/responses/route.ts @@ -1,9 +1,17 @@ import { Hono } from "hono" +import { forwardError } from "~/lib/error" + import { handleResponses } from "./handler" const responses = new Hono() -responses.post("/", handleResponses) +responses.post("/", async (c) => { + try { + return await handleResponses(c) + } catch (error) { + return await forwardError(c, error) + } +}) export default responses diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts index 3163182ec..5bd1152b6 100644 --- a/src/services/copilot/create-responses.ts +++ b/src/services/copilot/create-responses.ts @@ -1,3 +1,5 @@ +import type { ServerSentEventMessage } from "fetch-event-stream" + import consola from "consola" import { events } from "fetch-event-stream" @@ -52,7 +54,11 @@ export function isAgentCall(payload: ResponsesPayload): boolean { // Service client // --------------------------------------------------------------------------- -export const createResponses = async (payload: ResponsesPayload) => { +export const createResponses = async ( + payload: ResponsesPayload, +): Promise< + ResponsesResponse | AsyncGenerator +> => { if (!state.copilotToken) throw new Error("Copilot token not found") const enableVision = inputHasImages(payload) @@ -82,12 +88,3 @@ export const createResponses = async (payload: ResponsesPayload) => { return (await response.json()) as ResponsesResponse } - -// --------------------------------------------------------------------------- -// Streaming event types for Responses API SSE -// --------------------------------------------------------------------------- - -export interface ResponseStreamEvent { - type: string // "response.created" | 
"response.output_text.delta" | etc. - [key: string]: unknown -} diff --git a/tests/responses-route.test.ts b/tests/responses-route.test.ts index c5e396224..78421d4d9 100644 --- a/tests/responses-route.test.ts +++ b/tests/responses-route.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, mock, beforeAll } from "bun:test" +import { describe, test, expect, mock, beforeAll, beforeEach } from "bun:test" import { state } from "../src/lib/state" import { server } from "../src/server" @@ -75,10 +75,8 @@ describe("POST /v1/responses — wired handler", () => { }) test("missing copilot token returns 500", async () => { - // Temporarily clear the token via a describe-level wrapper so the - // assignment happens synchronously (no await between read and write). - const tokenBackup = state.copilotToken - state.copilotToken = undefined // synchronous — no race condition + // Temporarily clear the token — write is synchronous, no await in between. + state.copilotToken = undefined const res = await server.request("/v1/responses", { method: "POST", @@ -87,7 +85,145 @@ describe("POST /v1/responses — wired handler", () => { }) expect(res.status).toBe(500) - // eslint-disable-next-line require-atomic-updates - state.copilotToken = tokenBackup + state.copilotToken = "test-token" + }) +}) + +// --------------------------------------------------------------------------- +// createResponses behavior: X-Initiator header and error propagation +// --------------------------------------------------------------------------- + +describe("createResponses behavior", () => { + // Restore state before each test in this block + beforeEach(() => { + state.copilotToken = "test-token" + state.vsCodeVersion = "1.99.0" + state.accountType = "individual" + state.manualApprove = false + }) + + test("X-Initiator = agent when assistant message present", async () => { + const captureMock = mock( + (_url: string, opts: { headers: Record }) => + Promise.resolve({ + ok: true, + json: () => 
Promise.resolve(mockResponseBody), + headers: opts.headers, + }), + ) + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = captureMock + + await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4o", + stream: false, + input: [ + { type: "message", role: "user", content: "hello" }, + { type: "message", role: "assistant", content: "hi there" }, + ], + }), + }) + + expect(captureMock).toHaveBeenCalled() + const sentHeaders = ( + captureMock.mock.calls[0][1] as { headers: Record } + ).headers + expect(sentHeaders["X-Initiator"]).toBe("agent") + + // Restore default mock + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = fetchMock + }) + + test("X-Initiator = user for pure user messages", async () => { + const captureMock = mock( + (_url: string, opts: { headers: Record }) => + Promise.resolve({ + ok: true, + json: () => Promise.resolve(mockResponseBody), + headers: opts.headers, + }), + ) + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = captureMock + + await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4o", + stream: false, + input: [{ type: "message", role: "user", content: "just a user" }], + }), + }) + + expect(captureMock).toHaveBeenCalled() + const sentHeaders = ( + captureMock.mock.calls[0][1] as { headers: Record } + ).headers + expect(sentHeaders["X-Initiator"]).toBe("user") + + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = fetchMock + }) + + test("X-Initiator = agent for function_call_output item", async () => { + const captureMock = mock( + (_url: string, opts: { headers: Record }) => + Promise.resolve({ + ok: true, + json: () => Promise.resolve(mockResponseBody), + headers: opts.headers, + }), + ) + // @ts-expect-error 
– mock doesn't implement full fetch signature + globalThis.fetch = captureMock + + await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4o", + stream: false, + input: [ + { type: "function_call_output", call_id: "call_1", output: "{}" }, + ], + }), + }) + + expect(captureMock).toHaveBeenCalled() + const sentHeaders = ( + captureMock.mock.calls[0][1] as { headers: Record } + ).headers + expect(sentHeaders["X-Initiator"]).toBe("agent") + + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = fetchMock + }) + + test("upstream 4xx returns error response", async () => { + const errorMock = mock(() => + Promise.resolve({ + ok: false, + status: 429, + text: () => Promise.resolve("rate limited"), + }), + ) + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = errorMock + + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", stream: false, input: [] }), + }) + + expect(res.status).toBe(429) + + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = fetchMock }) }) From 8341b89fcaf82366766619a5e61a3715b7b9bb83 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 14:19:56 +0800 Subject: [PATCH 11/17] fix(responses): streamSSE error handler, reasoning agent detection, test hygiene (#4) - Add onError callback to streamSSE so mid-stream upstream failures are logged and surface an error event to the client instead of silently dropping the connection - Extend isAgentCall to treat reasoning items as agent context (they only appear when echoing prior-turn encrypted reasoning) - Wrap missing-token test in try/finally to prevent state leakage - Add X-Initiator=agent test for reasoning item input Co-Authored-By: Claude Sonnet 4.6 
--- src/routes/responses/handler.ts | 54 +++++++++++++--------- src/services/copilot/create-responses.ts | 6 ++- tests/responses-route.test.ts | 59 ++++++++++++++++++++---- 3 files changed, 85 insertions(+), 34 deletions(-) diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index 4a490fe38..1307f6dd7 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -44,29 +44,39 @@ export async function handleResponses(c: Context): Promise { // Streaming: proxy SSE events verbatim (same pattern as native Anthropic pass-through) consola.debug("Responses streaming response — proxying SSE events") - return streamSSE(c, async (stream) => { - for await (const rawEvent of response as AsyncIterable<{ - data?: string - event?: string - }>) { - if (!rawEvent.data) continue + return streamSSE( + c, + async (stream) => { + for await (const rawEvent of response as AsyncIterable<{ + data?: string + event?: string + }>) { + if (!rawEvent.data) continue - // Forward verbatim first - await stream.writeSSE({ - event: rawEvent.event, - data: rawEvent.data, - }) + // Forward verbatim first + await stream.writeSSE({ + event: rawEvent.event, + data: rawEvent.data, + }) - // Parse only for debug logging - try { - const parsed = JSON.parse(rawEvent.data) as { type: string } - consola.debug("Responses SSE event:", parsed.type) - } catch { - consola.warn( - "Could not parse Responses SSE chunk for logging:", - rawEvent.data.slice(0, 200), - ) + // Parse only for debug logging + try { + const parsed = JSON.parse(rawEvent.data) as { type: string } + consola.debug("Responses SSE event:", parsed.type) + } catch { + consola.warn( + "Could not parse Responses SSE chunk for logging:", + rawEvent.data.slice(0, 200), + ) + } } - } - }) + }, + async (err, stream) => { + consola.error("Responses SSE stream error:", err) + await stream.writeSSE({ + event: "error", + data: JSON.stringify({ message: String(err) }), + }) + }, + ) } diff --git 
a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts index 5bd1152b6..385b2a210 100644 --- a/src/services/copilot/create-responses.ts +++ b/src/services/copilot/create-responses.ts @@ -37,7 +37,8 @@ export function inputHasImages(payload: ResponsesPayload): boolean { /** * Returns true if this looks like an agent/multi-turn call: * - any input item has role "assistant", OR - * - any item has type "function_call_output" or "function_call" + * - any item has type "function_call_output", "function_call", or "reasoning" + * (reasoning items only appear when echoing back prior agentic turn context) */ export function isAgentCall(payload: ResponsesPayload): boolean { if (typeof payload.input === "string") return false @@ -46,7 +47,8 @@ export function isAgentCall(payload: ResponsesPayload): boolean { (item: ResponsesInputItem) => ("role" in item && item.role === "assistant") || item.type === "function_call_output" - || item.type === "function_call", + || item.type === "function_call" + || item.type === "reasoning", ) } diff --git a/tests/responses-route.test.ts b/tests/responses-route.test.ts index 78421d4d9..80329be21 100644 --- a/tests/responses-route.test.ts +++ b/tests/responses-route.test.ts @@ -75,17 +75,17 @@ describe("POST /v1/responses — wired handler", () => { }) test("missing copilot token returns 500", async () => { - // Temporarily clear the token — write is synchronous, no await in between. 
state.copilotToken = undefined - - const res = await server.request("/v1/responses", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ model: "gpt-4o", input: [] }), - }) - expect(res.status).toBe(500) - - state.copilotToken = "test-token" + try { + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [] }), + }) + expect(res.status).toBe(500) + } finally { + state.copilotToken = "test-token" + } }) }) @@ -204,6 +204,45 @@ describe("createResponses behavior", () => { globalThis.fetch = fetchMock }) + test("X-Initiator = agent for reasoning item (multi-turn context echo)", async () => { + const captureMock = mock( + (_url: string, opts: { headers: Record }) => + Promise.resolve({ + ok: true, + json: () => Promise.resolve(mockResponseBody), + headers: opts.headers, + }), + ) + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = captureMock + + await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4o", + stream: false, + input: [ + { + type: "reasoning", + id: "rs_abc", + encrypted_content: "opaque-blob", + status: "completed", + }, + ], + }), + }) + + expect(captureMock).toHaveBeenCalled() + const sentHeaders = ( + captureMock.mock.calls[0][1] as { headers: Record } + ).headers + expect(sentHeaders["X-Initiator"]).toBe("agent") + + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = fetchMock + }) + test("upstream 4xx returns error response", async () => { const errorMock = mock(() => Promise.resolve({ From ac754b020def90371affb3ca7e50046946e24f39 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 14:28:02 +0800 Subject: [PATCH 12/17] fix(responses): afterEach mock cleanup, streaming test, 
type + log polish (#4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - afterEach restores globalThis.fetch so assertion failures can't leak mock state into subsequent tests - Add streaming smoke test: mocks ReadableStream SSE body, asserts content-type: text/event-stream and event names forwarded verbatim - Return type AsyncGenerator -> AsyncIterable (matches events() actual type) - Suppress [DONE] sentinel warn — expected at every stream end, not a parse error Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 11 +++-- src/services/copilot/create-responses.ts | 4 +- tests/responses-route.test.ts | 63 ++++++++++++++++++------ 3 files changed, 55 insertions(+), 23 deletions(-) diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index 1307f6dd7..01d52ade0 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -64,10 +64,13 @@ export async function handleResponses(c: Context): Promise { const parsed = JSON.parse(rawEvent.data) as { type: string } consola.debug("Responses SSE event:", parsed.type) } catch { - consola.warn( - "Could not parse Responses SSE chunk for logging:", - rawEvent.data.slice(0, 200), - ) + // [DONE] sentinel is expected at stream end — only warn on unexpected data + if (rawEvent.data !== "[DONE]") { + consola.warn( + "Could not parse Responses SSE chunk for logging:", + rawEvent.data.slice(0, 200), + ) + } } } }, diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts index 385b2a210..1bf7d4aba 100644 --- a/src/services/copilot/create-responses.ts +++ b/src/services/copilot/create-responses.ts @@ -58,9 +58,7 @@ export function isAgentCall(payload: ResponsesPayload): boolean { export const createResponses = async ( payload: ResponsesPayload, -): Promise< - ResponsesResponse | AsyncGenerator -> => { +): Promise> => { if (!state.copilotToken) throw new Error("Copilot token not found") 
const enableVision = inputHasImages(payload) diff --git a/tests/responses-route.test.ts b/tests/responses-route.test.ts index 80329be21..eccc085ca 100644 --- a/tests/responses-route.test.ts +++ b/tests/responses-route.test.ts @@ -1,4 +1,12 @@ -import { describe, test, expect, mock, beforeAll, beforeEach } from "bun:test" +import { + describe, + test, + expect, + mock, + beforeAll, + beforeEach, + afterEach, +} from "bun:test" import { state } from "../src/lib/state" import { server } from "../src/server" @@ -94,7 +102,7 @@ describe("POST /v1/responses — wired handler", () => { // --------------------------------------------------------------------------- describe("createResponses behavior", () => { - // Restore state before each test in this block + // Restore state and fetch mock before/after each test in this block beforeEach(() => { state.copilotToken = "test-token" state.vsCodeVersion = "1.99.0" @@ -102,6 +110,11 @@ describe("createResponses behavior", () => { state.manualApprove = false }) + afterEach(() => { + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = fetchMock + }) + test("X-Initiator = agent when assistant message present", async () => { const captureMock = mock( (_url: string, opts: { headers: Record }) => @@ -132,10 +145,6 @@ describe("createResponses behavior", () => { captureMock.mock.calls[0][1] as { headers: Record } ).headers expect(sentHeaders["X-Initiator"]).toBe("agent") - - // Restore default mock - // @ts-expect-error – mock doesn't implement full fetch signature - globalThis.fetch = fetchMock }) test("X-Initiator = user for pure user messages", async () => { @@ -165,9 +174,6 @@ describe("createResponses behavior", () => { captureMock.mock.calls[0][1] as { headers: Record } ).headers expect(sentHeaders["X-Initiator"]).toBe("user") - - // @ts-expect-error – mock doesn't implement full fetch signature - globalThis.fetch = fetchMock }) test("X-Initiator = agent for function_call_output item", async () => 
{ @@ -199,9 +205,6 @@ describe("createResponses behavior", () => { captureMock.mock.calls[0][1] as { headers: Record } ).headers expect(sentHeaders["X-Initiator"]).toBe("agent") - - // @ts-expect-error – mock doesn't implement full fetch signature - globalThis.fetch = fetchMock }) test("X-Initiator = agent for reasoning item (multi-turn context echo)", async () => { @@ -238,9 +241,6 @@ describe("createResponses behavior", () => { captureMock.mock.calls[0][1] as { headers: Record } ).headers expect(sentHeaders["X-Initiator"]).toBe("agent") - - // @ts-expect-error – mock doesn't implement full fetch signature - globalThis.fetch = fetchMock }) test("upstream 4xx returns error response", async () => { @@ -261,8 +261,39 @@ describe("createResponses behavior", () => { }) expect(res.status).toBe(429) + }) + + test("streaming request proxies SSE events and returns text/event-stream", async () => { + const sseBody = + 'event: response.created\ndata: {"type":"response.created"}\n\n' + + 'event: response.completed\ndata: {"type":"response.completed"}\n\n' + + "data: [DONE]\n\n" + const streamMock = mock(() => + Promise.resolve({ + ok: true, + headers: new Headers({ "content-type": "text/event-stream" }), + body: new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode(sseBody)) + controller.close() + }, + }), + }), + ) // @ts-expect-error – mock doesn't implement full fetch signature - globalThis.fetch = fetchMock + globalThis.fetch = streamMock + + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [], stream: true }), + }) + + expect(res.status).toBe(200) + expect(res.headers.get("content-type")).toMatch(/text\/event-stream/) + const text = await res.text() + expect(text).toContain("response.created") + expect(text).toContain("response.completed") }) }) From d7c4b26ba70cf733299c4364b34fe5bf1d8afdc6 Mon Sep 17 00:00:00 2001 
From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 14:32:07 +0800 Subject: [PATCH 13/17] feat(routing): model-to-endpoint mode classifier (#5) Add getModelMode/isResponsesOnlyModel to classify Responses-only models (codex family, o-pro variants), block them at /chat/completions with a clear 400, and surface a mode field on GET /v1/models. Co-Authored-By: Claude Sonnet 4.6 --- src/lib/model-routing.ts | 49 ++++++ src/routes/chat-completions/handler.ts | 14 ++ src/routes/models/route.ts | 2 + tests/model-routing.test.ts | 199 +++++++++++++++++++++++++ 4 files changed, 264 insertions(+) create mode 100644 src/lib/model-routing.ts create mode 100644 tests/model-routing.test.ts diff --git a/src/lib/model-routing.ts b/src/lib/model-routing.ts new file mode 100644 index 000000000..094874663 --- /dev/null +++ b/src/lib/model-routing.ts @@ -0,0 +1,49 @@ +/** + * Model-to-endpoint routing. + * + * Copilot upstream serves some models exclusively via the Responses API + * (/responses) and others via Chat Completions (/chat/completions). + * Sending a Responses-only model to /chat/completions produces an error. + * + * Detection order: + * 1. If state.models is loaded, check model capabilities.type === "responses" + * (if the upstream ever adds this field). Currently Copilot doesn't set it, + * so we fall through to step 2. + * 2. Static prefix/suffix list (known Responses-only models as of 2025-05). + * + * "Responses-only" models: all gpt-5*-codex variants, o1-pro, o3-pro. + * Everything else (gpt-4o, gpt-5, o1, o3, o4-mini, claude-*, gemini-*) uses + * Chat Completions (or native Anthropic pass-through for Claude). + */ + +import { state } from "~/lib/state" + +/** Endpoint mode for routing. */ +export type ModelMode = "chat" | "responses" + +/** + * Returns the upstream endpoint mode for the given model ID. + * "responses" = must use /responses; "chat" = use /chat/completions (or native Anthropic). 
+ */ +export function getModelMode(modelId: string): ModelMode { + // 1. Check state.models capabilities if available (future-proof) + if (state.models?.data) { + const entry = state.models.data.find((m) => m.id === modelId) + if (entry?.capabilities.type === "responses") return "responses" + } + + // 2. Static heuristic: Responses-only models have "codex" in the name + // or are o-series "pro" variants. + return isResponsesOnlyModel(modelId) ? "responses" : "chat" +} + +/** + * Returns true if the model is known to be Responses-only on Copilot upstream. + */ +export function isResponsesOnlyModel(modelId: string): boolean { + // codex family: gpt-5-codex, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5.3-codex, etc. + if (modelId.includes("codex")) return true + // o-pro family: o1-pro, o3-pro + if (/^o\d+-pro$/.test(modelId)) return true + return false +} diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 04a5ae9ed..5cbc290c1 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -4,6 +4,7 @@ import consola from "consola" import { streamSSE, type SSEMessage } from "hono/streaming" import { awaitApproval } from "~/lib/approval" +import { isResponsesOnlyModel } from "~/lib/model-routing" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" @@ -20,6 +21,19 @@ export async function handleCompletion(c: Context) { let payload = await c.req.json() consola.debug("Request payload:", JSON.stringify(payload).slice(-400)) + if (isResponsesOnlyModel(payload.model)) { + return c.json( + { + error: { + message: `Model "${payload.model}" is only available via the Responses API. 
Use POST /v1/responses instead.`, + type: "invalid_request_error", + code: "responses_only_model", + }, + }, + 400, + ) + } + // Find the selected model const selectedModel = state.models?.data.find( (model) => model.id === payload.model, diff --git a/src/routes/models/route.ts b/src/routes/models/route.ts index 5254e2af7..2de4b797c 100644 --- a/src/routes/models/route.ts +++ b/src/routes/models/route.ts @@ -1,6 +1,7 @@ import { Hono } from "hono" import { forwardError } from "~/lib/error" +import { getModelMode } from "~/lib/model-routing" import { state } from "~/lib/state" import { cacheModels } from "~/lib/utils" @@ -21,6 +22,7 @@ modelRoutes.get("/", async (c) => { created_at: new Date(0).toISOString(), // No date available from source owned_by: model.vendor, display_name: model.name, + mode: getModelMode(model.id), })) return c.json({ diff --git a/tests/model-routing.test.ts b/tests/model-routing.test.ts new file mode 100644 index 000000000..2243a0f5f --- /dev/null +++ b/tests/model-routing.test.ts @@ -0,0 +1,199 @@ +import { describe, test, expect, afterEach, beforeAll } from "bun:test" + +import { getModelMode, isResponsesOnlyModel } from "../src/lib/model-routing" +import { state } from "../src/lib/state" +import { server } from "../src/server" + +// --------------------------------------------------------------------------- +// isResponsesOnlyModel — pure unit tests (no state needed) +// --------------------------------------------------------------------------- + +describe("isResponsesOnlyModel", () => { + test("gpt-5-codex → responses-only", () => + expect(isResponsesOnlyModel("gpt-5-codex")).toBe(true)) + test("gpt-5.1-codex → responses-only", () => + expect(isResponsesOnlyModel("gpt-5.1-codex")).toBe(true)) + test("gpt-5.1-codex-max → responses-only", () => + expect(isResponsesOnlyModel("gpt-5.1-codex-max")).toBe(true)) + test("gpt-5.3-codex → responses-only", () => + expect(isResponsesOnlyModel("gpt-5.3-codex")).toBe(true)) + test("o1-pro → 
responses-only", () => + expect(isResponsesOnlyModel("o1-pro")).toBe(true)) + test("o3-pro → responses-only", () => + expect(isResponsesOnlyModel("o3-pro")).toBe(true)) + test("gpt-4o → chat", () => + expect(isResponsesOnlyModel("gpt-4o")).toBe(false)) + test("gpt-5 → chat", () => expect(isResponsesOnlyModel("gpt-5")).toBe(false)) + test("o1 → chat", () => expect(isResponsesOnlyModel("o1")).toBe(false)) + test("o3 → chat", () => expect(isResponsesOnlyModel("o3")).toBe(false)) + test("claude-sonnet-4-5 → chat", () => + expect(isResponsesOnlyModel("claude-sonnet-4-5")).toBe(false)) + test("o4-mini → chat", () => + expect(isResponsesOnlyModel("o4-mini")).toBe(false)) +}) + +// --------------------------------------------------------------------------- +// getModelMode — with loaded models list (state mutation) +// --------------------------------------------------------------------------- + +describe("getModelMode — with loaded models list", () => { + const savedModels = state.models + + afterEach(() => { + state.models = savedModels + }) + + test("model with capabilities.type=responses in list → responses", () => { + state.models = { + object: "list", + data: [ + { + id: "future-responses-model", + vendor: "OpenAI", + name: "Future Model", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "responses", // upstream sets this + }, + }, + ], + } + expect(getModelMode("future-responses-model")).toBe("responses") + }) + + test("model with capabilities.type=chat in list → falls through to heuristic", () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-5-codex", + vendor: "OpenAI", + name: "Codex", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: 
"cl100k_base", + type: "chat", + }, + }, + ], + } + // capabilities.type is "chat" so list check doesn't return "responses", + // falls through to static heuristic which sees "codex" → responses + expect(getModelMode("gpt-5-codex")).toBe("responses") + }) + + test("regular chat model → chat", () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-4o", + vendor: "OpenAI", + name: "GPT-4o", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "chat", + }, + }, + ], + } + expect(getModelMode("gpt-4o")).toBe("chat") + }) + + test("state.models undefined → heuristic (codex → responses)", () => { + state.models = undefined + expect(getModelMode("gpt-5-codex")).toBe("responses") + }) + + test("state.models undefined → heuristic (gpt-4o → chat)", () => { + state.models = undefined + expect(getModelMode("gpt-4o")).toBe("chat") + }) +}) + +// --------------------------------------------------------------------------- +// Route-level: POST /v1/chat/completions blocks Responses-only models +// --------------------------------------------------------------------------- + +describe("chat-completions route blocks responses-only models", () => { + beforeAll(() => { + state.copilotToken = "test-token" + state.vsCodeVersion = "1.99.0" + state.accountType = "individual" + state.manualApprove = false + }) + + test("gpt-5-codex → 400 with responses_only_model code", async () => { + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5-codex", + messages: [{ role: "user", content: "hello" }], + }), + }) + expect(res.status).toBe(400) + const body = (await res.json()) as { + error: { type: string; code: string; message: string } + } + expect(body.error.code).toBe("responses_only_model") + 
expect(body.error.type).toBe("invalid_request_error") + expect(body.error.message).toContain("gpt-5-codex") + expect(body.error.message).toContain("/v1/responses") + }) + + test("o1-pro → 400 with responses_only_model code", async () => { + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "o1-pro", + messages: [{ role: "user", content: "hello" }], + }), + }) + expect(res.status).toBe(400) + const body = (await res.json()) as { + error: { code: string } + } + expect(body.error.code).toBe("responses_only_model") + }) + + test("gpt-5.1-codex-max → 400 with responses_only_model code", async () => { + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.1-codex-max", + messages: [{ role: "user", content: "hello" }], + }), + }) + expect(res.status).toBe(400) + const body = (await res.json()) as { + error: { code: string } + } + expect(body.error.code).toBe("responses_only_model") + }) +}) From 394203ab0d76a143059355bf15ed97fc100c16df Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 15:22:35 +0800 Subject: [PATCH 14/17] fix(routing): capabilities short-circuit, dated alias regex, guard ordering, test fixes (#5) Co-Authored-By: Claude Sonnet 4.6 --- src/lib/model-routing.ts | 6 +- src/routes/chat-completions/handler.ts | 8 +-- tests/model-routing.test.ts | 95 ++++++++++++++++++++++++-- 3 files changed, 97 insertions(+), 12 deletions(-) diff --git a/src/lib/model-routing.ts b/src/lib/model-routing.ts index 094874663..b2aeb3abc 100644 --- a/src/lib/model-routing.ts +++ b/src/lib/model-routing.ts @@ -30,6 +30,7 @@ export function getModelMode(modelId: string): ModelMode { if (state.models?.data) { const entry = state.models.data.find((m) => m.id === modelId) if (entry?.capabilities.type === 
"responses") return "responses" + if (entry?.capabilities.type === "chat") return "chat" // trust upstream when explicit } // 2. Static heuristic: Responses-only models have "codex" in the name @@ -43,7 +44,8 @@ export function getModelMode(modelId: string): ModelMode { export function isResponsesOnlyModel(modelId: string): boolean { // codex family: gpt-5-codex, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5.3-codex, etc. if (modelId.includes("codex")) return true - // o-pro family: o1-pro, o3-pro - if (/^o\d+-pro$/.test(modelId)) return true + // o-pro family: o1-pro, o3-pro, o1-pro-2025-04-09, o3-pro-2025-01-10, etc. + // Covers: o\d+-pro(?:-\d{4}-\d{2}-\d{2})? — requires string to end after "pro" or date + if (/^o\d+-pro(?:-\d{4}-\d{2}-\d{2})?$/.test(modelId)) return true return false } diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 5cbc290c1..456d1282b 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -4,7 +4,7 @@ import consola from "consola" import { streamSSE, type SSEMessage } from "hono/streaming" import { awaitApproval } from "~/lib/approval" -import { isResponsesOnlyModel } from "~/lib/model-routing" +import { getModelMode } from "~/lib/model-routing" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" @@ -16,12 +16,10 @@ import { } from "~/services/copilot/create-chat-completions" export async function handleCompletion(c: Context) { - await checkRateLimit(state) - let payload = await c.req.json() consola.debug("Request payload:", JSON.stringify(payload).slice(-400)) - if (isResponsesOnlyModel(payload.model)) { + if (getModelMode(payload.model) === "responses") { return c.json( { error: { @@ -34,6 +32,8 @@ export async function handleCompletion(c: Context) { ) } + await checkRateLimit(state) + // Find the selected model const selectedModel = state.models?.data.find( (model) => 
model.id === payload.model, diff --git a/tests/model-routing.test.ts b/tests/model-routing.test.ts index 2243a0f5f..8d758643f 100644 --- a/tests/model-routing.test.ts +++ b/tests/model-routing.test.ts @@ -1,4 +1,11 @@ -import { describe, test, expect, afterEach, beforeAll } from "bun:test" +import { + describe, + test, + expect, + afterEach, + beforeEach, + beforeAll, +} from "bun:test" import { getModelMode, isResponsesOnlyModel } from "../src/lib/model-routing" import { state } from "../src/lib/state" @@ -30,6 +37,12 @@ describe("isResponsesOnlyModel", () => { expect(isResponsesOnlyModel("claude-sonnet-4-5")).toBe(false)) test("o4-mini → chat", () => expect(isResponsesOnlyModel("o4-mini")).toBe(false)) + test("o4-pro → responses-only", () => + expect(isResponsesOnlyModel("o4-pro")).toBe(true)) + test("o1-pro-2025-04-09 (dated alias) → responses-only", () => + expect(isResponsesOnlyModel("o1-pro-2025-04-09")).toBe(true)) + test("o3-pro-mini → NOT responses-only (not a pro variant)", () => + expect(isResponsesOnlyModel("o3-pro-mini")).toBe(false)) }) // --------------------------------------------------------------------------- @@ -37,7 +50,11 @@ describe("isResponsesOnlyModel", () => { // --------------------------------------------------------------------------- describe("getModelMode — with loaded models list", () => { - const savedModels = state.models + let savedModels: typeof state.models + + beforeEach(() => { + savedModels = state.models + }) afterEach(() => { state.models = savedModels @@ -69,7 +86,7 @@ describe("getModelMode — with loaded models list", () => { expect(getModelMode("future-responses-model")).toBe("responses") }) - test("model with capabilities.type=chat in list → falls through to heuristic", () => { + test("model with explicit capabilities.type=chat in list → chat (upstream authoritative)", () => { state.models = { object: "list", data: [ @@ -92,9 +109,8 @@ describe("getModelMode — with loaded models list", () => { }, ], } - // 
capabilities.type is "chat" so list check doesn't return "responses", - // falls through to static heuristic which sees "codex" → responses - expect(getModelMode("gpt-5-codex")).toBe("responses") + // capabilities.type = "chat" is authoritative → returns "chat" even though name contains "codex" + expect(getModelMode("gpt-5-codex")).toBe("chat") }) test("regular chat model → chat", () => { @@ -139,6 +155,8 @@ describe("getModelMode — with loaded models list", () => { // --------------------------------------------------------------------------- describe("chat-completions route blocks responses-only models", () => { + let savedModels: typeof state.models + beforeAll(() => { state.copilotToken = "test-token" state.vsCodeVersion = "1.99.0" @@ -146,6 +164,14 @@ describe("chat-completions route blocks responses-only models", () => { state.manualApprove = false }) + beforeEach(() => { + savedModels = state.models + }) + + afterEach(() => { + state.models = savedModels + }) + test("gpt-5-codex → 400 with responses_only_model code", async () => { const res = await server.request("/v1/chat/completions", { method: "POST", @@ -196,4 +222,61 @@ describe("chat-completions route blocks responses-only models", () => { } expect(body.error.code).toBe("responses_only_model") }) + + test("model with capabilities.type=responses in state is blocked at /v1/chat/completions", async () => { + // Set up a model that only the capabilities path would catch (not the heuristic) + state.models = { + object: "list", + data: [ + { + id: "o5-turbo", // no "codex", not "o\d+-pro" + vendor: "OpenAI", + name: "O5 Turbo", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "responses", + }, + }, + ], + } + + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: 
JSON.stringify({ + model: "o5-turbo", + messages: [{ role: "user", content: "hi" }], + }), + }) + expect(res.status).toBe(400) + const body = (await res.json()) as { error: { code: string } } + expect(body.error.code).toBe("responses_only_model") + }) + + test("gpt-4o is NOT blocked at /v1/chat/completions (chat model)", async () => { + // gpt-4o is a chat model — should pass the guard (will fail at upstream but not with 400) + // We just need status !== 400 with code responses_only_model + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4o", + messages: [{ role: "user", content: "hi" }], + }), + }) + // Should NOT return the routing 400 + if (res.status === 400) { + const body = (await res.json()) as { error?: { code?: string } } + expect(body.error?.code).not.toBe("responses_only_model") + } + // Any other status is fine (500 from missing upstream, etc.) + }) }) From 68d6b94aabdc8171dd7002ffb1d2be2b49d620f1 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 15:32:45 +0800 Subject: [PATCH 15/17] fix(routing): undefined model guard, codex regex, capabilities type narrowing (#5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - getModelMode: return "chat" early when modelId is falsy — prevents TypeError crash on requests missing the model field - isResponsesOnlyModel: anchor codex check with word boundaries (/(?:^|-)codex(?:-|$)/) to avoid false-positives on future codex-mini - ModelCapabilities.type: narrow to "chat"|"responses"|(string & {}) so routing logic is type-checked against known upstream values Co-Authored-By: Claude Sonnet 4.6 --- src/lib/model-routing.ts | 6 +++++- src/services/copilot/get-models.ts | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/lib/model-routing.ts b/src/lib/model-routing.ts index 
b2aeb3abc..89f6ddd30 100644 --- a/src/lib/model-routing.ts +++ b/src/lib/model-routing.ts @@ -26,6 +26,9 @@ export type ModelMode = "chat" | "responses" * "responses" = must use /responses; "chat" = use /chat/completions (or native Anthropic). */ export function getModelMode(modelId: string): ModelMode { + // Guard: treat missing/empty model as "chat" — upstream will reject with a proper error + if (!modelId) return "chat" + // 1. Check state.models capabilities if available (future-proof) if (state.models?.data) { const entry = state.models.data.find((m) => m.id === modelId) @@ -43,7 +46,8 @@ export function getModelMode(modelId: string): ModelMode { */ export function isResponsesOnlyModel(modelId: string): boolean { // codex family: gpt-5-codex, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5.3-codex, etc. - if (modelId.includes("codex")) return true + // Anchored to word boundaries to avoid matching hypothetical future "codex-mini" chat models. + if (/(?:^|-)codex(?:-|$)/.test(modelId)) return true // o-pro family: o1-pro, o3-pro, o1-pro-2025-04-09, o3-pro-2025-01-10, etc. // Covers: o\d+-pro(?:-\d{4}-\d{2}-\d{2})? — requires string to end after "pro" or date if (/^o\d+-pro(?:-\d{4}-\d{2}-\d{2})?$/.test(modelId)) return true diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts index 3cfa30af0..fb215bce8 100644 --- a/src/services/copilot/get-models.ts +++ b/src/services/copilot/get-models.ts @@ -36,7 +36,8 @@ interface ModelCapabilities { object: string supports: ModelSupports tokenizer: string - type: string + /** Known values: "chat" | "responses". Open string for forward-compat. 
*/ + type: "chat" | "responses" | (string & {}) } export interface Model { From e1df9cce5ad460808ccdd803eb822f8bc05c9832 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 15:36:33 +0800 Subject: [PATCH 16/17] feat(responses): preserve encrypted_content, strip null status (#6) Adds a sanitiseResponsesOutput translation layer that guarantees encrypted_content on reasoning items is never stripped (required for multi-turn continuity) and removes status: null fields that Copilot upstream rejects on re-submission (litellm PR #22370). Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 9 +- src/routes/responses/translation.ts | 44 ++++++++++ tests/responses-translation.test.ts | 130 ++++++++++++++++++++++++++++ 3 files changed, 180 insertions(+), 3 deletions(-) create mode 100644 src/routes/responses/translation.ts create mode 100644 tests/responses-translation.test.ts diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index 01d52ade0..44de3eaf5 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -7,7 +7,9 @@ import { awaitApproval } from "~/lib/approval" import { state } from "~/lib/state" import { createResponses } from "~/services/copilot/create-responses" -import type { ResponsesPayload } from "./types" +import type { ResponsesPayload, ResponsesResponse } from "./types" + +import { sanitiseResponsesOutput } from "./translation" export async function handleResponses(c: Context): Promise { let payload: ResponsesPayload @@ -35,11 +37,12 @@ export async function handleResponses(c: Context): Promise { const response = await createResponses(payload) if (!payload.stream) { + const sanitised = sanitiseResponsesOutput(response as ResponsesResponse) consola.debug( "Responses non-streaming response:", - JSON.stringify(response).slice(0, 400), + JSON.stringify(sanitised).slice(0, 400), ) - return c.json(response) + return c.json(sanitised) } 
// Streaming: proxy SSE events verbatim (same pattern as native Anthropic pass-through) diff --git a/src/routes/responses/translation.ts b/src/routes/responses/translation.ts new file mode 100644 index 000000000..262a79984 --- /dev/null +++ b/src/routes/responses/translation.ts @@ -0,0 +1,44 @@ +/** + * Response translation for the Responses API path. + * + * Key invariants: + * - reasoning items MUST preserve `encrypted_content` verbatim (required for + * multi-turn continuity — see issue #6 and litellm PR #17130) + * - `status: null` fields are stripped (Copilot upstream rejects null status + * on subsequent turns — see litellm PR #22370) + */ + +import type { ResponsesResponse, ResponsesOutputItem } from "./types" + +// Upstream (e.g. litellm) may send `status: null` even though our TypeScript +// types forbid it. Use a separate loose type to represent that reality. +type LooseOutputItem = Omit & { + status?: string | null +} + +/** + * Sanitise a Responses API response object before forwarding to the client. + * + * Guarantees: + * 1. `encrypted_content` on reasoning items is preserved (never stripped). + * 2. `status: null` is removed from all output items. + * 3. All other fields are passed through untouched. + */ +export function sanitiseResponsesOutput( + response: ResponsesResponse, +): ResponsesResponse { + return { + ...response, + output: response.output.map((item) => sanitiseOutputItem(item)), + } +} + +function sanitiseOutputItem(item: ResponsesOutputItem): ResponsesOutputItem { + // Cast to the loose type so the null-status check is valid at compile time. 
+ const loose = item as unknown as LooseOutputItem + if (loose.status === null) { + const { status: _dropped, ...rest } = loose + return rest as unknown as ResponsesOutputItem + } + return item +} diff --git a/tests/responses-translation.test.ts b/tests/responses-translation.test.ts new file mode 100644 index 000000000..0d6d06eed --- /dev/null +++ b/tests/responses-translation.test.ts @@ -0,0 +1,130 @@ +import { describe, test, expect } from "bun:test" + +import type { ResponsesResponse } from "../src/routes/responses/types" + +import { sanitiseResponsesOutput } from "../src/routes/responses/translation" + +// Minimal valid response fixture +function makeResponse(output: ResponsesResponse["output"]): ResponsesResponse { + return { + id: "resp_test", + object: "response", + created_at: 1_700_000_000, + model: "gpt-5", + status: "completed", + output, + } +} + +describe("sanitiseResponsesOutput", () => { + test("preserves encrypted_content on reasoning items", () => { + const response = makeResponse([ + { + type: "reasoning", + id: "rs_abc", + encrypted_content: "opaque-blob-xyz", + summary: [{ type: "summary_text", text: "thought about it" }], + status: "completed", + }, + ]) + const result = sanitiseResponsesOutput(response) + const reasoning = result.output[0] as { encrypted_content?: string } + expect(reasoning.encrypted_content).toBe("opaque-blob-xyz") + }) + + test("strips status: null from reasoning items", () => { + const response = makeResponse([ + { + type: "reasoning", + id: "rs_null_status", + encrypted_content: "blob", + // status is null — TypeScript won't allow this directly but upstream sends it + } as unknown as ResponsesResponse["output"][0], + ]) + + const result = sanitiseResponsesOutput(response) + const item = result.output[0] as Record + expect("status" in item).toBe(false) + }) + + test("preserves non-null status on reasoning items", () => { + const response = makeResponse([ + { + type: "reasoning", + id: "rs_completed", + status: "completed", 
+ }, + ]) + const result = sanitiseResponsesOutput(response) + expect((result.output[0] as { status: string }).status).toBe("completed") + }) + + test("passes message items through unchanged", () => { + const response = makeResponse([ + { + type: "message", + id: "msg_1", + role: "assistant", + content: [{ type: "output_text", text: "hello" }], + status: "completed", + }, + ]) + const result = sanitiseResponsesOutput(response) + expect(result.output[0]).toEqual(response.output[0]) + }) + + test("passes function_call items through unchanged", () => { + const response = makeResponse([ + { + type: "function_call", + id: "fc_1", + call_id: "call_abc", + name: "get_weather", + arguments: '{"city":"London"}', + status: "completed", + }, + ]) + const result = sanitiseResponsesOutput(response) + expect(result.output[0]).toEqual(response.output[0]) + }) + + test("handles empty output array", () => { + const response = makeResponse([]) + const result = sanitiseResponsesOutput(response) + expect(result.output).toEqual([]) + }) + + test("top-level response fields are preserved", () => { + const response = makeResponse([]) + response.usage = { input_tokens: 100, output_tokens: 50, total_tokens: 150 } + const result = sanitiseResponsesOutput(response) + expect(result.usage).toEqual(response.usage) + expect(result.id).toBe("resp_test") + expect(result.model).toBe("gpt-5") + }) + + test("multiple mixed output items all sanitised", () => { + const response = makeResponse([ + { + type: "reasoning", + id: "rs_1", + encrypted_content: "secret", + status: null as unknown as "completed", + }, + { + type: "message", + id: "msg_1", + role: "assistant", + content: [{ type: "output_text", text: "answer" }], + status: "completed", + }, + ]) + const result = sanitiseResponsesOutput(response) + // First item: status stripped, encrypted_content preserved + const first = result.output[0] as Record + expect("status" in first).toBe(false) + expect(first["encrypted_content"]).toBe("secret") + // 
Second item: unchanged + expect(result.output[1]).toEqual(response.output[1]) + }) +}) From b191620e95726bfe0b798d2b5e39da6d9d69e377 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 22:11:45 +0800 Subject: [PATCH 17/17] fix(responses): sanitise SSE stream + fix false-positive null-status test (#6) - Export sanitiseOutputItem so streaming path can use it - Streaming handler parses each SSE event and strips status:null from embedded item/output fields before forwarding - Fix false-positive test: inject status:null explicitly + assert encrypted_content survives; add in_progress / incomplete passthrough tests Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 55 ++++++++++++++++++++--------- src/routes/responses/translation.ts | 8 ++++- tests/responses-translation.test.ts | 32 +++++++++++++++-- 3 files changed, 75 insertions(+), 20 deletions(-) diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index 44de3eaf5..53ead897e 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -9,7 +9,7 @@ import { createResponses } from "~/services/copilot/create-responses" import type { ResponsesPayload, ResponsesResponse } from "./types" -import { sanitiseResponsesOutput } from "./translation" +import { sanitiseOutputItem, sanitiseResponsesOutput } from "./translation" export async function handleResponses(c: Context): Promise { let payload: ResponsesPayload @@ -56,25 +56,46 @@ export async function handleResponses(c: Context): Promise { }>) { if (!rawEvent.data) continue - // Forward verbatim first - await stream.writeSSE({ - event: rawEvent.event, - data: rawEvent.data, - }) - - // Parse only for debug logging - try { - const parsed = JSON.parse(rawEvent.data) as { type: string } - consola.debug("Responses SSE event:", parsed.type) - } catch { - // [DONE] sentinel is expected at stream end — only warn on unexpected data - if (rawEvent.data !== 
"[DONE]") { - consola.warn( - "Could not parse Responses SSE chunk for logging:", - rawEvent.data.slice(0, 200), + // Sanitise status:null from embedded output items before forwarding. + // SSE events like response.output_item.done carry full item snapshots + // which can contain null status fields rejected by upstream on re-submission. + let forwardData = rawEvent.data + if (rawEvent.data !== "[DONE]") { + try { + const parsed = JSON.parse(rawEvent.data) as Record + consola.debug( + "Responses SSE event:", + (parsed as { type?: string }).type, ) + // Sanitise embedded item or output array + if (parsed["item"]) { + parsed["item"] = sanitiseOutputItem( + parsed["item"] as Parameters[0], + ) + } + if (Array.isArray(parsed["output"])) { + parsed["output"] = ( + parsed["output"] as Array< + Parameters[0] + > + ).map((i) => sanitiseOutputItem(i)) + } + forwardData = JSON.stringify(parsed) + } catch { + // [DONE] sentinel or truly malformed chunk + if (rawEvent.data !== "[DONE]") { + consola.warn( + "Could not parse Responses SSE chunk for logging:", + rawEvent.data.slice(0, 200), + ) + } } } + + await stream.writeSSE({ + event: rawEvent.event, + data: forwardData, + }) } }, async (err, stream) => { diff --git a/src/routes/responses/translation.ts b/src/routes/responses/translation.ts index 262a79984..4fa2c456d 100644 --- a/src/routes/responses/translation.ts +++ b/src/routes/responses/translation.ts @@ -33,7 +33,13 @@ export function sanitiseResponsesOutput( } } -function sanitiseOutputItem(item: ResponsesOutputItem): ResponsesOutputItem { +/** + * Sanitise a single output item from an SSE event or non-streaming response. + * Exported so the streaming path can apply the same logic per-event. + */ +export function sanitiseOutputItem( + item: ResponsesOutputItem, +): ResponsesOutputItem { // Cast to the loose type so the null-status check is valid at compile time. 
const loose = item as unknown as LooseOutputItem if (loose.status === null) { diff --git a/tests/responses-translation.test.ts b/tests/responses-translation.test.ts index 0d6d06eed..b1f4e26c2 100644 --- a/tests/responses-translation.test.ts +++ b/tests/responses-translation.test.ts @@ -32,19 +32,47 @@ describe("sanitiseResponsesOutput", () => { expect(reasoning.encrypted_content).toBe("opaque-blob-xyz") }) - test("strips status: null from reasoning items", () => { + test("strips status: null from reasoning items (and preserves encrypted_content)", () => { + // Explicitly inject status: null — upstream sends this despite TS types forbidding it const response = makeResponse([ { type: "reasoning", id: "rs_null_status", encrypted_content: "blob", - // status is null — TypeScript won't allow this directly but upstream sends it + status: null, } as unknown as ResponsesResponse["output"][0], ]) const result = sanitiseResponsesOutput(response) const item = result.output[0] as Record + // status must be stripped expect("status" in item).toBe(false) + // encrypted_content must survive + expect(item["encrypted_content"]).toBe("blob") + }) + + test("preserves status: in_progress unchanged", () => { + const response = makeResponse([ + { + type: "reasoning", + id: "rs_inprogress", + status: "in_progress", + }, + ]) + const result = sanitiseResponsesOutput(response) + expect((result.output[0] as { status: string }).status).toBe("in_progress") + }) + + test("preserves status: incomplete unchanged", () => { + const response = makeResponse([ + { + type: "reasoning", + id: "rs_incomplete", + status: "incomplete", + }, + ]) + const result = sanitiseResponsesOutput(response) + expect((result.output[0] as { status: string }).status).toBe("incomplete") }) test("preserves non-null status on reasoning items", () => {