From c83a9f8365654bb91da4036f8d1c5608f2a3d2be Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 01:49:16 +0800 Subject: [PATCH 01/17] feat: VS Code header simulation accuracy (#37) (#47) * docs: add PRD for vscode-header-simulation (#37) * feat(version-detection): add VS Code + Copilot Chat version auto-detection (#37) - Replace AUR scrape in getVSCodeVersion() with official update.code.visualstudio.com API; keep AUR as secondary fallback; retain hardcoded "1.104.3" tertiary fallback - Add getCopilotChatVersion() querying VS Code Marketplace API for GitHub.copilot-chat; fallback "0.26.7" - Both functions use module-level 24h in-memory TTL cache - Add copilotChatVersion?: string to State interface and initial state - Add tests covering: successful fetch, network failure fallback, and cache hit within TTL Co-Authored-By: Claude Sonnet 4.6 * feat(api-config): wire dynamic versions into headers + startup log (#37) - Add cacheCopilotChatVersion() in utils.ts; call it alongside cacheVSCodeVersion() at startup - Log resolved versions at startup: "VS Code: <version> Copilot Chat: <version>" - Replace hardcoded COPILOT_VERSION constant in api-config.ts with state.copilotChatVersion ?? 
"0.26.7" in both copilotHeaders() and githubHeaders() - Remove dead EDITOR_PLUGIN_VERSION and USER_AGENT module-level constants - Add JSDoc comment block on copilotHeaders() documenting every header's source/purpose - Add tests/utils.test.ts covering cacheCopilotChatVersion and cacheVSCodeVersion with mocked services Co-Authored-By: Claude Sonnet 4.6 * fix: address review round 1 feedback (#37) - C1: validate version strings against /^\d+\.\d+\.\d+$/ before use in HTTP headers (CRLF injection prevention) - C2: skip cache write when version equals FALLBACK constant, allowing retry on next call - C3/I2: run getVSCodeVersion + getCopilotChatVersion in parallel via Promise.all in start.ts - I1: delete cacheCopilotChatVersion and cacheVSCodeVersion wrapper functions from utils.ts; call service fns directly from start.ts - I3: add consola.warn() before every FALLBACK usage (fetch failures and format check) - I4: remove four Marketplace response interfaces; use eslint-disabled any + unknown typed chain access - I5: remove pointless const cached = cache alias; reference cache directly - I6: tighten AUR regex from /pkgver=([0-9.]+)/ to /pkgver=(\d+\.\d+\.\d+)/ - T1-T7: add missing test cases (malformed JSON, missing pkgver, HTTP 503, empty version, TTL expiry, undefined state fallback, CRLF injection) Co-Authored-By: Claude Sonnet 4.6 * fix: address review round 2 feedback (#37) Co-Authored-By: Claude Sonnet 4.6 --------- Co-authored-by: Claude Sonnet 4.6 --- .gitignore | 2 +- bun.lock | 1 + docs/prd/vscode-header-simulation.md | 67 +++++ src/lib/api-config.ts | 50 +++- src/lib/state.ts | 1 + src/lib/utils.ts | 10 - src/services/get-copilot-chat-version.ts | 82 ++++++ src/services/get-vscode-version.ts | 81 +++++- src/services/version-cache.ts | 6 + src/start.ts | 12 +- tests/utils.test.ts | 45 +++ tests/version-detection.test.ts | 349 +++++++++++++++++++++++ 12 files changed, 667 insertions(+), 39 deletions(-) create mode 100644 docs/prd/vscode-header-simulation.md create 
mode 100644 src/services/get-copilot-chat-version.ts create mode 100644 src/services/version-cache.ts create mode 100644 tests/utils.test.ts create mode 100644 tests/version-detection.test.ts diff --git a/.gitignore b/.gitignore index 577a4f199..9650fd579 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,4 @@ node_modules/ .eslintcache # build output -dist/ \ No newline at end of file +dist/.crew/ diff --git a/bun.lock b/bun.lock index 20e895e7f..9ece87578 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "copilot-api", diff --git a/docs/prd/vscode-header-simulation.md b/docs/prd/vscode-header-simulation.md new file mode 100644 index 000000000..30ae0a91a --- /dev/null +++ b/docs/prd/vscode-header-simulation.md @@ -0,0 +1,67 @@ +# VS Code Header Simulation Accuracy + +## Status +Approved + +## Overview +Auto-detect and keep current the VS Code + Copilot Chat version strings used in every upstream request header, so traffic looks indistinguishable from a real VS Code editor session. + +## Motivation +copilot-api impersonates VS Code Copilot Chat toward GitHub's upstream. Hardcoded version strings become stale as VS Code releases new versions every month. Stale strings increase the distinguishability of copilot-api traffic from legitimate editor traffic. The fix: query live version sources at startup, cache them, and fall back to hardcoded values on failure — so headers always reflect the latest shipping release. + +## Requirements + +1. **VS Code version auto-detect** — On startup, query `https://update.code.visualstudio.com/api/releases/stable` (JSON array, first element is latest stable version). Use the result for `editor-version: vscode/`. +2. **Copilot Chat extension version auto-detect** — On startup, query the VS Code Marketplace API for `GitHub.copilot-chat` and extract the latest version. Use it for `editor-plugin-version: copilot-chat/` and `user-agent: GitHubCopilotChat/`. 
+3. **24-hour TTL in-memory cache** — Cache both versions for 24 h so repeated token refreshes don't re-query external APIs unnecessarily. +4. **Graceful fallback** — If either fetch fails (network error, timeout, unexpected shape), log a warning and continue with the existing hardcoded fallback values. Never crash startup. +5. **Startup log** — At `consola.info` level, print the resolved version strings (`VSCode: X.Y.Z`, `Copilot Chat: A.B.C`) so the user can verify what's being used. +6. **`x-request-id`** — Confirm it is already generated per-request via `crypto.randomUUID()` (it is — no change needed). +7. **Header documentation** — Add a comment block in `src/lib/api-config.ts` explaining each header's source and how to update it. + +## Acceptance Criteria + +- On a clean startup with network access, printed versions match the latest stable VS Code release visible at `https://code.visualstudio.com/updates/`. +- On startup with network blocked, a warning is logged and the server still starts with fallback values. +- No new CLI flags required — version detection is automatic. +- All existing tests pass. + +## Technical Approach + +### VS Code version +`GET https://update.code.visualstudio.com/api/releases/stable` returns a JSON array of version strings. Take `[0]`. + +### Copilot Chat version +VS Code Marketplace API: +``` +GET https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery +Content-Type: application/json +Accept: application/json;api-version=3.0-preview.1 + +Body: { + "filters": [{ "criteria": [{ "filterType": 7, "value": "GitHub.copilot-chat" }] }], + "flags": 529 +} +``` +Response path: `results[0].extensions[0].versions[0].version` + +### Caching +Simple module-level `{ version: string, fetchedAt: number }` objects. If `Date.now() - fetchedAt < 24 * 60 * 60 * 1000`, return cached value. + +### File changes +- `src/services/get-vscode-version.ts` — extend with VS Code stable API; keep AUR fallback as secondary fallback. 
+- `src/services/get-copilot-chat-version.ts` — new file for Copilot Chat extension version. +- `src/lib/utils.ts` — `cacheVSCodeVersion()` also calls `cacheCopilotChatVersion()`. +- `src/lib/state.ts` — add `copilotChatVersion?: string`. +- `src/lib/api-config.ts` — use `state.copilotChatVersion` for `editor-plugin-version` and `user-agent`; add header documentation comment. + +## Testing Strategy +- Unit test `get-vscode-version.ts`: mock fetch → returns parsed version; mock fail → returns fallback. +- Unit test `get-copilot-chat-version.ts`: mock fetch → returns parsed version; mock fail → returns fallback. +- Existing translation tests must continue to pass. + +## Out of Scope +- `OpenAI-Organization` header (not confirmed in VS Code traffic). +- `X-Vscode-User-Agent-Library-Comment` (not confirmed). +- Persistent disk cache (in-memory TTL is sufficient for a single server process). +- Auto-restart on version change. diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts index 83bce92ad..294959285 100644 --- a/src/lib/api-config.ts +++ b/src/lib/api-config.ts @@ -1,5 +1,7 @@ import { randomUUID } from "node:crypto" +import { FALLBACK as COPILOT_CHAT_VERSION_FALLBACK } from "~/services/get-copilot-chat-version" + import type { State } from "./state" export const standardHeaders = () => ({ @@ -7,24 +9,38 @@ export const standardHeaders = () => ({ accept: "application/json", }) -const COPILOT_VERSION = "0.26.7" -const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}` -const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}` - const API_VERSION = "2025-04-01" export const copilotBaseUrl = (state: State) => state.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${state.accountType}.githubcopilot.com` + +/** + * Headers sent with every upstream request to mimic VS Code Copilot Chat traffic. 
+ * + * Header sources: + * - Authorization — Copilot token from GitHub OAuth flow + * - editor-version — Auto-detected VS Code stable release (update.code.visualstudio.com) + * - editor-plugin-version — Auto-detected GitHub.copilot-chat Marketplace version + * - user-agent — Same as editor-plugin-version, GitHubCopilotChat/ + * - copilot-integration-id — Fixed "vscode-chat" + * - openai-intent — Fixed "conversation-panel" + * - x-github-api-version — Fixed "2025-04-01" (verify periodically against VS Code source) + * - x-request-id — Per-request UUID via crypto.randomUUID() + * - x-vscode-user-agent-library-version — Fixed "electron-fetch" + * - copilot-vision-request — Added when request includes image content + */ export const copilotHeaders = (state: State, vision: boolean = false) => { + const copilotVersion = + state.copilotChatVersion ?? COPILOT_CHAT_VERSION_FALLBACK const headers: Record = { Authorization: `Bearer ${state.copilotToken}`, "content-type": standardHeaders()["content-type"], "copilot-integration-id": "vscode-chat", "editor-version": `vscode/${state.vsCodeVersion}`, - "editor-plugin-version": EDITOR_PLUGIN_VERSION, - "user-agent": USER_AGENT, + "editor-plugin-version": `copilot-chat/${copilotVersion}`, + "user-agent": `GitHubCopilotChat/${copilotVersion}`, "openai-intent": "conversation-panel", "x-github-api-version": API_VERSION, "x-request-id": randomUUID(), @@ -37,15 +53,19 @@ export const copilotHeaders = (state: State, vision: boolean = false) => { } export const GITHUB_API_BASE_URL = "https://api.github.com" -export const githubHeaders = (state: State) => ({ - ...standardHeaders(), - authorization: `token ${state.githubToken}`, - "editor-version": `vscode/${state.vsCodeVersion}`, - "editor-plugin-version": EDITOR_PLUGIN_VERSION, - "user-agent": USER_AGENT, - "x-github-api-version": API_VERSION, - "x-vscode-user-agent-library-version": "electron-fetch", -}) +export const githubHeaders = (state: State) => { + const copilotVersion = + 
state.copilotChatVersion ?? COPILOT_CHAT_VERSION_FALLBACK + return { + ...standardHeaders(), + authorization: `token ${state.githubToken}`, + "editor-version": `vscode/${state.vsCodeVersion}`, + "editor-plugin-version": `copilot-chat/${copilotVersion}`, + "user-agent": `GitHubCopilotChat/${copilotVersion}`, + "x-github-api-version": API_VERSION, + "x-vscode-user-agent-library-version": "electron-fetch", + } +} export const GITHUB_BASE_URL = "https://github.com" export const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98" diff --git a/src/lib/state.ts b/src/lib/state.ts index 5ba4dc1d1..01f491c35 100644 --- a/src/lib/state.ts +++ b/src/lib/state.ts @@ -7,6 +7,7 @@ export interface State { accountType: string models?: ModelsResponse vsCodeVersion?: string + copilotChatVersion?: string manualApprove: boolean rateLimitWait: boolean diff --git a/src/lib/utils.ts b/src/lib/utils.ts index cc80be667..892c61d74 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -1,7 +1,4 @@ -import consola from "consola" - import { getModels } from "~/services/copilot/get-models" -import { getVSCodeVersion } from "~/services/get-vscode-version" import { state } from "./state" @@ -17,10 +14,3 @@ export async function cacheModels(): Promise { const models = await getModels() state.models = models } - -export const cacheVSCodeVersion = async () => { - const response = await getVSCodeVersion() - state.vsCodeVersion = response - - consola.info(`Using VSCode version: ${response}`) -} diff --git a/src/services/get-copilot-chat-version.ts b/src/services/get-copilot-chat-version.ts new file mode 100644 index 000000000..b4278b669 --- /dev/null +++ b/src/services/get-copilot-chat-version.ts @@ -0,0 +1,82 @@ +import consola from "consola" + +import { VERSION_CACHE_TTL_MS, type VersionCache } from "./version-cache" + +export const FALLBACK = "0.26.7" + +let cache: VersionCache | undefined + +async function fetchFromMarketplace(): Promise { + const controller = new AbortController() + const timeout = 
setTimeout(() => { + controller.abort() + }, 5000) + + try { + const response = await fetch( + "https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery", + { + method: "POST", + headers: { + "Content-Type": "application/json", + Accept: "application/json;api-version=3.0-preview.1", + }, + body: JSON.stringify({ + filters: [ + { + criteria: [{ filterType: 7, value: "GitHub.copilot-chat" }], + }, + ], + flags: 529, + }), + signal: controller.signal, + }, + ) + + /* eslint-disable @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-member-access */ + const data = (await response.json()) as any + const parsed: unknown = + data?.results?.[0]?.extensions?.[0]?.versions?.[0]?.version + /* eslint-enable @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-member-access */ + + if (typeof parsed !== "string" || !parsed) { + throw new Error("Unexpected response shape") + } + + return parsed + } finally { + clearTimeout(timeout) + } +} + +export async function getCopilotChatVersion(): Promise { + if (cache && Date.now() - cache.fetchedAt < VERSION_CACHE_TTL_MS) { + return cache.version + } + + let fetched: string | null = null + + try { + fetched = await fetchFromMarketplace() + } catch { + consola.warn( + "Failed to fetch Copilot Chat version from Marketplace, using fallback", + ) + } + + const version = + fetched !== null && /^\d+\.\d+\.\d+$/.test(fetched) ? 
fetched : FALLBACK + + if (fetched !== null && version !== FALLBACK) { + // eslint-disable-next-line require-atomic-updates + cache = { version, fetchedAt: Date.now() } + } else if (fetched !== null) { + // Format validation rejected the fetched value + const safeVersion = fetched.slice(0, 40).replaceAll(/[^\x20-\x7E]/g, "?") + consola.warn( + `Invalid version format received: ${safeVersion}, using fallback`, + ) + } + + return version +} diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts index 6078f09b5..f9732bfed 100644 --- a/src/services/get-vscode-version.ts +++ b/src/services/get-vscode-version.ts @@ -1,6 +1,36 @@ +import consola from "consola" + +import { VERSION_CACHE_TTL_MS, type VersionCache } from "./version-cache" + const FALLBACK = "1.104.3" -export async function getVSCodeVersion() { +let cache: VersionCache | undefined + +async function fetchFromOfficialApi(): Promise { + const controller = new AbortController() + const timeout = setTimeout(() => { + controller.abort() + }, 5000) + + try { + const response = await fetch( + "https://update.code.visualstudio.com/api/releases/stable", + { signal: controller.signal }, + ) + + const versions = (await response.json()) as Array + + if (Array.isArray(versions) && versions.length > 0 && versions[0]) { + return versions[0] + } + + throw new Error("Unexpected response shape") + } finally { + clearTimeout(timeout) + } +} + +async function fetchFromAur(): Promise { const controller = new AbortController() const timeout = setTimeout(() => { controller.abort() @@ -9,25 +39,54 @@ export async function getVSCodeVersion() { try { const response = await fetch( "https://aur.archlinux.org/cgit/aur.git/plain/PKGBUILD?h=visual-studio-code-bin", - { - signal: controller.signal, - }, + { signal: controller.signal }, ) const pkgbuild = await response.text() - const pkgverRegex = /pkgver=([0-9.]+)/ - const match = pkgbuild.match(pkgverRegex) + const match = 
pkgbuild.match(/pkgver=(\d+\.\d+\.\d+)/) - if (match) { + if (match?.[1]) { return match[1] } - return FALLBACK - } catch { - return FALLBACK + throw new Error("Version not found in PKGBUILD") } finally { clearTimeout(timeout) } } -await getVSCodeVersion() +export async function getVSCodeVersion(): Promise { + if (cache && Date.now() - cache.fetchedAt < VERSION_CACHE_TTL_MS) { + return cache.version + } + + let fetched: string | null = null + + try { + fetched = await fetchFromOfficialApi() + } catch { + try { + fetched = await fetchFromAur() + } catch { + consola.warn( + "Failed to fetch VS Code version from all sources, using fallback", + ) + } + } + + const version = + fetched !== null && /^\d+\.\d+\.\d+$/.test(fetched) ? fetched : FALLBACK + + if (fetched !== null && version !== FALLBACK) { + // eslint-disable-next-line require-atomic-updates + cache = { version, fetchedAt: Date.now() } + } else if (fetched !== null) { + // Format validation rejected the fetched value + const safeVersion = fetched.slice(0, 40).replaceAll(/[^\x20-\x7E]/g, "?") + consola.warn( + `Invalid version format received: ${safeVersion}, using fallback`, + ) + } + + return version +} diff --git a/src/services/version-cache.ts b/src/services/version-cache.ts new file mode 100644 index 000000000..839d29438 --- /dev/null +++ b/src/services/version-cache.ts @@ -0,0 +1,6 @@ +export const VERSION_CACHE_TTL_MS = 24 * 60 * 60 * 1000 + +export interface VersionCache { + version: string + fetchedAt: number +} diff --git a/src/start.ts b/src/start.ts index 14abbbdff..9fca3b37f 100644 --- a/src/start.ts +++ b/src/start.ts @@ -11,8 +11,10 @@ import { initProxyFromEnv } from "./lib/proxy" import { generateEnvScript } from "./lib/shell" import { state } from "./lib/state" import { setupCopilotToken, setupGitHubToken } from "./lib/token" -import { cacheModels, cacheVSCodeVersion } from "./lib/utils" +import { cacheModels } from "./lib/utils" import { server } from "./server" +import { 
getCopilotChatVersion } from "./services/get-copilot-chat-version" +import { getVSCodeVersion } from "./services/get-vscode-version" interface RunServerOptions { port: number @@ -48,7 +50,13 @@ export async function runServer(options: RunServerOptions): Promise { state.showToken = options.showToken await ensurePaths() - await cacheVSCodeVersion() + ;[state.vsCodeVersion, state.copilotChatVersion] = await Promise.all([ + getVSCodeVersion(), + getCopilotChatVersion(), + ]) + consola.info( + `VS Code: ${state.vsCodeVersion} Copilot Chat: ${state.copilotChatVersion}`, + ) if (options.githubToken) { state.githubToken = options.githubToken diff --git a/tests/utils.test.ts b/tests/utils.test.ts new file mode 100644 index 000000000..923d15545 --- /dev/null +++ b/tests/utils.test.ts @@ -0,0 +1,45 @@ +import { describe, test, expect, mock, beforeEach } from "bun:test" + +import type { ModelsResponse } from "../src/services/copilot/get-models" + +// --------------------------------------------------------------------------- +// cacheModels — integration test against the real state singleton, +// with the service function mocked. 
+// --------------------------------------------------------------------------- + +const fakeModels: ModelsResponse = { + object: "list", + data: [], +} + +const mockGetModels = mock(() => Promise.resolve(fakeModels)) + +void mock.module("../src/services/copilot/get-models", () => ({ + getModels: mockGetModels, +})) + +// Import after mocking so the mocks are active +import { state } from "../src/lib/state" +import { cacheModels } from "../src/lib/utils" + +describe("cacheModels", () => { + beforeEach(() => { + state.models = undefined + mockGetModels.mockReset() + }) + + test("sets state.models with value from service", async () => { + mockGetModels.mockResolvedValue(fakeModels) + + expect(state.models).toBeUndefined() + await cacheModels() + expect(state.models).toEqual(fakeModels) + }) + + test("calls getModels exactly once", async () => { + mockGetModels.mockResolvedValue(fakeModels) + + await cacheModels() + expect(mockGetModels).toHaveBeenCalledTimes(1) + }) +}) diff --git a/tests/version-detection.test.ts b/tests/version-detection.test.ts new file mode 100644 index 000000000..a0a4b96e8 --- /dev/null +++ b/tests/version-detection.test.ts @@ -0,0 +1,349 @@ +import { describe, test, expect, mock, beforeEach } from "bun:test" +import { setSystemTime } from "bun:test" + +import type { state as StateType } from "../src/lib/state" +import type { getCopilotChatVersion as GetCopilotChatVersion } from "../src/services/get-copilot-chat-version" +import type { getVSCodeVersion as GetVSCodeVersion } from "../src/services/get-vscode-version" + +// --------------------------------------------------------------------------- +// We test the modules by mocking global `fetch` before importing them. +// Each describe block re-imports after resetting the module registry so we +// get fresh module-level caches for every suite. 
+// --------------------------------------------------------------------------- + +// Helpers ---------------------------------------------------------------- + +function makeFetchMock(handler: (url: string, init?: RequestInit) => Response) { + return mock((url: string | URL | Request, init?: RequestInit) => { + const urlStr = url instanceof Request ? url.url : url.toString() + return Promise.resolve(handler(urlStr, init)) + }) as unknown as typeof fetch +} + +function jsonResponse(body: unknown, status = 200): Response { + return new Response(JSON.stringify(body), { + status, + headers: { "Content-Type": "application/json" }, + }) +} + +function textResponse(body: string, status = 200): Response { + return new Response(body, { + status, + headers: { "Content-Type": "text/plain" }, + }) +} + +// Module type aliases for properly typed dynamic imports +type VSCodeVersionModule = { getVSCodeVersion: typeof GetVSCodeVersion } +type CopilotChatVersionModule = { + getCopilotChatVersion: typeof GetCopilotChatVersion +} +type StateModule = { state: typeof StateType } + +// --------------------------------------------------------------------------- +// getVSCodeVersion tests +// --------------------------------------------------------------------------- + +describe("getVSCodeVersion", () => { + let callCount = 0 + + beforeEach(() => { + callCount = 0 + }) + + test("returns version from official VS Code API (primary path)", async () => { + globalThis.fetch = makeFetchMock((_url) => { + callCount++ + return jsonResponse(["1.99.0", "1.98.0"]) + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now()}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.99.0") + expect(callCount).toBe(1) + }) + + test("falls back to AUR when official API fails", async () => { + let requestIndex = 0 + globalThis.fetch = makeFetchMock((_url) => { + const i = requestIndex++ + if (i === 0) throw new Error("network 
error") + // AUR PKGBUILD response + return textResponse("pkgver=1.88.0\narch=(x86_64)") + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 1}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.88.0") + }) + + test("returns hardcoded fallback when both official API and AUR fail", async () => { + globalThis.fetch = makeFetchMock((_url) => { + throw new Error("offline") + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 2}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.104.3") + }) + + test("cache prevents second fetch within TTL", async () => { + let fetchCallCount = 0 + globalThis.fetch = makeFetchMock((_url) => { + fetchCallCount++ + return jsonResponse(["1.99.5"]) + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 3}` + )) as VSCodeVersionModule + + const v1 = await mod.getVSCodeVersion() + const v2 = await mod.getVSCodeVersion() + + expect(v1).toBe("1.99.5") + expect(v2).toBe("1.99.5") + // fetch should only have been called once + expect(fetchCallCount).toBe(1) + }) + + // T1 — VS Code API returns malformed JSON (non-array body {}): falls back to AUR + test("T1: falls back to AUR when official API returns non-array body", async () => { + let requestIndex = 0 + globalThis.fetch = makeFetchMock((_url) => { + const i = requestIndex++ + if (i === 0) return jsonResponse({}) // non-array — triggers "Unexpected response shape" + // AUR PKGBUILD response + return textResponse("pkgver=1.90.0\narch=(x86_64)") + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 10}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.90.0") + expect(requestIndex).toBe(2) + }) + + // T2 — AUR PKGBUILD missing pkgver= line: returns hardcoded fallback + test("T2: returns 
hardcoded fallback when AUR PKGBUILD has no pkgver line", async () => { + let requestIndex = 0 + globalThis.fetch = makeFetchMock((_url) => { + const i = requestIndex++ + if (i === 0) throw new Error("network error") + // AUR response missing pkgver= + return textResponse("pkgdesc='VSCode'\npkgrel=1\n") + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 11}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.104.3") + }) + + // T5 — TTL expiry triggers refetch + test("T5: TTL expiry triggers a new fetch", async () => { + const CACHE_TTL = 24 * 60 * 60 * 1000 + let fetchCount = 0 + globalThis.fetch = makeFetchMock((_url) => { + fetchCount++ + return jsonResponse(["1.99.0"]) + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 12}` + )) as VSCodeVersionModule + + // First call — populates cache + await mod.getVSCodeVersion() + expect(fetchCount).toBe(1) + + // Advance clock past TTL + setSystemTime(new Date(Date.now() + CACHE_TTL + 1)) + + try { + // Second call — cache expired, should fetch again + await mod.getVSCodeVersion() + expect(fetchCount).toBe(2) + } finally { + // Always reset system time + setSystemTime() + } + }) +}) + +// --------------------------------------------------------------------------- +// getCopilotChatVersion tests +// --------------------------------------------------------------------------- + +describe("getCopilotChatVersion", () => { + const MARKETPLACE_URL = + "https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery" + + const validMarketplaceResponse = { + results: [ + { + extensions: [ + { + versions: [{ version: "0.30.1" }], + }, + ], + }, + ], + } + + test("returns version from Marketplace API", async () => { + globalThis.fetch = makeFetchMock((url) => { + expect(url).toBe(MARKETPLACE_URL) + return jsonResponse(validMarketplaceResponse) + }) + + const mod = (await import( + 
`../src/services/get-copilot-chat-version.ts?t=${Date.now()}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.30.1") + }) + + test("returns hardcoded fallback on network error", async () => { + globalThis.fetch = makeFetchMock((_url) => { + throw new Error("connection refused") + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 1}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) + + test("returns hardcoded fallback when API response has unexpected shape", async () => { + globalThis.fetch = makeFetchMock((_url) => { + return jsonResponse({ results: [] }) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 2}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) + + test("cache prevents second fetch within TTL", async () => { + let fetchCallCount = 0 + globalThis.fetch = makeFetchMock((_url) => { + fetchCallCount++ + return jsonResponse(validMarketplaceResponse) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 3}` + )) as CopilotChatVersionModule + + const v1 = await mod.getCopilotChatVersion() + const v2 = await mod.getCopilotChatVersion() + + expect(v1).toBe("0.30.1") + expect(v2).toBe("0.30.1") + expect(fetchCallCount).toBe(1) + }) + + // T3 — Marketplace returns HTTP 503 + test("T3: returns fallback when Marketplace returns HTTP 503", async () => { + globalThis.fetch = makeFetchMock((_url) => { + return new Response("Service Unavailable", { status: 503 }) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 10}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) + + // T4 — Marketplace returns 
version: "" (empty string) + test("T4: returns fallback when Marketplace version is empty string", async () => { + globalThis.fetch = makeFetchMock((_url) => { + return jsonResponse({ + results: [{ extensions: [{ versions: [{ version: "" }] }] }], + }) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 11}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) + + // T7 — Format validation rejects CRLF-injected version + test("T7: rejects version with CRLF injection and returns fallback", async () => { + globalThis.fetch = makeFetchMock((_url) => { + return jsonResponse({ + results: [ + { + extensions: [{ versions: [{ version: "1.0.0\r\nEvil: header" }] }], + }, + ], + }) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 12}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) +}) + +// --------------------------------------------------------------------------- +// State interface test — shape check +// --------------------------------------------------------------------------- + +describe("State type includes copilotChatVersion", () => { + test("state object accepts copilotChatVersion field", async () => { + const { state } = (await import( + `../src/lib/state.ts?t=${Date.now()}` + )) as StateModule + // Field must be optionally present (undefined by default) + expect(state.copilotChatVersion).toBeUndefined() + + // Should be assignable without TS errors (runtime check) + state.copilotChatVersion = "0.26.7" + expect(state.copilotChatVersion).toBe("0.26.7") + }) +}) + +// --------------------------------------------------------------------------- +// T6 — api-config header uses fallback when copilotChatVersion is undefined +// --------------------------------------------------------------------------- + +describe("copilotHeaders 
fallback", () => { + test("T6: editor-plugin-version uses fallback string when state.copilotChatVersion is undefined", async () => { + const { copilotHeaders } = await import("../src/lib/api-config") + const minimalState = { + accountType: "individual", + manualApprove: false, + rateLimitWait: false, + showToken: false, + copilotToken: "tok", + vsCodeVersion: "1.99.0", + copilotChatVersion: undefined, + } + + const headers = copilotHeaders( + minimalState as Parameters[0], + false, + ) + expect(headers["editor-plugin-version"]).not.toBe("copilot-chat/undefined") + expect(headers["editor-plugin-version"]).toBe("copilot-chat/0.26.7") + }) +}) From cc11c1d22b3f6e4c805bb314699ba00cc4790708 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 01:50:28 +0800 Subject: [PATCH 02/17] feat(native-anthropic): add pass-through service, dispatch, type fixes (#38-#45) --- docs/prd/native-anthropic-passthrough.md | 74 ++++++++ src/routes/messages/anthropic-types.ts | 39 ++++- src/routes/messages/handler.ts | 75 +++++++- src/routes/messages/non-stream-translation.ts | 26 ++- .../copilot/create-messages-native.ts | 161 ++++++++++++++++++ src/services/copilot/native-models.ts | 57 +++++++ 6 files changed, 412 insertions(+), 20 deletions(-) create mode 100644 docs/prd/native-anthropic-passthrough.md create mode 100644 src/services/copilot/create-messages-native.ts create mode 100644 src/services/copilot/native-models.ts diff --git a/docs/prd/native-anthropic-passthrough.md b/docs/prd/native-anthropic-passthrough.md new file mode 100644 index 000000000..606c98ca8 --- /dev/null +++ b/docs/prd/native-anthropic-passthrough.md @@ -0,0 +1,74 @@ +# Native Anthropic Pass-Through for Claude Models + +## Status +Approved + +## Overview +Route Anthropic `/v1/messages` requests for Claude models directly to the GitHub Copilot upstream's native Anthropic endpoint, bypassing the existing OpenAI translation layer. 
This preserves thinking blocks with `signature` field, `top_k`, `cache_control`, and richer usage stats — none of which survive the current translation round-trip. + +## Motivation +GitHub Copilot's upstream (`api.enterprise.githubcopilot.com`) natively speaks the Anthropic Messages API for all Claude 4.5+ models. The current code path translates Anthropic → OpenAI → sends → translates back, losing: +- `thinking` blocks (completely dropped) +- `signature` field on thinking blocks (required for multi-turn reasoning) +- `cache_creation_input_tokens` in usage +- `top_k` parameter +- `cache_control` on system/user blocks + +The fix: detect Claude models by `vendor === "Anthropic"` from the `/models` endpoint, and forward requests verbatim to `/v1/messages` upstream. + +## Requirements + +1. **`create-messages-native.ts`** — Service client that POSTs Anthropic payloads directly to `${copilotBaseUrl}/v1/messages` with correct headers (`anthropic-version`, `anthropic-beta`). +2. **Route dispatch** — `handler.ts` checks `isNativeAnthropicModel(model)` and branches to native path for Claude, translation path for everything else. +3. **`native-models.ts`** — `isNativeAnthropicModel(modelId)` checks `state.models` vendor field; falls back to `claude-` prefix heuristic before models load. +4. **Type fixes** — `anthropic-types.ts`: `signature?` on `AnthropicThinkingBlock`; union `thinking` type for adaptive (opus-4.7+); `output_config`; `AnthropicImageBlock` URL source; `AnthropicToolResultBlock.content` widened. +5. **Adaptive thinking upgrade** — `create-messages-native.ts` auto-upgrades `{ type: "enabled" }` → `{ type: "adaptive" }` + `output_config.effort` for `claude-opus-4.7+` models. +6. **SSE proxy** — Streaming responses from native path forwarded verbatim to client (no re-translation needed). + +## Acceptance Criteria + +- Claude models (`vendor === "Anthropic"`) route to native path; non-Claude models route to translation path. 
+- Thinking blocks with `signature` field returned to client in both streaming and non-streaming. +- Multi-turn conversations with thinking blocks (echoing `signature`) work correctly. +- `claude-opus-4.7+` with `{ type: "enabled" }` thinking auto-upgrades to adaptive format; no HTTP 400. +- All existing tests pass; new tests cover native vs. translation dispatch. + +## Technical Approach + +### Model detection +`state.models.data` from `/models` endpoint has `vendor: "Anthropic"` for all Claude models. `isNativeAnthropicModel()` checks this first, falls back to `startsWith("claude-")` heuristic. + +### Headers for native path +``` +anthropic-version: 2023-06-01 +anthropic-beta: interleaved-thinking-2025-05-14,prompt-caching-2024-07-31 +``` +Plus all standard Copilot headers (auth, editor-version, etc.). + +### Streaming proxy +Native upstream sends proper Anthropic SSE events. Parse `event.type` for logging; forward `rawEvent.data` verbatim. No translation needed. + +### Adaptive thinking (opus-4.7+) +If model matches `/^claude-opus-4[.-](\d+)/` with minor ≥ 7, auto-upgrade `{ type: "enabled", budget_tokens: N }` → `{ type: "adaptive" }` + `output_config: { effort: "medium" }`. 
+ +## File Changes + +**New:** +- `src/services/copilot/create-messages-native.ts` +- `src/services/copilot/native-models.ts` + +**Modified:** +- `src/routes/messages/anthropic-types.ts` — type fixes +- `src/routes/messages/handler.ts` — dispatch logic +- `src/routes/messages/non-stream-translation.ts` — remove stale comment; fix image source narrowing + +## Testing Strategy +- Unit: `isNativeAnthropicModel()` with populated vs empty `state.models` +- Unit: `buildUpstreamPayload()` adaptive thinking upgrade +- Integration: handler routes Claude models to native, GPT models to translation +- Existing translation tests must still pass + +## Out of Scope +- Persistent caching of native responses +- URL image sources (rejected by upstream; type kept for fidelity) +- Responses API (#1 epic) diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 881fffcc8..dda7657b3 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -18,9 +18,16 @@ export interface AnthropicMessagesPayload { type: "auto" | "any" | "tool" | "none" name?: string } - thinking?: { - type: "enabled" - budget_tokens?: number + /** + * Thinking config. + * - Legacy (claude-3.7 / claude-4.5): `{ type: "enabled", budget_tokens: N }` + * - New adaptive (claude-opus-4.7+): `{ type: "adaptive" }` paired with + * `output_config.effort` in the request body. + */ + thinking?: { type: "enabled"; budget_tokens?: number } | { type: "adaptive" } + /** Used together with `thinking: { type: "adaptive" }` on opus-4.7+. 
*/ + output_config?: { + effort?: "low" | "medium" | "high" } service_tier?: "auto" | "standard_only" } @@ -32,17 +39,24 @@ export interface AnthropicTextBlock { export interface AnthropicImageBlock { type: "image" - source: { - type: "base64" - media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" - data: string - } + source: + | { + type: "base64" + media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" + data: string + } + | { + /** URL images are rejected by Copilot upstream — kept for type fidelity only. */ + type: "url" + url: string + } } export interface AnthropicToolResultBlock { type: "tool_result" tool_use_id: string - content: string + /** May be a plain string or an array of content blocks. */ + content: string | Array is_error?: boolean } @@ -56,6 +70,12 @@ export interface AnthropicToolUseBlock { export interface AnthropicThinkingBlock { type: "thinking" thinking: string + /** + * Opaque signature returned by the upstream for extended thinking blocks. + * Must be echoed back in subsequent turns to enable multi-turn reasoning. + * Present on native pass-through responses; absent on translated responses. + */ + signature?: string } export type AnthropicUserContentBlock = @@ -106,6 +126,7 @@ export interface AnthropicResponse { output_tokens: number cache_creation_input_tokens?: number cache_read_input_tokens?: number + /** Present on native pass-through responses. 
*/ service_tier?: "standard" | "priority" | "batch" } } diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf6243..cf691ffd7 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -11,9 +11,12 @@ import { type ChatCompletionChunk, type ChatCompletionResponse, } from "~/services/copilot/create-chat-completions" +import { createMessagesNative } from "~/services/copilot/create-messages-native" +import { isNativeAnthropicModel } from "~/services/copilot/native-models" import { type AnthropicMessagesPayload, + type AnthropicStreamEventData, type AnthropicStreamState, } from "./anthropic-types" import { @@ -28,16 +31,80 @@ export async function handleCompletion(c: Context) { const anthropicPayload = await c.req.json() consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) + if (state.manualApprove) { + await awaitApproval() + } + + // Route to native Anthropic pass-through for Claude models to preserve + // thinking blocks (with signature), top_k, cache_control, and richer usage. 
+ if (isNativeAnthropicModel(anthropicPayload.model)) { + return handleNative(c, anthropicPayload) + } + + return handleTranslated(c, anthropicPayload) +} + +// --------------------------------------------------------------------------- +// Native Anthropic pass-through (Claude 4.5+ models) +// --------------------------------------------------------------------------- + +async function handleNative( + c: Context, + payload: AnthropicMessagesPayload, +): Promise { + consola.debug("Using native Anthropic pass-through for", payload.model) + + const response = await createMessagesNative(payload) + + if (!payload.stream) { + // Non-streaming: upstream already returned a complete Anthropic response + consola.debug( + "Native non-streaming response:", + JSON.stringify(response).slice(0, 400), + ) + return c.json(response) + } + + // Streaming: proxy the SSE events directly to the client + consola.debug("Native streaming response — proxying SSE events") + return streamSSE(c, async (stream) => { + for await (const rawEvent of response as AsyncIterable<{ + data?: string + event?: string + }>) { + if (rawEvent.data === "[DONE]") break + if (!rawEvent.data) continue + + // Parse to log but forward the original JSON verbatim + try { + const parsed = JSON.parse(rawEvent.data) as AnthropicStreamEventData + consola.debug("Native SSE event:", parsed.type) + await stream.writeSSE({ + event: parsed.type, + data: rawEvent.data, + }) + } catch { + // Malformed chunk — skip + consola.warn("Could not parse native SSE chunk:", rawEvent.data) + } + } + }) +} + +// --------------------------------------------------------------------------- +// Translation path (non-Claude models via /chat/completions) +// --------------------------------------------------------------------------- + +async function handleTranslated( + c: Context, + anthropicPayload: AnthropicMessagesPayload, +): Promise { const openAIPayload = translateToOpenAI(anthropicPayload) consola.debug( "Translated OpenAI request 
payload:", JSON.stringify(openAIPayload), ) - if (state.manualApprove) { - await awaitApproval() - } - const response = await createChatCompletions(openAIPayload) if (isNonStreaming(response)) { diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index dc41e6382..e154c3714 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -1,3 +1,5 @@ +import consola from "consola" + import { type ChatCompletionResponse, type ChatCompletionsPayload, @@ -213,12 +215,20 @@ function mapContent( break } case "image": { - contentParts.push({ - type: "image_url", - image_url: { - url: `data:${block.source.media_type};base64,${block.source.data}`, - }, - }) + if (block.source.type === "base64") { + contentParts.push({ + type: "image_url", + image_url: { + url: `data:${block.source.media_type};base64,${block.source.data}`, + }, + }) + } else { + // URL images are rejected by Copilot upstream — skip silently + // (type kept for fidelity when round-tripping through native path) + consola.warn( + "URL image source not supported in translation path — skipping", + ) + } break } @@ -302,7 +312,9 @@ export function translateToAnthropic( } } - // Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses + // Note: the translation path routes Claude models via /chat/completions which + // does not return thinking blocks. For thinking block support use the native + // Anthropic pass-through path (create-messages-native.ts). return { id: response.id, diff --git a/src/services/copilot/create-messages-native.ts b/src/services/copilot/create-messages-native.ts new file mode 100644 index 000000000..0fef793bf --- /dev/null +++ b/src/services/copilot/create-messages-native.ts @@ -0,0 +1,161 @@ +/** + * Native Anthropic pass-through service. 
+ * + * The GitHub Copilot upstream (`api.enterprise.githubcopilot.com`) natively + * speaks the Anthropic Messages API for all Claude 4.5+ models. Routing + * requests directly to `/v1/messages` instead of translating them through + * `/chat/completions` gives us: + * + * - Real thinking blocks with `signature` field (multi-turn reasoning) + * - `cache_creation_input_tokens` in usage + * - `top_k` support + * - No lossy translation round-trip + * + * See research notes: ~/copilot-models-litellm/copilot_models.py + */ + +import consola from "consola" +import { events } from "fetch-event-stream" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" + +import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config" +import { HTTPError } from "~/lib/error" +import { state } from "~/lib/state" + +/** + * Forward an Anthropic-format request directly to Copilot's native `/v1/messages` + * endpoint, preserving all fields (thinking, signature, top_k, cache_control, …). 
+ * + * Returns: + * - For non-streaming: the raw Anthropic JSON response object + * - For streaming: an async iterable of SSE events (fetch-event-stream) + */ +export const createMessagesNative = async ( + payload: AnthropicMessagesPayload, +) => { + if (!state.copilotToken) throw new Error("Copilot token not found") + + const hasVision = messageHasImages(payload) + const headers = buildNativeHeaders(hasVision) + + const upstream = `${copilotBaseUrl(state)}/v1/messages` + consola.debug("Native Anthropic upstream:", upstream) + + // Strip fields that are Copilot-API–specific or unsupported by upstream + const body = buildUpstreamPayload(payload) + + const response = await fetch(upstream, { + method: "POST", + headers, + body: JSON.stringify(body), + }) + + if (!response.ok) { + consola.error("Native Anthropic upstream error", response.status) + throw new HTTPError("Native Anthropic upstream error", response) + } + + if (payload.stream) { + return events(response) + } + + return response.json() +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Build headers for the Anthropic native endpoint. + * + * The upstream requires `anthropic-version` and does NOT want an `openai-intent` + * header. We reuse `copilotHeaders()` for auth/agent headers and then layer the + * Anthropic-specific ones on top. 
+ */ +function buildNativeHeaders(vision: boolean): Record { + const base = copilotHeaders(state, vision) + + // The native /v1/messages endpoint expects these Anthropic headers + return { + ...base, + "anthropic-version": "2023-06-01", + // Enable beta features: extended thinking + prompt caching + "anthropic-beta": + "interleaved-thinking-2025-05-14,prompt-caching-2024-07-31", + // Accept Anthropic streaming format + accept: "text/event-stream", + // The upstream doesn't use openai-intent for the messages path + // but leaving it does no harm; keep for header consistency + } +} + +/** + * Produce the payload forwarded to upstream. + * + * We pass through almost everything verbatim. The only transformation is that + * `claude-opus-4.7+` requires the new adaptive thinking format + * (`thinking: { type: "adaptive" }` + `output_config.effort`) rather than the + * legacy `{ type: "enabled", budget_tokens: N }`. If the caller already sent + * the correct format we leave it alone; if they sent the old format and the + * model requires adaptive, we upgrade automatically. + */ +function buildUpstreamPayload( + payload: AnthropicMessagesPayload, +): AnthropicMessagesPayload { + const { thinking, output_config, ...rest } = payload + + if (!thinking) { + return payload + } + + if (isAdaptiveThinkingModel(payload.model)) { + // Upgrade legacy enabled → adaptive if needed + if (thinking.type === "enabled") { + consola.debug( + `Upgrading thinking format to adaptive for model ${payload.model}`, + ) + return { + ...rest, + thinking: { type: "adaptive" }, + output_config: output_config ?? { effort: "medium" }, + } + } + // Already adaptive — forward as-is + return { ...rest, thinking, output_config } + } + + // Non-adaptive model — forward legacy format, drop output_config + return { ...rest, thinking } +} + +/** + * Models that require the new adaptive thinking API. + * Populated dynamically at dispatch time via `isNativeAnthropicModel()`. 
+ * This hard-coded check is the fallback. + */ +function isAdaptiveThinkingModel(model: string): boolean { + // claude-opus-4.7 and above use adaptive thinking + const match = model.match(/^claude-opus-4[.-](\d+)/) + if (match) { + const minor = Number.parseInt(match[1], 10) + return minor >= 7 + } + return false +} + +/** + * Check whether the request contains any image blocks (to set vision headers). + */ +function messageHasImages(payload: AnthropicMessagesPayload): boolean { + for (const msg of payload.messages) { + if (typeof msg.content === "string") continue + if (Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block.type === "image") return true + } + } + } + return false +} diff --git a/src/services/copilot/native-models.ts b/src/services/copilot/native-models.ts new file mode 100644 index 000000000..e411b34e4 --- /dev/null +++ b/src/services/copilot/native-models.ts @@ -0,0 +1,57 @@ +/** + * Dynamic detection of which models support native Anthropic pass-through. + * + * The Copilot `/models` endpoint returns a `vendor` field for each model. + * Any model with `vendor === "Anthropic"` is served natively via the + * `/v1/messages` path at `api.enterprise.githubcopilot.com`. + * + * We cache the set of native model IDs after the first `/models` call and + * keep it in sync with `state.models` (which is refreshed periodically by + * the token-rotation logic). + */ + +import { state } from "~/lib/state" + +/** + * Returns true if the given model ID should be routed to the native + * Anthropic pass-through service instead of the OpenAI chat-completions + * translation layer. + * + * Resolution order: + * 1. If `state.models` is populated, check whether the model's vendor is + * "Anthropic" (live, always up-to-date). + * 2. Fall back to a static prefix list for resilience at startup before + * the models list is fetched. 
+ */ +export function isNativeAnthropicModel(modelId: string): boolean { + if (state.models?.data) { + const entry = state.models.data.find((m) => m.id === modelId) + if (entry) { + return entry.vendor === "Anthropic" + } + // Model not found in list — fall through to prefix heuristic + } + + return matchesAnthropicPrefix(modelId) +} + +/** + * Static prefix heuristic used before `state.models` is populated. + * Covers all current Claude variants served by Copilot. + */ +function matchesAnthropicPrefix(modelId: string): boolean { + return modelId.startsWith("claude-") || modelId.startsWith("claude_") +} + +/** + * Return the full list of model IDs that support native Anthropic pass-through, + * derived from `state.models`. Useful for logging / diagnostics. + * + * Falls back to an empty array if the models list has not been fetched yet. + */ +export function nativeAnthropicModelIds(): ReadonlyArray { + if (!state.models?.data) return [] + return state.models.data + .filter((m) => m.vendor === "Anthropic") + .map((m) => m.id) +} From dce9e6c3355e686ea28ddbecb5d6891adb255ce8 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 01:57:22 +0800 Subject: [PATCH 03/17] fix: address review round 1 feedback for native pass-through (#38) - H1: Remove dead [DONE] sentinel from native SSE loop (Anthropic terminates via connection close) - H3: Conditionally set accept: text/event-stream only when streaming - M1: buildUpstreamPayload returns rest (not payload) when thinking absent, stripping output_config - M2: Truncate raw SSE data to 200 chars in warn log to prevent log injection - L2: Remove claude_ underscore prefix heuristic (no known Anthropic model uses it) - L3: Document >= 7 threshold comment in isAdaptiveThinkingModel - L4: Replace verbose JSDoc on nativeAnthropicModelIds with concise standard form - Export buildUpstreamPayload for direct unit testing - Add tests/native-passthrough.test.ts with T1-T9 covering 
payload transform and model routing Co-Authored-By: Claude Sonnet 4.6 --- src/routes/messages/handler.ts | 6 +- .../copilot/create-messages-native.ts | 19 ++- src/services/copilot/native-models.ts | 8 +- tests/native-passthrough.test.ts | 160 ++++++++++++++++++ 4 files changed, 178 insertions(+), 15 deletions(-) create mode 100644 tests/native-passthrough.test.ts diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index cf691ffd7..2bf1005ea 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -72,7 +72,6 @@ async function handleNative( data?: string event?: string }>) { - if (rawEvent.data === "[DONE]") break if (!rawEvent.data) continue // Parse to log but forward the original JSON verbatim @@ -85,7 +84,10 @@ async function handleNative( }) } catch { // Malformed chunk — skip - consola.warn("Could not parse native SSE chunk:", rawEvent.data) + consola.warn( + "Could not parse native SSE chunk:", + rawEvent.data.slice(0, 200), + ) } } }) diff --git a/src/services/copilot/create-messages-native.ts b/src/services/copilot/create-messages-native.ts index 0fef793bf..e6c41cb72 100644 --- a/src/services/copilot/create-messages-native.ts +++ b/src/services/copilot/create-messages-native.ts @@ -37,7 +37,7 @@ export const createMessagesNative = async ( if (!state.copilotToken) throw new Error("Copilot token not found") const hasVision = messageHasImages(payload) - const headers = buildNativeHeaders(hasVision) + const headers = buildNativeHeaders(hasVision, Boolean(payload.stream)) const upstream = `${copilotBaseUrl(state)}/v1/messages` consola.debug("Native Anthropic upstream:", upstream) @@ -74,7 +74,10 @@ export const createMessagesNative = async ( * header. We reuse `copilotHeaders()` for auth/agent headers and then layer the * Anthropic-specific ones on top. 
*/ -function buildNativeHeaders(vision: boolean): Record { +function buildNativeHeaders( + vision: boolean, + stream: boolean, +): Record { const base = copilotHeaders(state, vision) // The native /v1/messages endpoint expects these Anthropic headers @@ -84,10 +87,9 @@ function buildNativeHeaders(vision: boolean): Record { // Enable beta features: extended thinking + prompt caching "anthropic-beta": "interleaved-thinking-2025-05-14,prompt-caching-2024-07-31", - // Accept Anthropic streaming format - accept: "text/event-stream", - // The upstream doesn't use openai-intent for the messages path - // but leaving it does no harm; keep for header consistency + // Only request SSE streaming format when the caller is streaming; + // non-streaming calls should use the default application/json accept + ...(stream ? { accept: "text/event-stream" } : {}), } } @@ -101,13 +103,13 @@ function buildNativeHeaders(vision: boolean): Record { * the correct format we leave it alone; if they sent the old format and the * model requires adaptive, we upgrade automatically. 
*/ -function buildUpstreamPayload( +export function buildUpstreamPayload( payload: AnthropicMessagesPayload, ): AnthropicMessagesPayload { const { thinking, output_config, ...rest } = payload if (!thinking) { - return payload + return rest // safe: output_config only valid alongside thinking } if (isAdaptiveThinkingModel(payload.model)) { @@ -140,6 +142,7 @@ function isAdaptiveThinkingModel(model: string): boolean { const match = model.match(/^claude-opus-4[.-](\d+)/) if (match) { const minor = Number.parseInt(match[1], 10) + // claude-opus-4.7 and later use the new adaptive thinking API (not legacy budget_tokens) return minor >= 7 } return false diff --git a/src/services/copilot/native-models.ts b/src/services/copilot/native-models.ts index e411b34e4..7c3d4f59b 100644 --- a/src/services/copilot/native-models.ts +++ b/src/services/copilot/native-models.ts @@ -40,14 +40,12 @@ export function isNativeAnthropicModel(modelId: string): boolean { * Covers all current Claude variants served by Copilot. */ function matchesAnthropicPrefix(modelId: string): boolean { - return modelId.startsWith("claude-") || modelId.startsWith("claude_") + return modelId.startsWith("claude-") } /** - * Return the full list of model IDs that support native Anthropic pass-through, - * derived from `state.models`. Useful for logging / diagnostics. - * - * Falls back to an empty array if the models list has not been fetched yet. + * Returns all model IDs that support native Anthropic pass-through. + * Used for diagnostics and startup logging. 
*/ export function nativeAnthropicModelIds(): ReadonlyArray { if (!state.models?.data) return [] diff --git a/tests/native-passthrough.test.ts b/tests/native-passthrough.test.ts new file mode 100644 index 000000000..759df813d --- /dev/null +++ b/tests/native-passthrough.test.ts @@ -0,0 +1,160 @@ +import { describe, test, expect, afterEach } from "bun:test" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" + +import { state } from "~/lib/state" +import { buildUpstreamPayload } from "~/services/copilot/create-messages-native" +import { isNativeAnthropicModel } from "~/services/copilot/native-models" + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Minimal valid payload base — only the fields required by the type. */ +function basePayload( + overrides: Partial, +): AnthropicMessagesPayload { + return { + model: "claude-sonnet-4-5", + messages: [{ role: "user", content: "hi" }], + max_tokens: 1024, + ...overrides, + } +} + +// --------------------------------------------------------------------------- +// buildUpstreamPayload tests +// --------------------------------------------------------------------------- + +describe("buildUpstreamPayload", () => { + // T1 — output_config present but thinking absent → output_config stripped + test("T1: strips output_config when thinking is absent", () => { + const payload = basePayload({ + output_config: { effort: "high" }, + }) + const result = buildUpstreamPayload(payload) + expect(result).not.toHaveProperty("output_config") + expect(result).not.toHaveProperty("thinking") + }) + + // T2 — adaptive upgrade preserves explicit output_config: { effort: "high" } + test("T2: adaptive upgrade preserves explicit output_config effort", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled" }, + output_config: { effort: "high" }, + 
} as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + // Should keep caller's "high", not override to "medium" + expect(result.output_config).toEqual({ effort: "high" }) + }) + + // T3 — already adaptive → forwarded as-is + test("T3: already-adaptive thinking forwarded as-is", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "adaptive" }, + output_config: { effort: "low" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + expect(result.output_config).toEqual({ effort: "low" }) + }) + + // T4 — legacy model with enabled thinking → kept as-is, no adaptive upgrade + test("T4: legacy model with enabled thinking kept as-is", () => { + const payload = basePayload({ + model: "claude-sonnet-4-5", + thinking: { type: "enabled", budget_tokens: 1024 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 }) + expect(result).not.toHaveProperty("output_config") + }) +}) + +// --------------------------------------------------------------------------- +// isNativeAnthropicModel tests +// --------------------------------------------------------------------------- + +// Save original models state and restore after each test +const originalModels = state.models + +afterEach(() => { + state.models = originalModels +}) + +describe("isNativeAnthropicModel", () => { + // T5 — model in loaded list with vendor "Anthropic" → true + test("T5: model with vendor Anthropic in loaded list → true", () => { + state.models = { + object: "list", + data: [ + { + id: "claude-sonnet-4-5", + vendor: "Anthropic", + name: "Claude Sonnet 4.5", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "claude", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", 
+ type: "chat", + }, + }, + ], + } + expect(isNativeAnthropicModel("claude-sonnet-4-5")).toBe(true) + }) + + // T6 — model in loaded list with vendor "OpenAI" → false + test("T6: model with vendor OpenAI in loaded list → false", () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-4o", + vendor: "OpenAI", + name: "GPT-4o", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "chat", + }, + }, + ], + } + expect(isNativeAnthropicModel("gpt-4o")).toBe(false) + }) + + // T7 — model NOT in loaded list, starts with "claude-" → true (heuristic) + test("T7: model not in loaded list but starts with claude- → true", () => { + state.models = { object: "list", data: [] } + expect(isNativeAnthropicModel("claude-future-1")).toBe(true) + }) + + // T8 — model NOT in loaded list, starts with "gpt-" → false + test("T8: model not in loaded list and starts with gpt- → false", () => { + state.models = { object: "list", data: [] } + expect(isNativeAnthropicModel("gpt-5")).toBe(false) + }) + + // T9 — state.models undefined → heuristic + test("T9: state.models undefined → heuristic (claude- prefix → true)", () => { + state.models = undefined + expect(isNativeAnthropicModel("claude-something")).toBe(true) + }) +}) From 6c92355168144fbcdc60cb11ec3dc56464aeab49 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:11:37 +0800 Subject: [PATCH 04/17] fix: address review round 2 feedback for native pass-through (#38) --- src/routes/messages/handler.ts | 15 +-- .../copilot/create-messages-native.ts | 19 ++-- src/services/copilot/native-models.ts | 19 ---- tests/native-passthrough.test.ts | 105 +++++++++++++++++- 4 files changed, 120 insertions(+), 38 deletions(-) diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 
2bf1005ea..e383b2dc0 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -74,18 +74,19 @@ async function handleNative( }>) { if (!rawEvent.data) continue - // Parse to log but forward the original JSON verbatim + // Forward verbatim — never block on parse failure + await stream.writeSSE({ + event: rawEvent.event, + data: rawEvent.data, + }) + + // Parse only for debug logging try { const parsed = JSON.parse(rawEvent.data) as AnthropicStreamEventData consola.debug("Native SSE event:", parsed.type) - await stream.writeSSE({ - event: parsed.type, - data: rawEvent.data, - }) } catch { - // Malformed chunk — skip consola.warn( - "Could not parse native SSE chunk:", + "Could not parse native SSE chunk for logging:", rawEvent.data.slice(0, 200), ) } diff --git a/src/services/copilot/create-messages-native.ts b/src/services/copilot/create-messages-native.ts index e6c41cb72..8b9fb54fd 100644 --- a/src/services/copilot/create-messages-native.ts +++ b/src/services/copilot/create-messages-native.ts @@ -80,15 +80,16 @@ function buildNativeHeaders( ): Record { const base = copilotHeaders(state, vision) - // The native /v1/messages endpoint expects these Anthropic headers + // Remove headers that are OpenAI-specific and not expected by Anthropic endpoint + const { "openai-intent": _dropped, ...anthropicBase } = base + return { - ...base, + ...anthropicBase, "anthropic-version": "2023-06-01", // Enable beta features: extended thinking + prompt caching "anthropic-beta": "interleaved-thinking-2025-05-14,prompt-caching-2024-07-31", - // Only request SSE streaming format when the caller is streaming; - // non-streaming calls should use the default application/json accept + // Only request SSE streaming format when the caller is streaming ...(stream ? { accept: "text/event-stream" } : {}), } } @@ -121,7 +122,8 @@ export function buildUpstreamPayload( return { ...rest, thinking: { type: "adaptive" }, - output_config: output_config ?? 
{ effort: "medium" }, + output_config: + output_config?.effort ? output_config : { effort: "medium" }, } } // Already adaptive — forward as-is @@ -133,9 +135,10 @@ export function buildUpstreamPayload( } /** - * Models that require the new adaptive thinking API. - * Populated dynamically at dispatch time via `isNativeAnthropicModel()`. - * This hard-coded check is the fallback. + * Returns true for models that require the adaptive thinking API + * (`{ type: "adaptive" }` + `output_config.effort`) rather than the + * legacy `{ type: "enabled", budget_tokens: N }`. + * Currently: claude-opus-4.7 and later. */ function isAdaptiveThinkingModel(model: string): boolean { // claude-opus-4.7 and above use adaptive thinking diff --git a/src/services/copilot/native-models.ts b/src/services/copilot/native-models.ts index 7c3d4f59b..7d731d01b 100644 --- a/src/services/copilot/native-models.ts +++ b/src/services/copilot/native-models.ts @@ -32,24 +32,5 @@ export function isNativeAnthropicModel(modelId: string): boolean { // Model not found in list — fall through to prefix heuristic } - return matchesAnthropicPrefix(modelId) -} - -/** - * Static prefix heuristic used before `state.models` is populated. - * Covers all current Claude variants served by Copilot. - */ -function matchesAnthropicPrefix(modelId: string): boolean { return modelId.startsWith("claude-") } - -/** - * Returns all model IDs that support native Anthropic pass-through. - * Used for diagnostics and startup logging. 
- */ -export function nativeAnthropicModelIds(): ReadonlyArray { - if (!state.models?.data) return [] - return state.models.data - .filter((m) => m.vendor === "Anthropic") - .map((m) => m.id) -} diff --git a/tests/native-passthrough.test.ts b/tests/native-passthrough.test.ts index 759df813d..6e4f09c72 100644 --- a/tests/native-passthrough.test.ts +++ b/tests/native-passthrough.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, afterEach } from "bun:test" +import { describe, test, expect, beforeEach, afterEach } from "bun:test" import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" @@ -72,17 +72,45 @@ describe("buildUpstreamPayload", () => { expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 }) expect(result).not.toHaveProperty("output_config") }) + + // T5 — adaptive upgrade with no output_config → defaults to effort:medium + test("T5: adaptive upgrade with no output_config defaults to effort:medium", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled", budget_tokens: 1024 }, + // output_config intentionally absent + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + expect(result.output_config).toEqual({ effort: "medium" }) + }) + + // T6 — output_config: {} also triggers default (not bypassed) + test("T6: empty output_config triggers medium effort default", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled" }, + output_config: {}, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + expect(result.output_config).toEqual({ effort: "medium" }) + }) }) // --------------------------------------------------------------------------- // isNativeAnthropicModel tests // --------------------------------------------------------------------------- -// Save original models state and restore after 
each test -const originalModels = state.models +// Per-test state isolation +let savedModels: typeof state.models + +beforeEach(() => { + savedModels = state.models +}) afterEach(() => { - state.models = originalModels + state.models = savedModels }) describe("isNativeAnthropicModel", () => { @@ -158,3 +186,72 @@ describe("isNativeAnthropicModel", () => { expect(isNativeAnthropicModel("claude-something")).toBe(true) }) }) + +// --------------------------------------------------------------------------- +// isAdaptiveThinkingModel boundary tests (via buildUpstreamPayload) +// --------------------------------------------------------------------------- + +describe("isAdaptiveThinkingModel boundaries (via buildUpstreamPayload)", () => { + // B1 — claude-opus-4.6 is NOT upgraded (one below threshold) + test("B1: claude-opus-4.6 does NOT get adaptive upgrade", () => { + const payload = basePayload({ + model: "claude-opus-4.6", + thinking: { type: "enabled", budget_tokens: 2048 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 2048 }) + expect(result).not.toHaveProperty("output_config") + }) + + // B2 — claude-opus-4.7 IS upgraded (exact threshold) + test("B2: claude-opus-4.7 (dot separator) IS upgraded to adaptive", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + }) + + // B3 — claude-opus-4-7 (dash separator) IS upgraded + test("B3: claude-opus-4-7 (dash separator) IS upgraded to adaptive", () => { + const payload = basePayload({ + model: "claude-opus-4-7", + thinking: { type: "enabled" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + }) + + // B4 — claude-opus-4-6 (dash separator) is NOT upgraded + test("B4: claude-opus-4-6 (dash separator) 
NOT upgraded", () => { + const payload = basePayload({ + model: "claude-opus-4-6", + thinking: { type: "enabled", budget_tokens: 512 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 512 }) + expect(result).not.toHaveProperty("output_config") + }) + + // B5 — claude-opus-4.8 (one above threshold) IS upgraded + test("B5: claude-opus-4.8 (one above threshold) IS upgraded", () => { + const payload = basePayload({ + model: "claude-opus-4.8", + thinking: { type: "enabled" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + }) + + // B6 — claude-sonnet-4.7 (non-opus) is NOT upgraded + test("B6: claude-sonnet-4.7 (non-opus) NOT upgraded to adaptive", () => { + const payload = basePayload({ + model: "claude-sonnet-4.7", + thinking: { type: "enabled", budget_tokens: 1024 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 }) + expect(result).not.toHaveProperty("output_config") + }) +}) From 9409034979a4ad4f9ef680cfc81d56c8f3e7521d Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:22:14 +0800 Subject: [PATCH 05/17] fix: add vsCodeVersion fallback guard in api-config.ts (#46) Export FALLBACK from get-vscode-version.ts and apply `state.vsCodeVersion ?? VSCODE_VERSION_FALLBACK` in both copilotHeaders calls, matching the existing copilotChatVersion pattern. 
Co-Authored-By: Claude Sonnet 4.6 --- src/lib/api-config.ts | 5 +++-- src/services/get-vscode-version.ts | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts index 294959285..09d235bf7 100644 --- a/src/lib/api-config.ts +++ b/src/lib/api-config.ts @@ -1,6 +1,7 @@ import { randomUUID } from "node:crypto" import { FALLBACK as COPILOT_CHAT_VERSION_FALLBACK } from "~/services/get-copilot-chat-version" +import { FALLBACK as VSCODE_VERSION_FALLBACK } from "~/services/get-vscode-version" import type { State } from "./state" @@ -38,7 +39,7 @@ export const copilotHeaders = (state: State, vision: boolean = false) => { Authorization: `Bearer ${state.copilotToken}`, "content-type": standardHeaders()["content-type"], "copilot-integration-id": "vscode-chat", - "editor-version": `vscode/${state.vsCodeVersion}`, + "editor-version": `vscode/${state.vsCodeVersion ?? VSCODE_VERSION_FALLBACK}`, "editor-plugin-version": `copilot-chat/${copilotVersion}`, "user-agent": `GitHubCopilotChat/${copilotVersion}`, "openai-intent": "conversation-panel", @@ -59,7 +60,7 @@ export const githubHeaders = (state: State) => { return { ...standardHeaders(), authorization: `token ${state.githubToken}`, - "editor-version": `vscode/${state.vsCodeVersion}`, + "editor-version": `vscode/${state.vsCodeVersion ?? 
VSCODE_VERSION_FALLBACK}`, "editor-plugin-version": `copilot-chat/${copilotVersion}`, "user-agent": `GitHubCopilotChat/${copilotVersion}`, "x-github-api-version": API_VERSION, diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts index f9732bfed..bfb92de5e 100644 --- a/src/services/get-vscode-version.ts +++ b/src/services/get-vscode-version.ts @@ -2,7 +2,7 @@ import consola from "consola" import { VERSION_CACHE_TTL_MS, type VersionCache } from "./version-cache" -const FALLBACK = "1.104.3" +export const FALLBACK = "1.104.3" let cache: VersionCache | undefined From a36fa096004985394f21a7f9017977b4c2d4c7e5 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:26:39 +0800 Subject: [PATCH 06/17] feat(responses): route scaffolding + reasoning_effort types (#2, #7) Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 27 ++++ src/routes/responses/route.ts | 9 ++ src/routes/responses/types.ts | 125 ++++++++++++++++++ src/server.ts | 5 + .../copilot/create-chat-completions.ts | 3 + 5 files changed, 169 insertions(+) create mode 100644 src/routes/responses/handler.ts create mode 100644 src/routes/responses/route.ts create mode 100644 src/routes/responses/types.ts diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts new file mode 100644 index 000000000..0f32dcb18 --- /dev/null +++ b/src/routes/responses/handler.ts @@ -0,0 +1,27 @@ +import type { Context } from "hono" + +import consola from "consola" + +import type { ResponsesPayload } from "./types" + +export async function handleResponses(c: Context): Promise { + const payload = await c.req.json() + consola.debug("Responses API request payload:", JSON.stringify(payload)) + + // TODO(#4): wire up createResponses() service client + // For now return a structured 501 so the route is exercisable + consola.warn( + "POST /v1/responses is not yet implemented — service client pending (#4)", + ) + return 
c.json( + { + error: { + message: + "Responses API service client not yet implemented. See issue #4.", + type: "not_implemented", + code: "responses_not_implemented", + }, + }, + 501, + ) +} diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts new file mode 100644 index 000000000..ac6aa20f1 --- /dev/null +++ b/src/routes/responses/route.ts @@ -0,0 +1,9 @@ +import { Hono } from "hono" + +import { handleResponses } from "./handler" + +const responses = new Hono() + +responses.post("/", handleResponses) + +export default responses diff --git a/src/routes/responses/types.ts b/src/routes/responses/types.ts new file mode 100644 index 000000000..8953e55de --- /dev/null +++ b/src/routes/responses/types.ts @@ -0,0 +1,125 @@ +// Request types +export interface ResponsesPayload { + model: string + input: Array + instructions?: string + tools?: Array + tool_choice?: + | "auto" + | "none" + | "required" + | { type: "function"; name: string } + temperature?: number | null + top_p?: number | null + max_output_tokens?: number | null + reasoning?: { + effort?: "minimal" | "low" | "medium" | "high" + summary?: "auto" | "concise" | "detailed" + } | null + previous_response_id?: string | null + store?: boolean | null + include?: Array | null + stream?: boolean | null + metadata?: Record | null + parallel_tool_calls?: boolean | null + service_tier?: "auto" | "default" | null + truncation?: "auto" | "disabled" | null + user?: string | null +} + +// Input item types (union) +export type ResponsesInputItem = + | ResponsesInputMessage + | ResponsesFunctionCallOutput + | ResponsesReasoningItem + +export interface ResponsesInputMessage { + type: "message" + role: "user" | "assistant" | "system" | "developer" + content: string | Array + id?: string + status?: "completed" | "incomplete" | null +} + +export interface ResponsesFunctionCallOutput { + type: "function_call_output" + call_id: string + output: string +} + +export interface ResponsesReasoningItem { + type: 
"reasoning" + id: string + encrypted_content?: string + summary?: Array<{ type: "summary_text"; text: string }> + status?: "completed" | "in_progress" | "incomplete" | null +} + +export type ResponsesContentPart = + | { type: "input_text"; text: string } + | { type: "input_image"; image_url: string; detail?: "low" | "high" | "auto" } + +// Tool types +export interface ResponsesTool { + type: "function" + name: string + description?: string + parameters?: Record + strict?: boolean +} + +// Output item types (response) +export type ResponsesOutputItem = + | ResponsesOutputMessage + | ResponsesOutputFunctionCall + | ResponsesOutputReasoning + +export interface ResponsesOutputMessage { + type: "message" + id: string + role: "assistant" + content: Array + status: "completed" | "incomplete" | "in_progress" +} + +export interface ResponsesOutputFunctionCall { + type: "function_call" + id: string + call_id: string + name: string + arguments: string + status: "completed" | "incomplete" | "in_progress" +} + +export interface ResponsesOutputReasoning { + type: "reasoning" + id: string + encrypted_content?: string + summary?: Array<{ type: "summary_text"; text: string }> + status: "completed" | "incomplete" | "in_progress" +} + +export type ResponsesOutputContentPart = + | { type: "output_text"; text: string; annotations?: Array } + | { type: "refusal"; refusal: string } + +// Response type +export interface ResponsesResponse { + id: string + object: "response" + created_at: number + model: string + status: "completed" | "incomplete" | "in_progress" | "failed" + output: Array + usage?: { + input_tokens: number + output_tokens: number + total_tokens: number + input_tokens_details?: { cached_tokens?: number } + output_tokens_details?: { reasoning_tokens?: number } + } + error?: { code: string; message: string } | null + incomplete_details?: { reason: string } | null + metadata?: Record | null + service_tier?: string +} diff --git a/src/server.ts b/src/server.ts index 
462a278f3..6e6b6a878 100644 --- a/src/server.ts +++ b/src/server.ts @@ -6,6 +6,7 @@ import { completionRoutes } from "./routes/chat-completions/route" import { embeddingRoutes } from "./routes/embeddings/route" import { messageRoutes } from "./routes/messages/route" import { modelRoutes } from "./routes/models/route" +import responses from "./routes/responses/route" import { tokenRoute } from "./routes/token/route" import { usageRoute } from "./routes/usage/route" @@ -29,3 +30,7 @@ server.route("/v1/embeddings", embeddingRoutes) // Anthropic compatible endpoints server.route("/v1/messages", messageRoutes) + +// OpenAI Responses API +server.route("/responses", responses) +server.route("/v1/responses", responses) diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 8534151da..fc96517e4 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -71,6 +71,7 @@ export interface ChatCompletionChunk { interface Delta { content?: string | null + reasoning_content?: string | null role?: "user" | "assistant" | "system" | "tool" tool_calls?: Array<{ index: number @@ -112,6 +113,7 @@ export interface ChatCompletionResponse { interface ResponseMessage { role: "assistant" content: string | null + reasoning_content?: string | null tool_calls?: Array } @@ -148,6 +150,7 @@ export interface ChatCompletionsPayload { | { type: "function"; function: { name: string } } | null user?: string | null + reasoning_effort?: "minimal" | "low" | "medium" | "high" | null } export interface Tool { From d417a7c53c1c271b392fd9d578d8c38c1d97d3b1 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:31:50 +0800 Subject: [PATCH 07/17] fix(responses): type fixes + error handling + route tests (#2, #7) Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 9 +++-- src/routes/responses/types.ts | 19 ++++++++- 
.../copilot/create-chat-completions.ts | 2 +- tests/responses-route.test.ts | 39 +++++++++++++++++++ 4 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 tests/responses-route.test.ts diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index 0f32dcb18..e976252be 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -5,11 +5,14 @@ import consola from "consola" import type { ResponsesPayload } from "./types" export async function handleResponses(c: Context): Promise { - const payload = await c.req.json() - consola.debug("Responses API request payload:", JSON.stringify(payload)) + try { + const payload = await c.req.json() + consola.debug("Responses API request payload:", JSON.stringify(payload)) + } catch { + consola.debug("Responses API request received (could not parse body)") + } // TODO(#4): wire up createResponses() service client - // For now return a structured 501 so the route is exercisable consola.warn( "POST /v1/responses is not yet implemented — service client pending (#4)", ) diff --git a/src/routes/responses/types.ts b/src/routes/responses/types.ts index 8953e55de..a5241129b 100644 --- a/src/routes/responses/types.ts +++ b/src/routes/responses/types.ts @@ -13,7 +13,7 @@ export interface ResponsesPayload { top_p?: number | null max_output_tokens?: number | null reasoning?: { - effort?: "minimal" | "low" | "medium" | "high" + effort?: "low" | "medium" | "high" summary?: "auto" | "concise" | "detailed" } | null previous_response_id?: string | null @@ -30,6 +30,7 @@ export interface ResponsesPayload { // Input item types (union) export type ResponsesInputItem = | ResponsesInputMessage + | ResponsesInputFunctionCall | ResponsesFunctionCallOutput | ResponsesReasoningItem @@ -41,6 +42,15 @@ export interface ResponsesInputMessage { status?: "completed" | "incomplete" | null } +export interface ResponsesInputFunctionCall { + type: "function_call" + id?: string + call_id: string + name: 
string + arguments: string + status?: "completed" | "in_progress" | "incomplete" | null +} + export interface ResponsesFunctionCallOutput { type: "function_call_output" call_id: string @@ -57,7 +67,12 @@ export interface ResponsesReasoningItem { export type ResponsesContentPart = | { type: "input_text"; text: string } - | { type: "input_image"; image_url: string; detail?: "low" | "high" | "auto" } + | { + type: "input_image" + image_url?: string | null + file_id?: string | null + detail?: "low" | "high" | "auto" + } // Tool types export interface ResponsesTool { diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index fc96517e4..ea4eb5d55 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -150,7 +150,7 @@ export interface ChatCompletionsPayload { | { type: "function"; function: { name: string } } | null user?: string | null - reasoning_effort?: "minimal" | "low" | "medium" | "high" | null + reasoning_effort?: "low" | "medium" | "high" | null } export interface Tool { diff --git a/tests/responses-route.test.ts b/tests/responses-route.test.ts new file mode 100644 index 000000000..8f5b496c3 --- /dev/null +++ b/tests/responses-route.test.ts @@ -0,0 +1,39 @@ +import { describe, test, expect } from "bun:test" + +import { server } from "../src/server" + +describe("POST /v1/responses stub", () => { + test("returns 501 with structured error body", async () => { + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [] }), + }) + expect(res.status).toBe(501) + const body = (await res.json()) as { + error: { type: string; code: string; message: string } + } + expect(body.error.type).toBe("not_implemented") + expect(body.error.code).toBe("responses_not_implemented") + expect(typeof body.error.message).toBe("string") + }) + + test("bare 
/responses path also returns 501", async () => { + const res = await server.request("/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [] }), + }) + expect(res.status).toBe(501) + const body = (await res.json()) as { error: { code: string } } + expect(body.error.code).toBe("responses_not_implemented") + }) + + test("empty body returns 501 (not 500)", async () => { + const res = await server.request("/v1/responses", { + method: "POST", + // no body + }) + expect(res.status).toBe(501) + }) +}) From 977a30ff0f553498f58ba75b2251492cbba40968 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:37:25 +0800 Subject: [PATCH 08/17] fix(responses): complete type coverage for Responses API (#2) - input: accept string | Array (string shorthand) - ResponsesContentPart: add input_file variant (file_id / file_data) - ResponsesResponse.status: add "cancelled" - ResponsesFunctionCallOutput: add optional id field - service_tier: narrow to "default" | "flex" | string - handler: downgrade warn to info (stub log not a health signal) Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 2 +- src/routes/responses/types.ts | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index e976252be..6e0a257b0 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -13,7 +13,7 @@ export async function handleResponses(c: Context): Promise { } // TODO(#4): wire up createResponses() service client - consola.warn( + consola.info( "POST /v1/responses is not yet implemented — service client pending (#4)", ) return c.json( diff --git a/src/routes/responses/types.ts b/src/routes/responses/types.ts index a5241129b..642df7719 100644 --- a/src/routes/responses/types.ts +++ b/src/routes/responses/types.ts @@ -1,7 +1,7 @@ // 
Request types export interface ResponsesPayload { model: string - input: Array + input: string | Array instructions?: string tools?: Array tool_choice?: @@ -53,6 +53,7 @@ export interface ResponsesInputFunctionCall { export interface ResponsesFunctionCallOutput { type: "function_call_output" + id?: string call_id: string output: string } @@ -73,6 +74,14 @@ export type ResponsesContentPart = file_id?: string | null detail?: "low" | "high" | "auto" } + | { + type: "input_file" + /** Pre-uploaded file via Files API */ + file_id?: string + /** Base64-encoded inline file content */ + file_data?: string + filename?: string + } // Tool types export interface ResponsesTool { @@ -124,7 +133,7 @@ export interface ResponsesResponse { object: "response" created_at: number model: string - status: "completed" | "incomplete" | "in_progress" | "failed" + status: "completed" | "incomplete" | "in_progress" | "failed" | "cancelled" output: Array usage?: { input_tokens: number @@ -136,5 +145,5 @@ export interface ResponsesResponse { error?: { code: string; message: string } | null incomplete_details?: { reason: string } | null metadata?: Record | null - service_tier?: string + service_tier?: "default" | "flex" | (string & {}) } From 65a4522425053061c99708a20e263a62138f1104 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:47:38 +0800 Subject: [PATCH 09/17] feat(responses): add upstream service client + wire handler (#4) Implements createResponses() service client modelled on createChatCompletions, with inputHasImages/isAgentCall helpers and X-Initiator header logic; wires the /responses handler to call the real service client with manualApprove gate and full streaming/non-streaming SSE proxy; updates route tests to cover the live handler behaviour instead of the old 501 stub. 
Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 78 +++++++++++++++----- src/services/copilot/create-responses.ts | 93 ++++++++++++++++++++++++ tests/responses-route.test.ts | 92 ++++++++++++++++++----- 3 files changed, 226 insertions(+), 37 deletions(-) create mode 100644 src/services/copilot/create-responses.ts diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index 6e0a257b0..4a490fe38 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -1,30 +1,72 @@ import type { Context } from "hono" import consola from "consola" +import { streamSSE } from "hono/streaming" + +import { awaitApproval } from "~/lib/approval" +import { state } from "~/lib/state" +import { createResponses } from "~/services/copilot/create-responses" import type { ResponsesPayload } from "./types" export async function handleResponses(c: Context): Promise { + let payload: ResponsesPayload try { - const payload = await c.req.json() - consola.debug("Responses API request payload:", JSON.stringify(payload)) + payload = await c.req.json() } catch { - consola.debug("Responses API request received (could not parse body)") + return c.json( + { + error: { + message: "Invalid JSON body", + type: "invalid_request_error", + code: "invalid_json", + }, + }, + 400, + ) } - // TODO(#4): wire up createResponses() service client - consola.info( - "POST /v1/responses is not yet implemented — service client pending (#4)", - ) - return c.json( - { - error: { - message: - "Responses API service client not yet implemented. 
See issue #4.", - type: "not_implemented", - code: "responses_not_implemented", - }, - }, - 501, - ) + consola.debug("Responses API request payload:", JSON.stringify(payload)) + + if (state.manualApprove) { + await awaitApproval() + } + + const response = await createResponses(payload) + + if (!payload.stream) { + consola.debug( + "Responses non-streaming response:", + JSON.stringify(response).slice(0, 400), + ) + return c.json(response) + } + + // Streaming: proxy SSE events verbatim (same pattern as native Anthropic pass-through) + consola.debug("Responses streaming response — proxying SSE events") + return streamSSE(c, async (stream) => { + for await (const rawEvent of response as AsyncIterable<{ + data?: string + event?: string + }>) { + if (!rawEvent.data) continue + + // Forward verbatim first + await stream.writeSSE({ + event: rawEvent.event, + data: rawEvent.data, + }) + + // Parse only for debug logging + try { + const parsed = JSON.parse(rawEvent.data) as { type: string } + consola.debug("Responses SSE event:", parsed.type) + } catch { + consola.warn( + "Could not parse Responses SSE chunk for logging:", + rawEvent.data.slice(0, 200), + ) + } + } + }) } diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts new file mode 100644 index 000000000..3163182ec --- /dev/null +++ b/src/services/copilot/create-responses.ts @@ -0,0 +1,93 @@ +import consola from "consola" +import { events } from "fetch-event-stream" + +import type { + ResponsesContentPart, + ResponsesInputItem, + ResponsesPayload, + ResponsesResponse, +} from "~/routes/responses/types" + +import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config" +import { HTTPError } from "~/lib/error" +import { state } from "~/lib/state" + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Returns true if any input item contains an 
`input_image` content part. + * Handles both a top-level string input and an array of input items. + */ +export function inputHasImages(payload: ResponsesPayload): boolean { + if (typeof payload.input === "string") return false + + return payload.input.some((item) => { + if (item.type !== "message") return false + if (typeof item.content === "string") return false + return item.content.some( + (part: ResponsesContentPart) => part.type === "input_image", + ) + }) +} + +/** + * Returns true if this looks like an agent/multi-turn call: + * - any input item has role "assistant", OR + * - any item has type "function_call_output" or "function_call" + */ +export function isAgentCall(payload: ResponsesPayload): boolean { + if (typeof payload.input === "string") return false + + return payload.input.some( + (item: ResponsesInputItem) => + ("role" in item && item.role === "assistant") + || item.type === "function_call_output" + || item.type === "function_call", + ) +} + +// --------------------------------------------------------------------------- +// Service client +// --------------------------------------------------------------------------- + +export const createResponses = async (payload: ResponsesPayload) => { + if (!state.copilotToken) throw new Error("Copilot token not found") + + const enableVision = inputHasImages(payload) + + const initiator = isAgentCall(payload) ? 
"agent" : "user" + + // TODO(#11): add Copilot-Vision-Request header when vision detected + const headers: Record = { + ...copilotHeaders(state, enableVision), + "X-Initiator": initiator, + } + + const response = await fetch(`${copilotBaseUrl(state)}/responses`, { + method: "POST", + headers, + body: JSON.stringify(payload), + }) + + if (!response.ok) { + consola.error("Failed to create responses", response) + throw new HTTPError("Failed to create responses", response) + } + + if (payload.stream) { + return events(response) + } + + return (await response.json()) as ResponsesResponse +} + +// --------------------------------------------------------------------------- +// Streaming event types for Responses API SSE +// --------------------------------------------------------------------------- + +export interface ResponseStreamEvent { + type: string // "response.created" | "response.output_text.delta" | etc. + [key: string]: unknown +} diff --git a/tests/responses-route.test.ts b/tests/responses-route.test.ts index 8f5b496c3..c5e396224 100644 --- a/tests/responses-route.test.ts +++ b/tests/responses-route.test.ts @@ -1,39 +1,93 @@ -import { describe, test, expect } from "bun:test" +import { describe, test, expect, mock, beforeAll } from "bun:test" +import { state } from "../src/lib/state" import { server } from "../src/server" -describe("POST /v1/responses stub", () => { - test("returns 501 with structured error body", async () => { +// --------------------------------------------------------------------------- +// Global fetch mock — returns a minimal non-streaming Responses API response +// --------------------------------------------------------------------------- + +const mockResponseBody = { + id: "resp_test", + object: "response", + created_at: 1_700_000_000, + model: "gpt-4o", + status: "completed", + output: [], +} + +const fetchMock = mock(() => + Promise.resolve({ + ok: true, + json: () => Promise.resolve(mockResponseBody), + }), +) + +// @ts-expect-error – 
mock doesn't implement full fetch signature +;(globalThis as unknown as { fetch: typeof fetch }).fetch = fetchMock + +// Set up copilot token so createResponses doesn't throw +beforeAll(() => { + state.copilotToken = "test-token" + state.vsCodeVersion = "1.99.0" + state.accountType = "individual" + state.manualApprove = false +}) + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("POST /v1/responses — wired handler", () => { + test("non-streaming request returns upstream JSON", async () => { const res = await server.request("/v1/responses", { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ model: "gpt-4o", input: [] }), + body: JSON.stringify({ model: "gpt-4o", input: [], stream: false }), }) - expect(res.status).toBe(501) - const body = (await res.json()) as { - error: { type: string; code: string; message: string } - } - expect(body.error.type).toBe("not_implemented") - expect(body.error.code).toBe("responses_not_implemented") - expect(typeof body.error.message).toBe("string") + expect(res.status).toBe(200) + const body = (await res.json()) as typeof mockResponseBody + expect(body.object).toBe("response") + expect(body.id).toBe("resp_test") }) - test("bare /responses path also returns 501", async () => { + test("same endpoint reachable at bare /responses path", async () => { const res = await server.request("/responses", { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ model: "gpt-4o", input: [] }), + body: JSON.stringify({ model: "gpt-4o", input: [], stream: false }), }) - expect(res.status).toBe(501) - const body = (await res.json()) as { error: { code: string } } - expect(body.error.code).toBe("responses_not_implemented") + expect(res.status).toBe(200) }) - test("empty body returns 501 (not 500)", async () => { + test("invalid JSON body returns 
400", async () => { const res = await server.request("/v1/responses", { method: "POST", - // no body + headers: { "Content-Type": "application/json" }, + body: "not-json{{{", }) - expect(res.status).toBe(501) + expect(res.status).toBe(400) + const body = (await res.json()) as { + error: { type: string; code: string } + } + expect(body.error.type).toBe("invalid_request_error") + expect(body.error.code).toBe("invalid_json") + }) + + test("missing copilot token returns 500", async () => { + // Temporarily clear the token via a describe-level wrapper so the + // assignment happens synchronously (no await between read and write). + const tokenBackup = state.copilotToken + state.copilotToken = undefined // synchronous — no race condition + + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [] }), + }) + expect(res.status).toBe(500) + + // eslint-disable-next-line require-atomic-updates + state.copilotToken = tokenBackup }) }) From b134dfa8f0df3f40f94efc1c980d57b50cb4e4fa Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 14:10:26 +0800 Subject: [PATCH 10/17] fix(responses): dead export, explicit types, X-Initiator and error tests (#4) - Delete unused `ResponseStreamEvent` interface from create-responses.ts - Add explicit `Promise>` return type to `createResponses` - Wire `forwardError` into responses route so upstream 4xx/5xx propagates correctly - Expand tests/responses-route.test.ts: new "createResponses behavior" describe block with X-Initiator=agent (assistant message), X-Initiator=user (pure user), X-Initiator=agent (function_call_output), and upstream 429 error path - Remove spurious `// eslint-disable-next-line require-atomic-updates` comment; fix underlying lint issue Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/route.ts | 10 +- src/services/copilot/create-responses.ts | 17 
++- tests/responses-route.test.ts | 150 +++++++++++++++++++++-- 3 files changed, 159 insertions(+), 18 deletions(-) diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts index ac6aa20f1..4be774e59 100644 --- a/src/routes/responses/route.ts +++ b/src/routes/responses/route.ts @@ -1,9 +1,17 @@ import { Hono } from "hono" +import { forwardError } from "~/lib/error" + import { handleResponses } from "./handler" const responses = new Hono() -responses.post("/", handleResponses) +responses.post("/", async (c) => { + try { + return await handleResponses(c) + } catch (error) { + return await forwardError(c, error) + } +}) export default responses diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts index 3163182ec..5bd1152b6 100644 --- a/src/services/copilot/create-responses.ts +++ b/src/services/copilot/create-responses.ts @@ -1,3 +1,5 @@ +import type { ServerSentEventMessage } from "fetch-event-stream" + import consola from "consola" import { events } from "fetch-event-stream" @@ -52,7 +54,11 @@ export function isAgentCall(payload: ResponsesPayload): boolean { // Service client // --------------------------------------------------------------------------- -export const createResponses = async (payload: ResponsesPayload) => { +export const createResponses = async ( + payload: ResponsesPayload, +): Promise< + ResponsesResponse | AsyncGenerator +> => { if (!state.copilotToken) throw new Error("Copilot token not found") const enableVision = inputHasImages(payload) @@ -82,12 +88,3 @@ export const createResponses = async (payload: ResponsesPayload) => { return (await response.json()) as ResponsesResponse } - -// --------------------------------------------------------------------------- -// Streaming event types for Responses API SSE -// --------------------------------------------------------------------------- - -export interface ResponseStreamEvent { - type: string // "response.created" | 
"response.output_text.delta" | etc. - [key: string]: unknown -} diff --git a/tests/responses-route.test.ts b/tests/responses-route.test.ts index c5e396224..78421d4d9 100644 --- a/tests/responses-route.test.ts +++ b/tests/responses-route.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, mock, beforeAll } from "bun:test" +import { describe, test, expect, mock, beforeAll, beforeEach } from "bun:test" import { state } from "../src/lib/state" import { server } from "../src/server" @@ -75,10 +75,8 @@ describe("POST /v1/responses — wired handler", () => { }) test("missing copilot token returns 500", async () => { - // Temporarily clear the token via a describe-level wrapper so the - // assignment happens synchronously (no await between read and write). - const tokenBackup = state.copilotToken - state.copilotToken = undefined // synchronous — no race condition + // Temporarily clear the token — write is synchronous, no await in between. + state.copilotToken = undefined const res = await server.request("/v1/responses", { method: "POST", @@ -87,7 +85,145 @@ describe("POST /v1/responses — wired handler", () => { }) expect(res.status).toBe(500) - // eslint-disable-next-line require-atomic-updates - state.copilotToken = tokenBackup + state.copilotToken = "test-token" + }) +}) + +// --------------------------------------------------------------------------- +// createResponses behavior: X-Initiator header and error propagation +// --------------------------------------------------------------------------- + +describe("createResponses behavior", () => { + // Restore state before each test in this block + beforeEach(() => { + state.copilotToken = "test-token" + state.vsCodeVersion = "1.99.0" + state.accountType = "individual" + state.manualApprove = false + }) + + test("X-Initiator = agent when assistant message present", async () => { + const captureMock = mock( + (_url: string, opts: { headers: Record }) => + Promise.resolve({ + ok: true, + json: () => 
Promise.resolve(mockResponseBody), + headers: opts.headers, + }), + ) + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = captureMock + + await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4o", + stream: false, + input: [ + { type: "message", role: "user", content: "hello" }, + { type: "message", role: "assistant", content: "hi there" }, + ], + }), + }) + + expect(captureMock).toHaveBeenCalled() + const sentHeaders = ( + captureMock.mock.calls[0][1] as { headers: Record } + ).headers + expect(sentHeaders["X-Initiator"]).toBe("agent") + + // Restore default mock + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = fetchMock + }) + + test("X-Initiator = user for pure user messages", async () => { + const captureMock = mock( + (_url: string, opts: { headers: Record }) => + Promise.resolve({ + ok: true, + json: () => Promise.resolve(mockResponseBody), + headers: opts.headers, + }), + ) + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = captureMock + + await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4o", + stream: false, + input: [{ type: "message", role: "user", content: "just a user" }], + }), + }) + + expect(captureMock).toHaveBeenCalled() + const sentHeaders = ( + captureMock.mock.calls[0][1] as { headers: Record } + ).headers + expect(sentHeaders["X-Initiator"]).toBe("user") + + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = fetchMock + }) + + test("X-Initiator = agent for function_call_output item", async () => { + const captureMock = mock( + (_url: string, opts: { headers: Record }) => + Promise.resolve({ + ok: true, + json: () => Promise.resolve(mockResponseBody), + headers: opts.headers, + }), + ) + // @ts-expect-error 
– mock doesn't implement full fetch signature + globalThis.fetch = captureMock + + await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4o", + stream: false, + input: [ + { type: "function_call_output", call_id: "call_1", output: "{}" }, + ], + }), + }) + + expect(captureMock).toHaveBeenCalled() + const sentHeaders = ( + captureMock.mock.calls[0][1] as { headers: Record } + ).headers + expect(sentHeaders["X-Initiator"]).toBe("agent") + + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = fetchMock + }) + + test("upstream 4xx returns error response", async () => { + const errorMock = mock(() => + Promise.resolve({ + ok: false, + status: 429, + text: () => Promise.resolve("rate limited"), + }), + ) + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = errorMock + + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", stream: false, input: [] }), + }) + + expect(res.status).toBe(429) + + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = fetchMock }) }) From 8341b89fcaf82366766619a5e61a3715b7b9bb83 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 14:19:56 +0800 Subject: [PATCH 11/17] fix(responses): streamSSE error handler, reasoning agent detection, test hygiene (#4) - Add onError callback to streamSSE so mid-stream upstream failures are logged and surface an error event to the client instead of silently dropping the connection - Extend isAgentCall to treat reasoning items as agent context (they only appear when echoing prior-turn encrypted reasoning) - Wrap missing-token test in try/finally to prevent state leakage - Add X-Initiator=agent test for reasoning item input Co-Authored-By: Claude Sonnet 4.6 
--- src/routes/responses/handler.ts | 54 +++++++++++++--------- src/services/copilot/create-responses.ts | 6 ++- tests/responses-route.test.ts | 59 ++++++++++++++++++++---- 3 files changed, 85 insertions(+), 34 deletions(-) diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index 4a490fe38..1307f6dd7 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -44,29 +44,39 @@ export async function handleResponses(c: Context): Promise { // Streaming: proxy SSE events verbatim (same pattern as native Anthropic pass-through) consola.debug("Responses streaming response — proxying SSE events") - return streamSSE(c, async (stream) => { - for await (const rawEvent of response as AsyncIterable<{ - data?: string - event?: string - }>) { - if (!rawEvent.data) continue + return streamSSE( + c, + async (stream) => { + for await (const rawEvent of response as AsyncIterable<{ + data?: string + event?: string + }>) { + if (!rawEvent.data) continue - // Forward verbatim first - await stream.writeSSE({ - event: rawEvent.event, - data: rawEvent.data, - }) + // Forward verbatim first + await stream.writeSSE({ + event: rawEvent.event, + data: rawEvent.data, + }) - // Parse only for debug logging - try { - const parsed = JSON.parse(rawEvent.data) as { type: string } - consola.debug("Responses SSE event:", parsed.type) - } catch { - consola.warn( - "Could not parse Responses SSE chunk for logging:", - rawEvent.data.slice(0, 200), - ) + // Parse only for debug logging + try { + const parsed = JSON.parse(rawEvent.data) as { type: string } + consola.debug("Responses SSE event:", parsed.type) + } catch { + consola.warn( + "Could not parse Responses SSE chunk for logging:", + rawEvent.data.slice(0, 200), + ) + } } - } - }) + }, + async (err, stream) => { + consola.error("Responses SSE stream error:", err) + await stream.writeSSE({ + event: "error", + data: JSON.stringify({ message: String(err) }), + }) + }, + ) } diff --git 
a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts index 5bd1152b6..385b2a210 100644 --- a/src/services/copilot/create-responses.ts +++ b/src/services/copilot/create-responses.ts @@ -37,7 +37,8 @@ export function inputHasImages(payload: ResponsesPayload): boolean { /** * Returns true if this looks like an agent/multi-turn call: * - any input item has role "assistant", OR - * - any item has type "function_call_output" or "function_call" + * - any item has type "function_call_output", "function_call", or "reasoning" + * (reasoning items only appear when echoing back prior agentic turn context) */ export function isAgentCall(payload: ResponsesPayload): boolean { if (typeof payload.input === "string") return false @@ -46,7 +47,8 @@ export function isAgentCall(payload: ResponsesPayload): boolean { (item: ResponsesInputItem) => ("role" in item && item.role === "assistant") || item.type === "function_call_output" - || item.type === "function_call", + || item.type === "function_call" + || item.type === "reasoning", ) } diff --git a/tests/responses-route.test.ts b/tests/responses-route.test.ts index 78421d4d9..80329be21 100644 --- a/tests/responses-route.test.ts +++ b/tests/responses-route.test.ts @@ -75,17 +75,17 @@ describe("POST /v1/responses — wired handler", () => { }) test("missing copilot token returns 500", async () => { - // Temporarily clear the token — write is synchronous, no await in between. 
state.copilotToken = undefined - - const res = await server.request("/v1/responses", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ model: "gpt-4o", input: [] }), - }) - expect(res.status).toBe(500) - - state.copilotToken = "test-token" + try { + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [] }), + }) + expect(res.status).toBe(500) + } finally { + state.copilotToken = "test-token" + } }) }) @@ -204,6 +204,45 @@ describe("createResponses behavior", () => { globalThis.fetch = fetchMock }) + test("X-Initiator = agent for reasoning item (multi-turn context echo)", async () => { + const captureMock = mock( + (_url: string, opts: { headers: Record }) => + Promise.resolve({ + ok: true, + json: () => Promise.resolve(mockResponseBody), + headers: opts.headers, + }), + ) + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = captureMock + + await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4o", + stream: false, + input: [ + { + type: "reasoning", + id: "rs_abc", + encrypted_content: "opaque-blob", + status: "completed", + }, + ], + }), + }) + + expect(captureMock).toHaveBeenCalled() + const sentHeaders = ( + captureMock.mock.calls[0][1] as { headers: Record } + ).headers + expect(sentHeaders["X-Initiator"]).toBe("agent") + + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = fetchMock + }) + test("upstream 4xx returns error response", async () => { const errorMock = mock(() => Promise.resolve({ From ac754b020def90371affb3ca7e50046946e24f39 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 14:28:02 +0800 Subject: [PATCH 12/17] fix(responses): afterEach mock cleanup, streaming test, 
type + log polish (#4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - afterEach restores globalThis.fetch so assertion failures can't leak mock state into subsequent tests - Add streaming smoke test: mocks ReadableStream SSE body, asserts content-type: text/event-stream and event names forwarded verbatim - Return type AsyncGenerator -> AsyncIterable (matches events() actual type) - Suppress [DONE] sentinel warn — expected at every stream end, not a parse error Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 11 +++-- src/services/copilot/create-responses.ts | 4 +- tests/responses-route.test.ts | 63 ++++++++++++++++++------ 3 files changed, 55 insertions(+), 23 deletions(-) diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index 1307f6dd7..01d52ade0 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -64,10 +64,13 @@ export async function handleResponses(c: Context): Promise { const parsed = JSON.parse(rawEvent.data) as { type: string } consola.debug("Responses SSE event:", parsed.type) } catch { - consola.warn( - "Could not parse Responses SSE chunk for logging:", - rawEvent.data.slice(0, 200), - ) + // [DONE] sentinel is expected at stream end — only warn on unexpected data + if (rawEvent.data !== "[DONE]") { + consola.warn( + "Could not parse Responses SSE chunk for logging:", + rawEvent.data.slice(0, 200), + ) + } } } }, diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts index 385b2a210..1bf7d4aba 100644 --- a/src/services/copilot/create-responses.ts +++ b/src/services/copilot/create-responses.ts @@ -58,9 +58,7 @@ export function isAgentCall(payload: ResponsesPayload): boolean { export const createResponses = async ( payload: ResponsesPayload, -): Promise< - ResponsesResponse | AsyncGenerator -> => { +): Promise> => { if (!state.copilotToken) throw new Error("Copilot token not found") 
const enableVision = inputHasImages(payload) diff --git a/tests/responses-route.test.ts b/tests/responses-route.test.ts index 80329be21..eccc085ca 100644 --- a/tests/responses-route.test.ts +++ b/tests/responses-route.test.ts @@ -1,4 +1,12 @@ -import { describe, test, expect, mock, beforeAll, beforeEach } from "bun:test" +import { + describe, + test, + expect, + mock, + beforeAll, + beforeEach, + afterEach, +} from "bun:test" import { state } from "../src/lib/state" import { server } from "../src/server" @@ -94,7 +102,7 @@ describe("POST /v1/responses — wired handler", () => { // --------------------------------------------------------------------------- describe("createResponses behavior", () => { - // Restore state before each test in this block + // Restore state and fetch mock before/after each test in this block beforeEach(() => { state.copilotToken = "test-token" state.vsCodeVersion = "1.99.0" @@ -102,6 +110,11 @@ describe("createResponses behavior", () => { state.manualApprove = false }) + afterEach(() => { + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = fetchMock + }) + test("X-Initiator = agent when assistant message present", async () => { const captureMock = mock( (_url: string, opts: { headers: Record }) => @@ -132,10 +145,6 @@ describe("createResponses behavior", () => { captureMock.mock.calls[0][1] as { headers: Record } ).headers expect(sentHeaders["X-Initiator"]).toBe("agent") - - // Restore default mock - // @ts-expect-error – mock doesn't implement full fetch signature - globalThis.fetch = fetchMock }) test("X-Initiator = user for pure user messages", async () => { @@ -165,9 +174,6 @@ describe("createResponses behavior", () => { captureMock.mock.calls[0][1] as { headers: Record } ).headers expect(sentHeaders["X-Initiator"]).toBe("user") - - // @ts-expect-error – mock doesn't implement full fetch signature - globalThis.fetch = fetchMock }) test("X-Initiator = agent for function_call_output item", async () => 
{ @@ -199,9 +205,6 @@ describe("createResponses behavior", () => { captureMock.mock.calls[0][1] as { headers: Record } ).headers expect(sentHeaders["X-Initiator"]).toBe("agent") - - // @ts-expect-error – mock doesn't implement full fetch signature - globalThis.fetch = fetchMock }) test("X-Initiator = agent for reasoning item (multi-turn context echo)", async () => { @@ -238,9 +241,6 @@ describe("createResponses behavior", () => { captureMock.mock.calls[0][1] as { headers: Record } ).headers expect(sentHeaders["X-Initiator"]).toBe("agent") - - // @ts-expect-error – mock doesn't implement full fetch signature - globalThis.fetch = fetchMock }) test("upstream 4xx returns error response", async () => { @@ -261,8 +261,39 @@ describe("createResponses behavior", () => { }) expect(res.status).toBe(429) + }) + + test("streaming request proxies SSE events and returns text/event-stream", async () => { + const sseBody = + 'event: response.created\ndata: {"type":"response.created"}\n\n' + + 'event: response.completed\ndata: {"type":"response.completed"}\n\n' + + "data: [DONE]\n\n" + const streamMock = mock(() => + Promise.resolve({ + ok: true, + headers: new Headers({ "content-type": "text/event-stream" }), + body: new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode(sseBody)) + controller.close() + }, + }), + }), + ) // @ts-expect-error – mock doesn't implement full fetch signature - globalThis.fetch = fetchMock + globalThis.fetch = streamMock + + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [], stream: true }), + }) + + expect(res.status).toBe(200) + expect(res.headers.get("content-type")).toMatch(/text\/event-stream/) + const text = await res.text() + expect(text).toContain("response.created") + expect(text).toContain("response.completed") }) }) From d7c4b26ba70cf733299c4364b34fe5bf1d8afdc6 Mon Sep 17 00:00:00 2001 
From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 14:32:07 +0800 Subject: [PATCH 13/17] feat(routing): model-to-endpoint mode classifier (#5) Add getModelMode/isResponsesOnlyModel to classify Responses-only models (codex family, o-pro variants), block them at /chat/completions with a clear 400, and surface a mode field on GET /v1/models. Co-Authored-By: Claude Sonnet 4.6 --- src/lib/model-routing.ts | 49 ++++++ src/routes/chat-completions/handler.ts | 14 ++ src/routes/models/route.ts | 2 + tests/model-routing.test.ts | 199 +++++++++++++++++++++++++ 4 files changed, 264 insertions(+) create mode 100644 src/lib/model-routing.ts create mode 100644 tests/model-routing.test.ts diff --git a/src/lib/model-routing.ts b/src/lib/model-routing.ts new file mode 100644 index 000000000..094874663 --- /dev/null +++ b/src/lib/model-routing.ts @@ -0,0 +1,49 @@ +/** + * Model-to-endpoint routing. + * + * Copilot upstream serves some models exclusively via the Responses API + * (/responses) and others via Chat Completions (/chat/completions). + * Sending a Responses-only model to /chat/completions produces an error. + * + * Detection order: + * 1. If state.models is loaded, check model capabilities.type === "responses" + * (if the upstream ever adds this field). Currently Copilot doesn't set it, + * so we fall through to step 2. + * 2. Static prefix/suffix list (known Responses-only models as of 2025-05). + * + * "Responses-only" models: all gpt-5*-codex variants, o1-pro, o3-pro. + * Everything else (gpt-4o, gpt-5, o1, o3, o4-mini, claude-*, gemini-*) uses + * Chat Completions (or native Anthropic pass-through for Claude). + */ + +import { state } from "~/lib/state" + +/** Endpoint mode for routing. */ +export type ModelMode = "chat" | "responses" + +/** + * Returns the upstream endpoint mode for the given model ID. + * "responses" = must use /responses; "chat" = use /chat/completions (or native Anthropic). 
+ */ +export function getModelMode(modelId: string): ModelMode { + // 1. Check state.models capabilities if available (future-proof) + if (state.models?.data) { + const entry = state.models.data.find((m) => m.id === modelId) + if (entry?.capabilities.type === "responses") return "responses" + } + + // 2. Static heuristic: Responses-only models have "codex" in the name + // or are o-series "pro" variants. + return isResponsesOnlyModel(modelId) ? "responses" : "chat" +} + +/** + * Returns true if the model is known to be Responses-only on Copilot upstream. + */ +export function isResponsesOnlyModel(modelId: string): boolean { + // codex family: gpt-5-codex, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5.3-codex, etc. + if (modelId.includes("codex")) return true + // o-pro family: o1-pro, o3-pro + if (/^o\d+-pro$/.test(modelId)) return true + return false +} diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 04a5ae9ed..5cbc290c1 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -4,6 +4,7 @@ import consola from "consola" import { streamSSE, type SSEMessage } from "hono/streaming" import { awaitApproval } from "~/lib/approval" +import { isResponsesOnlyModel } from "~/lib/model-routing" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" @@ -20,6 +21,19 @@ export async function handleCompletion(c: Context) { let payload = await c.req.json() consola.debug("Request payload:", JSON.stringify(payload).slice(-400)) + if (isResponsesOnlyModel(payload.model)) { + return c.json( + { + error: { + message: `Model "${payload.model}" is only available via the Responses API. 
Use POST /v1/responses instead.`, + type: "invalid_request_error", + code: "responses_only_model", + }, + }, + 400, + ) + } + // Find the selected model const selectedModel = state.models?.data.find( (model) => model.id === payload.model, diff --git a/src/routes/models/route.ts b/src/routes/models/route.ts index 5254e2af7..2de4b797c 100644 --- a/src/routes/models/route.ts +++ b/src/routes/models/route.ts @@ -1,6 +1,7 @@ import { Hono } from "hono" import { forwardError } from "~/lib/error" +import { getModelMode } from "~/lib/model-routing" import { state } from "~/lib/state" import { cacheModels } from "~/lib/utils" @@ -21,6 +22,7 @@ modelRoutes.get("/", async (c) => { created_at: new Date(0).toISOString(), // No date available from source owned_by: model.vendor, display_name: model.name, + mode: getModelMode(model.id), })) return c.json({ diff --git a/tests/model-routing.test.ts b/tests/model-routing.test.ts new file mode 100644 index 000000000..2243a0f5f --- /dev/null +++ b/tests/model-routing.test.ts @@ -0,0 +1,199 @@ +import { describe, test, expect, afterEach, beforeAll } from "bun:test" + +import { getModelMode, isResponsesOnlyModel } from "../src/lib/model-routing" +import { state } from "../src/lib/state" +import { server } from "../src/server" + +// --------------------------------------------------------------------------- +// isResponsesOnlyModel — pure unit tests (no state needed) +// --------------------------------------------------------------------------- + +describe("isResponsesOnlyModel", () => { + test("gpt-5-codex → responses-only", () => + expect(isResponsesOnlyModel("gpt-5-codex")).toBe(true)) + test("gpt-5.1-codex → responses-only", () => + expect(isResponsesOnlyModel("gpt-5.1-codex")).toBe(true)) + test("gpt-5.1-codex-max → responses-only", () => + expect(isResponsesOnlyModel("gpt-5.1-codex-max")).toBe(true)) + test("gpt-5.3-codex → responses-only", () => + expect(isResponsesOnlyModel("gpt-5.3-codex")).toBe(true)) + test("o1-pro → 
responses-only", () => + expect(isResponsesOnlyModel("o1-pro")).toBe(true)) + test("o3-pro → responses-only", () => + expect(isResponsesOnlyModel("o3-pro")).toBe(true)) + test("gpt-4o → chat", () => + expect(isResponsesOnlyModel("gpt-4o")).toBe(false)) + test("gpt-5 → chat", () => expect(isResponsesOnlyModel("gpt-5")).toBe(false)) + test("o1 → chat", () => expect(isResponsesOnlyModel("o1")).toBe(false)) + test("o3 → chat", () => expect(isResponsesOnlyModel("o3")).toBe(false)) + test("claude-sonnet-4-5 → chat", () => + expect(isResponsesOnlyModel("claude-sonnet-4-5")).toBe(false)) + test("o4-mini → chat", () => + expect(isResponsesOnlyModel("o4-mini")).toBe(false)) +}) + +// --------------------------------------------------------------------------- +// getModelMode — with loaded models list (state mutation) +// --------------------------------------------------------------------------- + +describe("getModelMode — with loaded models list", () => { + const savedModels = state.models + + afterEach(() => { + state.models = savedModels + }) + + test("model with capabilities.type=responses in list → responses", () => { + state.models = { + object: "list", + data: [ + { + id: "future-responses-model", + vendor: "OpenAI", + name: "Future Model", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "responses", // upstream sets this + }, + }, + ], + } + expect(getModelMode("future-responses-model")).toBe("responses") + }) + + test("model with capabilities.type=chat in list → falls through to heuristic", () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-5-codex", + vendor: "OpenAI", + name: "Codex", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: 
"cl100k_base", + type: "chat", + }, + }, + ], + } + // capabilities.type is "chat" so list check doesn't return "responses", + // falls through to static heuristic which sees "codex" → responses + expect(getModelMode("gpt-5-codex")).toBe("responses") + }) + + test("regular chat model → chat", () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-4o", + vendor: "OpenAI", + name: "GPT-4o", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "chat", + }, + }, + ], + } + expect(getModelMode("gpt-4o")).toBe("chat") + }) + + test("state.models undefined → heuristic (codex → responses)", () => { + state.models = undefined + expect(getModelMode("gpt-5-codex")).toBe("responses") + }) + + test("state.models undefined → heuristic (gpt-4o → chat)", () => { + state.models = undefined + expect(getModelMode("gpt-4o")).toBe("chat") + }) +}) + +// --------------------------------------------------------------------------- +// Route-level: POST /v1/chat/completions blocks Responses-only models +// --------------------------------------------------------------------------- + +describe("chat-completions route blocks responses-only models", () => { + beforeAll(() => { + state.copilotToken = "test-token" + state.vsCodeVersion = "1.99.0" + state.accountType = "individual" + state.manualApprove = false + }) + + test("gpt-5-codex → 400 with responses_only_model code", async () => { + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5-codex", + messages: [{ role: "user", content: "hello" }], + }), + }) + expect(res.status).toBe(400) + const body = (await res.json()) as { + error: { type: string; code: string; message: string } + } + expect(body.error.code).toBe("responses_only_model") + 
expect(body.error.type).toBe("invalid_request_error") + expect(body.error.message).toContain("gpt-5-codex") + expect(body.error.message).toContain("/v1/responses") + }) + + test("o1-pro → 400 with responses_only_model code", async () => { + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "o1-pro", + messages: [{ role: "user", content: "hello" }], + }), + }) + expect(res.status).toBe(400) + const body = (await res.json()) as { + error: { code: string } + } + expect(body.error.code).toBe("responses_only_model") + }) + + test("gpt-5.1-codex-max → 400 with responses_only_model code", async () => { + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.1-codex-max", + messages: [{ role: "user", content: "hello" }], + }), + }) + expect(res.status).toBe(400) + const body = (await res.json()) as { + error: { code: string } + } + expect(body.error.code).toBe("responses_only_model") + }) +}) From 394203ab0d76a143059355bf15ed97fc100c16df Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 15:22:35 +0800 Subject: [PATCH 14/17] fix(routing): capabilities short-circuit, dated alias regex, guard ordering, test fixes (#5) Co-Authored-By: Claude Sonnet 4.6 --- src/lib/model-routing.ts | 6 +- src/routes/chat-completions/handler.ts | 8 +-- tests/model-routing.test.ts | 95 ++++++++++++++++++++++++-- 3 files changed, 97 insertions(+), 12 deletions(-) diff --git a/src/lib/model-routing.ts b/src/lib/model-routing.ts index 094874663..b2aeb3abc 100644 --- a/src/lib/model-routing.ts +++ b/src/lib/model-routing.ts @@ -30,6 +30,7 @@ export function getModelMode(modelId: string): ModelMode { if (state.models?.data) { const entry = state.models.data.find((m) => m.id === modelId) if (entry?.capabilities.type === 
"responses") return "responses" + if (entry?.capabilities.type === "chat") return "chat" // trust upstream when explicit } // 2. Static heuristic: Responses-only models have "codex" in the name @@ -43,7 +44,8 @@ export function getModelMode(modelId: string): ModelMode { export function isResponsesOnlyModel(modelId: string): boolean { // codex family: gpt-5-codex, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5.3-codex, etc. if (modelId.includes("codex")) return true - // o-pro family: o1-pro, o3-pro - if (/^o\d+-pro$/.test(modelId)) return true + // o-pro family: o1-pro, o3-pro, o1-pro-2025-04-09, o3-pro-2025-01-10, etc. + // Covers: o\d+-pro(?:-\d{4}-\d{2}-\d{2})? — requires string to end after "pro" or date + if (/^o\d+-pro(?:-\d{4}-\d{2}-\d{2})?$/.test(modelId)) return true return false } diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 5cbc290c1..456d1282b 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -4,7 +4,7 @@ import consola from "consola" import { streamSSE, type SSEMessage } from "hono/streaming" import { awaitApproval } from "~/lib/approval" -import { isResponsesOnlyModel } from "~/lib/model-routing" +import { getModelMode } from "~/lib/model-routing" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" @@ -16,12 +16,10 @@ import { } from "~/services/copilot/create-chat-completions" export async function handleCompletion(c: Context) { - await checkRateLimit(state) - let payload = await c.req.json() consola.debug("Request payload:", JSON.stringify(payload).slice(-400)) - if (isResponsesOnlyModel(payload.model)) { + if (getModelMode(payload.model) === "responses") { return c.json( { error: { @@ -34,6 +32,8 @@ export async function handleCompletion(c: Context) { ) } + await checkRateLimit(state) + // Find the selected model const selectedModel = state.models?.data.find( (model) => 
model.id === payload.model, diff --git a/tests/model-routing.test.ts b/tests/model-routing.test.ts index 2243a0f5f..8d758643f 100644 --- a/tests/model-routing.test.ts +++ b/tests/model-routing.test.ts @@ -1,4 +1,11 @@ -import { describe, test, expect, afterEach, beforeAll } from "bun:test" +import { + describe, + test, + expect, + afterEach, + beforeEach, + beforeAll, +} from "bun:test" import { getModelMode, isResponsesOnlyModel } from "../src/lib/model-routing" import { state } from "../src/lib/state" @@ -30,6 +37,12 @@ describe("isResponsesOnlyModel", () => { expect(isResponsesOnlyModel("claude-sonnet-4-5")).toBe(false)) test("o4-mini → chat", () => expect(isResponsesOnlyModel("o4-mini")).toBe(false)) + test("o4-pro → responses-only", () => + expect(isResponsesOnlyModel("o4-pro")).toBe(true)) + test("o1-pro-2025-04-09 (dated alias) → responses-only", () => + expect(isResponsesOnlyModel("o1-pro-2025-04-09")).toBe(true)) + test("o3-pro-mini → NOT responses-only (not a pro variant)", () => + expect(isResponsesOnlyModel("o3-pro-mini")).toBe(false)) }) // --------------------------------------------------------------------------- @@ -37,7 +50,11 @@ describe("isResponsesOnlyModel", () => { // --------------------------------------------------------------------------- describe("getModelMode — with loaded models list", () => { - const savedModels = state.models + let savedModels: typeof state.models + + beforeEach(() => { + savedModels = state.models + }) afterEach(() => { state.models = savedModels @@ -69,7 +86,7 @@ describe("getModelMode — with loaded models list", () => { expect(getModelMode("future-responses-model")).toBe("responses") }) - test("model with capabilities.type=chat in list → falls through to heuristic", () => { + test("model with explicit capabilities.type=chat in list → chat (upstream authoritative)", () => { state.models = { object: "list", data: [ @@ -92,9 +109,8 @@ describe("getModelMode — with loaded models list", () => { }, ], } - // 
capabilities.type is "chat" so list check doesn't return "responses", - // falls through to static heuristic which sees "codex" → responses - expect(getModelMode("gpt-5-codex")).toBe("responses") + // capabilities.type = "chat" is authoritative → returns "chat" even though name contains "codex" + expect(getModelMode("gpt-5-codex")).toBe("chat") }) test("regular chat model → chat", () => { @@ -139,6 +155,8 @@ describe("getModelMode — with loaded models list", () => { // --------------------------------------------------------------------------- describe("chat-completions route blocks responses-only models", () => { + let savedModels: typeof state.models + beforeAll(() => { state.copilotToken = "test-token" state.vsCodeVersion = "1.99.0" @@ -146,6 +164,14 @@ describe("chat-completions route blocks responses-only models", () => { state.manualApprove = false }) + beforeEach(() => { + savedModels = state.models + }) + + afterEach(() => { + state.models = savedModels + }) + test("gpt-5-codex → 400 with responses_only_model code", async () => { const res = await server.request("/v1/chat/completions", { method: "POST", @@ -196,4 +222,61 @@ describe("chat-completions route blocks responses-only models", () => { } expect(body.error.code).toBe("responses_only_model") }) + + test("model with capabilities.type=responses in state is blocked at /v1/chat/completions", async () => { + // Set up a model that only the capabilities path would catch (not the heuristic) + state.models = { + object: "list", + data: [ + { + id: "o5-turbo", // no "codex", not "o\d+-pro" + vendor: "OpenAI", + name: "O5 Turbo", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "responses", + }, + }, + ], + } + + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: 
JSON.stringify({ + model: "o5-turbo", + messages: [{ role: "user", content: "hi" }], + }), + }) + expect(res.status).toBe(400) + const body = (await res.json()) as { error: { code: string } } + expect(body.error.code).toBe("responses_only_model") + }) + + test("gpt-4o is NOT blocked at /v1/chat/completions (chat model)", async () => { + // gpt-4o is a chat model — should pass the guard (will fail at upstream but not with 400) + // We just need status !== 400 with code responses_only_model + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4o", + messages: [{ role: "user", content: "hi" }], + }), + }) + // Should NOT return the routing 400 + if (res.status === 400) { + const body = (await res.json()) as { error?: { code?: string } } + expect(body.error?.code).not.toBe("responses_only_model") + } + // Any other status is fine (500 from missing upstream, etc.) + }) }) From 68d6b94aabdc8171dd7002ffb1d2be2b49d620f1 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 15:32:45 +0800 Subject: [PATCH 15/17] fix(routing): undefined model guard, codex regex, capabilities type narrowing (#5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - getModelMode: return "chat" early when modelId is falsy — prevents TypeError crash on requests missing the model field - isResponsesOnlyModel: anchor codex check with word boundaries (/(?:^|-)codex(?:-|$)/) to avoid false-positives on future codex-mini - ModelCapabilities.type: narrow to "chat"|"responses"|(string & {}) so routing logic is type-checked against known upstream values Co-Authored-By: Claude Sonnet 4.6 --- src/lib/model-routing.ts | 6 +++++- src/services/copilot/get-models.ts | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/lib/model-routing.ts b/src/lib/model-routing.ts index 
b2aeb3abc..89f6ddd30 100644 --- a/src/lib/model-routing.ts +++ b/src/lib/model-routing.ts @@ -26,6 +26,9 @@ export type ModelMode = "chat" | "responses" * "responses" = must use /responses; "chat" = use /chat/completions (or native Anthropic). */ export function getModelMode(modelId: string): ModelMode { + // Guard: treat missing/empty model as "chat" — upstream will reject with a proper error + if (!modelId) return "chat" + // 1. Check state.models capabilities if available (future-proof) if (state.models?.data) { const entry = state.models.data.find((m) => m.id === modelId) @@ -43,7 +46,8 @@ export function getModelMode(modelId: string): ModelMode { */ export function isResponsesOnlyModel(modelId: string): boolean { // codex family: gpt-5-codex, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5.3-codex, etc. - if (modelId.includes("codex")) return true + // Anchored to word boundaries to avoid matching hypothetical future "codex-mini" chat models. + if (/(?:^|-)codex(?:-|$)/.test(modelId)) return true // o-pro family: o1-pro, o3-pro, o1-pro-2025-04-09, o3-pro-2025-01-10, etc. // Covers: o\d+-pro(?:-\d{4}-\d{2}-\d{2})? — requires string to end after "pro" or date if (/^o\d+-pro(?:-\d{4}-\d{2}-\d{2})?$/.test(modelId)) return true diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts index 3cfa30af0..fb215bce8 100644 --- a/src/services/copilot/get-models.ts +++ b/src/services/copilot/get-models.ts @@ -36,7 +36,8 @@ interface ModelCapabilities { object: string supports: ModelSupports tokenizer: string - type: string + /** Known values: "chat" | "responses". Open string for forward-compat. 
*/ + type: "chat" | "responses" | (string & {}) } export interface Model { From e1df9cce5ad460808ccdd803eb822f8bc05c9832 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 15:36:33 +0800 Subject: [PATCH 16/17] feat(responses): preserve encrypted_content, strip null status (#6) Adds a sanitiseResponsesOutput translation layer that guarantees encrypted_content on reasoning items is never stripped (required for multi-turn continuity) and removes status: null fields that Copilot upstream rejects on re-submission (litellm PR #22370). Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 9 +- src/routes/responses/translation.ts | 44 ++++++++++ tests/responses-translation.test.ts | 130 ++++++++++++++++++++++++++++ 3 files changed, 180 insertions(+), 3 deletions(-) create mode 100644 src/routes/responses/translation.ts create mode 100644 tests/responses-translation.test.ts diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index 01d52ade0..44de3eaf5 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -7,7 +7,9 @@ import { awaitApproval } from "~/lib/approval" import { state } from "~/lib/state" import { createResponses } from "~/services/copilot/create-responses" -import type { ResponsesPayload } from "./types" +import type { ResponsesPayload, ResponsesResponse } from "./types" + +import { sanitiseResponsesOutput } from "./translation" export async function handleResponses(c: Context): Promise { let payload: ResponsesPayload @@ -35,11 +37,12 @@ export async function handleResponses(c: Context): Promise { const response = await createResponses(payload) if (!payload.stream) { + const sanitised = sanitiseResponsesOutput(response as ResponsesResponse) consola.debug( "Responses non-streaming response:", - JSON.stringify(response).slice(0, 400), + JSON.stringify(sanitised).slice(0, 400), ) - return c.json(response) + return c.json(sanitised) } 
// Streaming: proxy SSE events verbatim (same pattern as native Anthropic pass-through) diff --git a/src/routes/responses/translation.ts b/src/routes/responses/translation.ts new file mode 100644 index 000000000..262a79984 --- /dev/null +++ b/src/routes/responses/translation.ts @@ -0,0 +1,44 @@ +/** + * Response translation for the Responses API path. + * + * Key invariants: + * - reasoning items MUST preserve `encrypted_content` verbatim (required for + * multi-turn continuity — see issue #6 and litellm PR #17130) + * - `status: null` fields are stripped (Copilot upstream rejects null status + * on subsequent turns — see litellm PR #22370) + */ + +import type { ResponsesResponse, ResponsesOutputItem } from "./types" + +// Upstream (e.g. litellm) may send `status: null` even though our TypeScript +// types forbid it. Use a separate loose type to represent that reality. +type LooseOutputItem = Omit & { + status?: string | null +} + +/** + * Sanitise a Responses API response object before forwarding to the client. + * + * Guarantees: + * 1. `encrypted_content` on reasoning items is preserved (never stripped). + * 2. `status: null` is removed from all output items. + * 3. All other fields are passed through untouched. + */ +export function sanitiseResponsesOutput( + response: ResponsesResponse, +): ResponsesResponse { + return { + ...response, + output: response.output.map((item) => sanitiseOutputItem(item)), + } +} + +function sanitiseOutputItem(item: ResponsesOutputItem): ResponsesOutputItem { + // Cast to the loose type so the null-status check is valid at compile time. 
+ const loose = item as unknown as LooseOutputItem + if (loose.status === null) { + const { status: _dropped, ...rest } = loose + return rest as unknown as ResponsesOutputItem + } + return item +} diff --git a/tests/responses-translation.test.ts b/tests/responses-translation.test.ts new file mode 100644 index 000000000..0d6d06eed --- /dev/null +++ b/tests/responses-translation.test.ts @@ -0,0 +1,130 @@ +import { describe, test, expect } from "bun:test" + +import type { ResponsesResponse } from "../src/routes/responses/types" + +import { sanitiseResponsesOutput } from "../src/routes/responses/translation" + +// Minimal valid response fixture +function makeResponse(output: ResponsesResponse["output"]): ResponsesResponse { + return { + id: "resp_test", + object: "response", + created_at: 1_700_000_000, + model: "gpt-5", + status: "completed", + output, + } +} + +describe("sanitiseResponsesOutput", () => { + test("preserves encrypted_content on reasoning items", () => { + const response = makeResponse([ + { + type: "reasoning", + id: "rs_abc", + encrypted_content: "opaque-blob-xyz", + summary: [{ type: "summary_text", text: "thought about it" }], + status: "completed", + }, + ]) + const result = sanitiseResponsesOutput(response) + const reasoning = result.output[0] as { encrypted_content?: string } + expect(reasoning.encrypted_content).toBe("opaque-blob-xyz") + }) + + test("strips status: null from reasoning items", () => { + const response = makeResponse([ + { + type: "reasoning", + id: "rs_null_status", + encrypted_content: "blob", + // status is null — TypeScript won't allow this directly but upstream sends it + } as unknown as ResponsesResponse["output"][0], + ]) + + const result = sanitiseResponsesOutput(response) + const item = result.output[0] as Record + expect("status" in item).toBe(false) + }) + + test("preserves non-null status on reasoning items", () => { + const response = makeResponse([ + { + type: "reasoning", + id: "rs_completed", + status: "completed", 
+ }, + ]) + const result = sanitiseResponsesOutput(response) + expect((result.output[0] as { status: string }).status).toBe("completed") + }) + + test("passes message items through unchanged", () => { + const response = makeResponse([ + { + type: "message", + id: "msg_1", + role: "assistant", + content: [{ type: "output_text", text: "hello" }], + status: "completed", + }, + ]) + const result = sanitiseResponsesOutput(response) + expect(result.output[0]).toEqual(response.output[0]) + }) + + test("passes function_call items through unchanged", () => { + const response = makeResponse([ + { + type: "function_call", + id: "fc_1", + call_id: "call_abc", + name: "get_weather", + arguments: '{"city":"London"}', + status: "completed", + }, + ]) + const result = sanitiseResponsesOutput(response) + expect(result.output[0]).toEqual(response.output[0]) + }) + + test("handles empty output array", () => { + const response = makeResponse([]) + const result = sanitiseResponsesOutput(response) + expect(result.output).toEqual([]) + }) + + test("top-level response fields are preserved", () => { + const response = makeResponse([]) + response.usage = { input_tokens: 100, output_tokens: 50, total_tokens: 150 } + const result = sanitiseResponsesOutput(response) + expect(result.usage).toEqual(response.usage) + expect(result.id).toBe("resp_test") + expect(result.model).toBe("gpt-5") + }) + + test("multiple mixed output items all sanitised", () => { + const response = makeResponse([ + { + type: "reasoning", + id: "rs_1", + encrypted_content: "secret", + status: null as unknown as "completed", + }, + { + type: "message", + id: "msg_1", + role: "assistant", + content: [{ type: "output_text", text: "answer" }], + status: "completed", + }, + ]) + const result = sanitiseResponsesOutput(response) + // First item: status stripped, encrypted_content preserved + const first = result.output[0] as Record + expect("status" in first).toBe(false) + expect(first["encrypted_content"]).toBe("secret") + // 
Second item: unchanged + expect(result.output[1]).toEqual(response.output[1]) + }) +}) From b191620e95726bfe0b798d2b5e39da6d9d69e377 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 22:11:45 +0800 Subject: [PATCH 17/17] fix(responses): sanitise SSE stream + fix false-positive null-status test (#6) - Export sanitiseOutputItem so streaming path can use it - Streaming handler parses each SSE event and strips status:null from embedded item/output fields before forwarding - Fix false-positive test: inject status:null explicitly + assert encrypted_content survives; add in_progress / incomplete passthrough tests Co-Authored-By: Claude Sonnet 4.6 --- src/routes/responses/handler.ts | 55 ++++++++++++++++++++--------- src/routes/responses/translation.ts | 8 ++++- tests/responses-translation.test.ts | 32 +++++++++++++++-- 3 files changed, 75 insertions(+), 20 deletions(-) diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index 44de3eaf5..53ead897e 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -9,7 +9,7 @@ import { createResponses } from "~/services/copilot/create-responses" import type { ResponsesPayload, ResponsesResponse } from "./types" -import { sanitiseResponsesOutput } from "./translation" +import { sanitiseOutputItem, sanitiseResponsesOutput } from "./translation" export async function handleResponses(c: Context): Promise { let payload: ResponsesPayload @@ -56,25 +56,46 @@ export async function handleResponses(c: Context): Promise { }>) { if (!rawEvent.data) continue - // Forward verbatim first - await stream.writeSSE({ - event: rawEvent.event, - data: rawEvent.data, - }) - - // Parse only for debug logging - try { - const parsed = JSON.parse(rawEvent.data) as { type: string } - consola.debug("Responses SSE event:", parsed.type) - } catch { - // [DONE] sentinel is expected at stream end — only warn on unexpected data - if (rawEvent.data !== 
"[DONE]") { - consola.warn( - "Could not parse Responses SSE chunk for logging:", - rawEvent.data.slice(0, 200), + // Sanitise status:null from embedded output items before forwarding. + // SSE events like response.output_item.done carry full item snapshots + // which can contain null status fields rejected by upstream on re-submission. + let forwardData = rawEvent.data + if (rawEvent.data !== "[DONE]") { + try { + const parsed = JSON.parse(rawEvent.data) as Record + consola.debug( + "Responses SSE event:", + (parsed as { type?: string }).type, ) + // Sanitise embedded item or output array + if (parsed["item"]) { + parsed["item"] = sanitiseOutputItem( + parsed["item"] as Parameters[0], + ) + } + if (Array.isArray(parsed["output"])) { + parsed["output"] = ( + parsed["output"] as Array< + Parameters[0] + > + ).map((i) => sanitiseOutputItem(i)) + } + forwardData = JSON.stringify(parsed) + } catch { + // [DONE] sentinel or truly malformed chunk + if (rawEvent.data !== "[DONE]") { + consola.warn( + "Could not parse Responses SSE chunk for logging:", + rawEvent.data.slice(0, 200), + ) + } } } + + await stream.writeSSE({ + event: rawEvent.event, + data: forwardData, + }) } }, async (err, stream) => { diff --git a/src/routes/responses/translation.ts b/src/routes/responses/translation.ts index 262a79984..4fa2c456d 100644 --- a/src/routes/responses/translation.ts +++ b/src/routes/responses/translation.ts @@ -33,7 +33,13 @@ export function sanitiseResponsesOutput( } } -function sanitiseOutputItem(item: ResponsesOutputItem): ResponsesOutputItem { +/** + * Sanitise a single output item from an SSE event or non-streaming response. + * Exported so the streaming path can apply the same logic per-event. + */ +export function sanitiseOutputItem( + item: ResponsesOutputItem, +): ResponsesOutputItem { // Cast to the loose type so the null-status check is valid at compile time. 
const loose = item as unknown as LooseOutputItem if (loose.status === null) { diff --git a/tests/responses-translation.test.ts b/tests/responses-translation.test.ts index 0d6d06eed..b1f4e26c2 100644 --- a/tests/responses-translation.test.ts +++ b/tests/responses-translation.test.ts @@ -32,19 +32,47 @@ describe("sanitiseResponsesOutput", () => { expect(reasoning.encrypted_content).toBe("opaque-blob-xyz") }) - test("strips status: null from reasoning items", () => { + test("strips status: null from reasoning items (and preserves encrypted_content)", () => { + // Explicitly inject status: null — upstream sends this despite TS types forbidding it const response = makeResponse([ { type: "reasoning", id: "rs_null_status", encrypted_content: "blob", - // status is null — TypeScript won't allow this directly but upstream sends it + status: null, } as unknown as ResponsesResponse["output"][0], ]) const result = sanitiseResponsesOutput(response) const item = result.output[0] as Record + // status must be stripped expect("status" in item).toBe(false) + // encrypted_content must survive + expect(item["encrypted_content"]).toBe("blob") + }) + + test("preserves status: in_progress unchanged", () => { + const response = makeResponse([ + { + type: "reasoning", + id: "rs_inprogress", + status: "in_progress", + }, + ]) + const result = sanitiseResponsesOutput(response) + expect((result.output[0] as { status: string }).status).toBe("in_progress") + }) + + test("preserves status: incomplete unchanged", () => { + const response = makeResponse([ + { + type: "reasoning", + id: "rs_incomplete", + status: "incomplete", + }, + ]) + const result = sanitiseResponsesOutput(response) + expect((result.output[0] as { status: string }).status).toBe("incomplete") }) test("preserves non-null status on reasoning items", () => {