diff --git a/.gitignore b/.gitignore index 577a4f199..9650fd579 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,4 @@ node_modules/ .eslintcache # build output -dist/ \ No newline at end of file +dist/.crew/ diff --git a/bun.lock b/bun.lock index 20e895e7f..9ece87578 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "copilot-api", diff --git a/docs/prd/native-anthropic-passthrough.md b/docs/prd/native-anthropic-passthrough.md new file mode 100644 index 000000000..606c98ca8 --- /dev/null +++ b/docs/prd/native-anthropic-passthrough.md @@ -0,0 +1,74 @@ +# Native Anthropic Pass-Through for Claude Models + +## Status +Approved + +## Overview +Route Anthropic `/v1/messages` requests for Claude models directly to the GitHub Copilot upstream's native Anthropic endpoint, bypassing the existing OpenAI translation layer. This preserves thinking blocks with `signature` field, `top_k`, `cache_control`, and richer usage stats — none of which survive the current translation round-trip. + +## Motivation +GitHub Copilot's upstream (`api.enterprise.githubcopilot.com`) natively speaks the Anthropic Messages API for all Claude 4.5+ models. The current code path translates Anthropic → OpenAI → sends → translates back, losing: +- `thinking` blocks (completely dropped) +- `signature` field on thinking blocks (required for multi-turn reasoning) +- `cache_creation_input_tokens` in usage +- `top_k` parameter +- `cache_control` on system/user blocks + +The fix: detect Claude models by `vendor === "Anthropic"` from the `/models` endpoint, and forward requests verbatim to `/v1/messages` upstream. + +## Requirements + +1. **`create-messages-native.ts`** — Service client that POSTs Anthropic payloads directly to `${copilotBaseUrl}/v1/messages` with correct headers (`anthropic-version`, `anthropic-beta`). +2. 
**Route dispatch** — `handler.ts` checks `isNativeAnthropicModel(model)` and branches to native path for Claude, translation path for everything else. +3. **`native-models.ts`** — `isNativeAnthropicModel(modelId)` checks `state.models` vendor field; falls back to `claude-` prefix heuristic before models load. +4. **Type fixes** — `anthropic-types.ts`: `signature?` on `AnthropicThinkingBlock`; union `thinking` type for adaptive (opus-4.7+); `output_config`; `AnthropicImageBlock` URL source; `AnthropicToolResultBlock.content` widened. +5. **Adaptive thinking upgrade** — `create-messages-native.ts` auto-upgrades `{ type: "enabled" }` → `{ type: "adaptive" }` + `output_config.effort` for `claude-opus-4.7+` models. +6. **SSE proxy** — Streaming responses from native path forwarded verbatim to client (no re-translation needed). + +## Acceptance Criteria + +- Claude models (`vendor === "Anthropic"`) route to native path; non-Claude models route to translation path. +- Thinking blocks with `signature` field returned to client in both streaming and non-streaming. +- Multi-turn conversations with thinking blocks (echoing `signature`) work correctly. +- `claude-opus-4.7+` with `{ type: "enabled" }` thinking auto-upgrades to adaptive format; no HTTP 400. +- All existing tests pass; new tests cover native vs. translation dispatch. + +## Technical Approach + +### Model detection +`state.models.data` from `/models` endpoint has `vendor: "Anthropic"` for all Claude models. `isNativeAnthropicModel()` checks this first, falls back to `startsWith("claude-")` heuristic. + +### Headers for native path +``` +anthropic-version: 2023-06-01 +anthropic-beta: interleaved-thinking-2025-05-14,prompt-caching-2024-07-31 +``` +Plus all standard Copilot headers (auth, editor-version, etc.). + +### Streaming proxy +Native upstream sends proper Anthropic SSE events. Parse `event.type` for logging; forward `rawEvent.data` verbatim. No translation needed. 
+ +### Adaptive thinking (opus-4.7+) +If model matches `/^claude-opus-4[.-](\d+)/` with minor ≥ 7, auto-upgrade `{ type: "enabled", budget_tokens: N }` → `{ type: "adaptive" }` + `output_config: { effort: "medium" }`. + +## File Changes + +**New:** +- `src/services/copilot/create-messages-native.ts` +- `src/services/copilot/native-models.ts` + +**Modified:** +- `src/routes/messages/anthropic-types.ts` — type fixes +- `src/routes/messages/handler.ts` — dispatch logic +- `src/routes/messages/non-stream-translation.ts` — remove stale comment; fix image source narrowing + +## Testing Strategy +- Unit: `isNativeAnthropicModel()` with populated vs empty `state.models` +- Unit: `buildUpstreamPayload()` adaptive thinking upgrade +- Integration: handler routes Claude models to native, GPT models to translation +- Existing translation tests must still pass + +## Out of Scope +- Persistent caching of native responses +- URL image sources (rejected by upstream; type kept for fidelity) +- Responses API (#1 epic) diff --git a/docs/prd/vscode-header-simulation.md b/docs/prd/vscode-header-simulation.md new file mode 100644 index 000000000..30ae0a91a --- /dev/null +++ b/docs/prd/vscode-header-simulation.md @@ -0,0 +1,67 @@ +# VS Code Header Simulation Accuracy + +## Status +Approved + +## Overview +Auto-detect and keep current the VS Code + Copilot Chat version strings used in every upstream request header, so traffic looks indistinguishable from a real VS Code editor session. + +## Motivation +copilot-api impersonates VS Code Copilot Chat toward GitHub's upstream. Hardcoded version strings become stale as VS Code releases new versions every month. Stale strings increase the distinguishability of copilot-api traffic from legitimate editor traffic. The fix: query live version sources at startup, cache them, and fall back to hardcoded values on failure — so headers always reflect the latest shipping release. + +## Requirements + +1. 
**VS Code version auto-detect** — On startup, query `https://update.code.visualstudio.com/api/releases/stable` (JSON array, first element is latest stable version). Use the result for `editor-version: vscode/`. +2. **Copilot Chat extension version auto-detect** — On startup, query the VS Code Marketplace API for `GitHub.copilot-chat` and extract the latest version. Use it for `editor-plugin-version: copilot-chat/` and `user-agent: GitHubCopilotChat/`. +3. **24-hour TTL in-memory cache** — Cache both versions for 24 h so repeated token refreshes don't re-query external APIs unnecessarily. +4. **Graceful fallback** — If either fetch fails (network error, timeout, unexpected shape), log a warning and continue with the existing hardcoded fallback values. Never crash startup. +5. **Startup log** — At `consola.info` level, print the resolved version strings (`VSCode: X.Y.Z`, `Copilot Chat: A.B.C`) so the user can verify what's being used. +6. **`x-request-id`** — Confirm it is already generated per-request via `crypto.randomUUID()` (it is — no change needed). +7. **Header documentation** — Add a comment block in `src/lib/api-config.ts` explaining each header's source and how to update it. + +## Acceptance Criteria + +- On a clean startup with network access, printed versions match the latest stable VS Code release visible at `https://code.visualstudio.com/updates/`. +- On startup with network blocked, a warning is logged and the server still starts with fallback values. +- No new CLI flags required — version detection is automatic. +- All existing tests pass. + +## Technical Approach + +### VS Code version +`GET https://update.code.visualstudio.com/api/releases/stable` returns a JSON array of version strings. Take `[0]`. 
+ +### Copilot Chat version +VS Code Marketplace API: +``` +GET https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery +Content-Type: application/json +Accept: application/json;api-version=3.0-preview.1 + +Body: { + "filters": [{ "criteria": [{ "filterType": 7, "value": "GitHub.copilot-chat" }] }], + "flags": 529 +} +``` +Response path: `results[0].extensions[0].versions[0].version` + +### Caching +Simple module-level `{ version: string, fetchedAt: number }` objects. If `Date.now() - fetchedAt < 24 * 60 * 60 * 1000`, return cached value. + +### File changes +- `src/services/get-vscode-version.ts` — extend with VS Code stable API; keep AUR fallback as secondary fallback. +- `src/services/get-copilot-chat-version.ts` — new file for Copilot Chat extension version. +- `src/lib/utils.ts` — `cacheVSCodeVersion()` also calls `cacheCopilotChatVersion()`. +- `src/lib/state.ts` — add `copilotChatVersion?: string`. +- `src/lib/api-config.ts` — use `state.copilotChatVersion` for `editor-plugin-version` and `user-agent`; add header documentation comment. + +## Testing Strategy +- Unit test `get-vscode-version.ts`: mock fetch → returns parsed version; mock fail → returns fallback. +- Unit test `get-copilot-chat-version.ts`: mock fetch → returns parsed version; mock fail → returns fallback. +- Existing translation tests must continue to pass. + +## Out of Scope +- `OpenAI-Organization` header (not confirmed in VS Code traffic). +- `X-Vscode-User-Agent-Library-Comment` (not confirmed). +- Persistent disk cache (in-memory TTL is sufficient for a single server process). +- Auto-restart on version change. 
diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts index 83bce92ad..09d235bf7 100644 --- a/src/lib/api-config.ts +++ b/src/lib/api-config.ts @@ -1,5 +1,8 @@ import { randomUUID } from "node:crypto" +import { FALLBACK as COPILOT_CHAT_VERSION_FALLBACK } from "~/services/get-copilot-chat-version" +import { FALLBACK as VSCODE_VERSION_FALLBACK } from "~/services/get-vscode-version" + import type { State } from "./state" export const standardHeaders = () => ({ @@ -7,24 +10,38 @@ export const standardHeaders = () => ({ accept: "application/json", }) -const COPILOT_VERSION = "0.26.7" -const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}` -const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}` - const API_VERSION = "2025-04-01" export const copilotBaseUrl = (state: State) => state.accountType === "individual" ? "https://api.githubcopilot.com" : `https://api.${state.accountType}.githubcopilot.com` + +/** + * Headers sent with every upstream request to mimic VS Code Copilot Chat traffic. + * + * Header sources: + * - Authorization — Copilot token from GitHub OAuth flow + * - editor-version — Auto-detected VS Code stable release (update.code.visualstudio.com) + * - editor-plugin-version — Auto-detected GitHub.copilot-chat Marketplace version + * - user-agent — Same as editor-plugin-version, GitHubCopilotChat/ + * - copilot-integration-id — Fixed "vscode-chat" + * - openai-intent — Fixed "conversation-panel" + * - x-github-api-version — Fixed "2025-04-01" (verify periodically against VS Code source) + * - x-request-id — Per-request UUID via crypto.randomUUID() + * - x-vscode-user-agent-library-version — Fixed "electron-fetch" + * - copilot-vision-request — Added when request includes image content + */ export const copilotHeaders = (state: State, vision: boolean = false) => { + const copilotVersion = + state.copilotChatVersion ?? 
COPILOT_CHAT_VERSION_FALLBACK const headers: Record = { Authorization: `Bearer ${state.copilotToken}`, "content-type": standardHeaders()["content-type"], "copilot-integration-id": "vscode-chat", - "editor-version": `vscode/${state.vsCodeVersion}`, - "editor-plugin-version": EDITOR_PLUGIN_VERSION, - "user-agent": USER_AGENT, + "editor-version": `vscode/${state.vsCodeVersion ?? VSCODE_VERSION_FALLBACK}`, + "editor-plugin-version": `copilot-chat/${copilotVersion}`, + "user-agent": `GitHubCopilotChat/${copilotVersion}`, "openai-intent": "conversation-panel", "x-github-api-version": API_VERSION, "x-request-id": randomUUID(), @@ -37,15 +54,19 @@ export const copilotHeaders = (state: State, vision: boolean = false) => { } export const GITHUB_API_BASE_URL = "https://api.github.com" -export const githubHeaders = (state: State) => ({ - ...standardHeaders(), - authorization: `token ${state.githubToken}`, - "editor-version": `vscode/${state.vsCodeVersion}`, - "editor-plugin-version": EDITOR_PLUGIN_VERSION, - "user-agent": USER_AGENT, - "x-github-api-version": API_VERSION, - "x-vscode-user-agent-library-version": "electron-fetch", -}) +export const githubHeaders = (state: State) => { + const copilotVersion = + state.copilotChatVersion ?? COPILOT_CHAT_VERSION_FALLBACK + return { + ...standardHeaders(), + authorization: `token ${state.githubToken}`, + "editor-version": `vscode/${state.vsCodeVersion ?? VSCODE_VERSION_FALLBACK}`, + "editor-plugin-version": `copilot-chat/${copilotVersion}`, + "user-agent": `GitHubCopilotChat/${copilotVersion}`, + "x-github-api-version": API_VERSION, + "x-vscode-user-agent-library-version": "electron-fetch", + } +} export const GITHUB_BASE_URL = "https://github.com" export const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98" diff --git a/src/lib/model-routing.ts b/src/lib/model-routing.ts new file mode 100644 index 000000000..89f6ddd30 --- /dev/null +++ b/src/lib/model-routing.ts @@ -0,0 +1,55 @@ +/** + * Model-to-endpoint routing. 
+ * + * Copilot upstream serves some models exclusively via the Responses API + * (/responses) and others via Chat Completions (/chat/completions). + * Sending a Responses-only model to /chat/completions produces an error. + * + * Detection order: + * 1. If state.models is loaded, check model capabilities.type === "responses" + * (if the upstream ever adds this field). Currently Copilot doesn't set it, + * so we fall through to step 2. + * 2. Static prefix/suffix list (known Responses-only models as of 2025-05). + * + * "Responses-only" models: all gpt-5*-codex variants, o1-pro, o3-pro. + * Everything else (gpt-4o, gpt-5, o1, o3, o4-mini, claude-*, gemini-*) uses + * Chat Completions (or native Anthropic pass-through for Claude). + */ + +import { state } from "~/lib/state" + +/** Endpoint mode for routing. */ +export type ModelMode = "chat" | "responses" + +/** + * Returns the upstream endpoint mode for the given model ID. + * "responses" = must use /responses; "chat" = use /chat/completions (or native Anthropic). + */ +export function getModelMode(modelId: string): ModelMode { + // Guard: treat missing/empty model as "chat" — upstream will reject with a proper error + if (!modelId) return "chat" + + // 1. Check state.models capabilities if available (future-proof) + if (state.models?.data) { + const entry = state.models.data.find((m) => m.id === modelId) + if (entry?.capabilities.type === "responses") return "responses" + if (entry?.capabilities.type === "chat") return "chat" // trust upstream when explicit + } + + // 2. Static heuristic: Responses-only models have "codex" in the name + // or are o-series "pro" variants. + return isResponsesOnlyModel(modelId) ? "responses" : "chat" +} + +/** + * Returns true if the model is known to be Responses-only on Copilot upstream. + */ +export function isResponsesOnlyModel(modelId: string): boolean { + // codex family: gpt-5-codex, gpt-5.1-codex, gpt-5.1-codex-max, gpt-5.3-codex, etc. 
+ // Anchored to word boundaries to avoid matching hypothetical future "codex-mini" chat models. + if (/(?:^|-)codex(?:-|$)/.test(modelId)) return true + // o-pro family: o1-pro, o3-pro, o1-pro-2025-04-09, o3-pro-2025-01-10, etc. + // Covers: o\d+-pro(?:-\d{4}-\d{2}-\d{2})? — requires string to end after "pro" or date + if (/^o\d+-pro(?:-\d{4}-\d{2}-\d{2})?$/.test(modelId)) return true + return false +} diff --git a/src/lib/state.ts b/src/lib/state.ts index 5ba4dc1d1..01f491c35 100644 --- a/src/lib/state.ts +++ b/src/lib/state.ts @@ -7,6 +7,7 @@ export interface State { accountType: string models?: ModelsResponse vsCodeVersion?: string + copilotChatVersion?: string manualApprove: boolean rateLimitWait: boolean diff --git a/src/lib/utils.ts b/src/lib/utils.ts index cc80be667..892c61d74 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -1,7 +1,4 @@ -import consola from "consola" - import { getModels } from "~/services/copilot/get-models" -import { getVSCodeVersion } from "~/services/get-vscode-version" import { state } from "./state" @@ -17,10 +14,3 @@ export async function cacheModels(): Promise { const models = await getModels() state.models = models } - -export const cacheVSCodeVersion = async () => { - const response = await getVSCodeVersion() - state.vsCodeVersion = response - - consola.info(`Using VSCode version: ${response}`) -} diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 04a5ae9ed..456d1282b 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -4,6 +4,7 @@ import consola from "consola" import { streamSSE, type SSEMessage } from "hono/streaming" import { awaitApproval } from "~/lib/approval" +import { getModelMode } from "~/lib/model-routing" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" @@ -15,11 +16,24 @@ import { } from "~/services/copilot/create-chat-completions" 
export async function handleCompletion(c: Context) { - await checkRateLimit(state) - let payload = await c.req.json() consola.debug("Request payload:", JSON.stringify(payload).slice(-400)) + if (getModelMode(payload.model) === "responses") { + return c.json( + { + error: { + message: `Model "${payload.model}" is only available via the Responses API. Use POST /v1/responses instead.`, + type: "invalid_request_error", + code: "responses_only_model", + }, + }, + 400, + ) + } + + await checkRateLimit(state) + // Find the selected model const selectedModel = state.models?.data.find( (model) => model.id === payload.model, diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 881fffcc8..dda7657b3 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -18,9 +18,16 @@ export interface AnthropicMessagesPayload { type: "auto" | "any" | "tool" | "none" name?: string } - thinking?: { - type: "enabled" - budget_tokens?: number + /** + * Thinking config. + * - Legacy (claude-3.7 / claude-4.5): `{ type: "enabled", budget_tokens: N }` + * - New adaptive (claude-opus-4.7+): `{ type: "adaptive" }` paired with + * `output_config.effort` in the request body. + */ + thinking?: { type: "enabled"; budget_tokens?: number } | { type: "adaptive" } + /** Used together with `thinking: { type: "adaptive" }` on opus-4.7+. */ + output_config?: { + effort?: "low" | "medium" | "high" } service_tier?: "auto" | "standard_only" } @@ -32,17 +39,24 @@ export interface AnthropicTextBlock { export interface AnthropicImageBlock { type: "image" - source: { - type: "base64" - media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" - data: string - } + source: + | { + type: "base64" + media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" + data: string + } + | { + /** URL images are rejected by Copilot upstream — kept for type fidelity only. 
*/ + type: "url" + url: string + } } export interface AnthropicToolResultBlock { type: "tool_result" tool_use_id: string - content: string + /** May be a plain string or an array of content blocks. */ + content: string | Array is_error?: boolean } @@ -56,6 +70,12 @@ export interface AnthropicToolUseBlock { export interface AnthropicThinkingBlock { type: "thinking" thinking: string + /** + * Opaque signature returned by the upstream for extended thinking blocks. + * Must be echoed back in subsequent turns to enable multi-turn reasoning. + * Present on native pass-through responses; absent on translated responses. + */ + signature?: string } export type AnthropicUserContentBlock = @@ -106,6 +126,7 @@ export interface AnthropicResponse { output_tokens: number cache_creation_input_tokens?: number cache_read_input_tokens?: number + /** Present on native pass-through responses. */ service_tier?: "standard" | "priority" | "batch" } } diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf6243..e383b2dc0 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -11,9 +11,12 @@ import { type ChatCompletionChunk, type ChatCompletionResponse, } from "~/services/copilot/create-chat-completions" +import { createMessagesNative } from "~/services/copilot/create-messages-native" +import { isNativeAnthropicModel } from "~/services/copilot/native-models" import { type AnthropicMessagesPayload, + type AnthropicStreamEventData, type AnthropicStreamState, } from "./anthropic-types" import { @@ -28,16 +31,83 @@ export async function handleCompletion(c: Context) { const anthropicPayload = await c.req.json() consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) + if (state.manualApprove) { + await awaitApproval() + } + + // Route to native Anthropic pass-through for Claude models to preserve + // thinking blocks (with signature), top_k, cache_control, and richer usage. 
+ if (isNativeAnthropicModel(anthropicPayload.model)) { + return handleNative(c, anthropicPayload) + } + + return handleTranslated(c, anthropicPayload) +} + +// --------------------------------------------------------------------------- +// Native Anthropic pass-through (Claude 4.5+ models) +// --------------------------------------------------------------------------- + +async function handleNative( + c: Context, + payload: AnthropicMessagesPayload, +): Promise { + consola.debug("Using native Anthropic pass-through for", payload.model) + + const response = await createMessagesNative(payload) + + if (!payload.stream) { + // Non-streaming: upstream already returned a complete Anthropic response + consola.debug( + "Native non-streaming response:", + JSON.stringify(response).slice(0, 400), + ) + return c.json(response) + } + + // Streaming: proxy the SSE events directly to the client + consola.debug("Native streaming response — proxying SSE events") + return streamSSE(c, async (stream) => { + for await (const rawEvent of response as AsyncIterable<{ + data?: string + event?: string + }>) { + if (!rawEvent.data) continue + + // Forward verbatim — never block on parse failure + await stream.writeSSE({ + event: rawEvent.event, + data: rawEvent.data, + }) + + // Parse only for debug logging + try { + const parsed = JSON.parse(rawEvent.data) as AnthropicStreamEventData + consola.debug("Native SSE event:", parsed.type) + } catch { + consola.warn( + "Could not parse native SSE chunk for logging:", + rawEvent.data.slice(0, 200), + ) + } + } + }) +} + +// --------------------------------------------------------------------------- +// Translation path (non-Claude models via /chat/completions) +// --------------------------------------------------------------------------- + +async function handleTranslated( + c: Context, + anthropicPayload: AnthropicMessagesPayload, +): Promise { const openAIPayload = translateToOpenAI(anthropicPayload) consola.debug( "Translated OpenAI request 
payload:", JSON.stringify(openAIPayload), ) - if (state.manualApprove) { - await awaitApproval() - } - const response = await createChatCompletions(openAIPayload) if (isNonStreaming(response)) { diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index dc41e6382..e154c3714 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -1,3 +1,5 @@ +import consola from "consola" + import { type ChatCompletionResponse, type ChatCompletionsPayload, @@ -213,12 +215,20 @@ function mapContent( break } case "image": { - contentParts.push({ - type: "image_url", - image_url: { - url: `data:${block.source.media_type};base64,${block.source.data}`, - }, - }) + if (block.source.type === "base64") { + contentParts.push({ + type: "image_url", + image_url: { + url: `data:${block.source.media_type};base64,${block.source.data}`, + }, + }) + } else { + // URL images are rejected by Copilot upstream — skip silently + // (type kept for fidelity when round-tripping through native path) + consola.warn( + "URL image source not supported in translation path — skipping", + ) + } break } @@ -302,7 +312,9 @@ export function translateToAnthropic( } } - // Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses + // Note: the translation path routes Claude models via /chat/completions which + // does not return thinking blocks. For thinking block support use the native + // Anthropic pass-through path (create-messages-native.ts). 
return { id: response.id, diff --git a/src/routes/models/route.ts b/src/routes/models/route.ts index 5254e2af7..2de4b797c 100644 --- a/src/routes/models/route.ts +++ b/src/routes/models/route.ts @@ -1,6 +1,7 @@ import { Hono } from "hono" import { forwardError } from "~/lib/error" +import { getModelMode } from "~/lib/model-routing" import { state } from "~/lib/state" import { cacheModels } from "~/lib/utils" @@ -21,6 +22,7 @@ modelRoutes.get("/", async (c) => { created_at: new Date(0).toISOString(), // No date available from source owned_by: model.vendor, display_name: model.name, + mode: getModelMode(model.id), })) return c.json({ diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts new file mode 100644 index 000000000..53ead897e --- /dev/null +++ b/src/routes/responses/handler.ts @@ -0,0 +1,109 @@ +import type { Context } from "hono" + +import consola from "consola" +import { streamSSE } from "hono/streaming" + +import { awaitApproval } from "~/lib/approval" +import { state } from "~/lib/state" +import { createResponses } from "~/services/copilot/create-responses" + +import type { ResponsesPayload, ResponsesResponse } from "./types" + +import { sanitiseOutputItem, sanitiseResponsesOutput } from "./translation" + +export async function handleResponses(c: Context): Promise { + let payload: ResponsesPayload + try { + payload = await c.req.json() + } catch { + return c.json( + { + error: { + message: "Invalid JSON body", + type: "invalid_request_error", + code: "invalid_json", + }, + }, + 400, + ) + } + + consola.debug("Responses API request payload:", JSON.stringify(payload)) + + if (state.manualApprove) { + await awaitApproval() + } + + const response = await createResponses(payload) + + if (!payload.stream) { + const sanitised = sanitiseResponsesOutput(response as ResponsesResponse) + consola.debug( + "Responses non-streaming response:", + JSON.stringify(sanitised).slice(0, 400), + ) + return c.json(sanitised) + } + + // Streaming: proxy 
SSE events verbatim (same pattern as native Anthropic pass-through) + consola.debug("Responses streaming response — proxying SSE events") + return streamSSE( + c, + async (stream) => { + for await (const rawEvent of response as AsyncIterable<{ + data?: string + event?: string + }>) { + if (!rawEvent.data) continue + + // Sanitise status:null from embedded output items before forwarding. + // SSE events like response.output_item.done carry full item snapshots + // which can contain null status fields rejected by upstream on re-submission. + let forwardData = rawEvent.data + if (rawEvent.data !== "[DONE]") { + try { + const parsed = JSON.parse(rawEvent.data) as Record + consola.debug( + "Responses SSE event:", + (parsed as { type?: string }).type, + ) + // Sanitise embedded item or output array + if (parsed["item"]) { + parsed["item"] = sanitiseOutputItem( + parsed["item"] as Parameters[0], + ) + } + if (Array.isArray(parsed["output"])) { + parsed["output"] = ( + parsed["output"] as Array< + Parameters[0] + > + ).map((i) => sanitiseOutputItem(i)) + } + forwardData = JSON.stringify(parsed) + } catch { + // [DONE] sentinel or truly malformed chunk + if (rawEvent.data !== "[DONE]") { + consola.warn( + "Could not parse Responses SSE chunk for logging:", + rawEvent.data.slice(0, 200), + ) + } + } + } + + await stream.writeSSE({ + event: rawEvent.event, + data: forwardData, + }) + } + }, + async (err, stream) => { + consola.error("Responses SSE stream error:", err) + await stream.writeSSE({ + event: "error", + data: JSON.stringify({ message: String(err) }), + }) + }, + ) +} diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts new file mode 100644 index 000000000..4be774e59 --- /dev/null +++ b/src/routes/responses/route.ts @@ -0,0 +1,17 @@ +import { Hono } from "hono" + +import { forwardError } from "~/lib/error" + +import { handleResponses } from "./handler" + +const responses = new Hono() + +responses.post("/", async (c) => { + try { + return await 
handleResponses(c) + } catch (error) { + return await forwardError(c, error) + } +}) + +export default responses diff --git a/src/routes/responses/translation.ts b/src/routes/responses/translation.ts new file mode 100644 index 000000000..4fa2c456d --- /dev/null +++ b/src/routes/responses/translation.ts @@ -0,0 +1,50 @@ +/** + * Response translation for the Responses API path. + * + * Key invariants: + * - reasoning items MUST preserve `encrypted_content` verbatim (required for + * multi-turn continuity — see issue #6 and litellm PR #17130) + * - `status: null` fields are stripped (Copilot upstream rejects null status + * on subsequent turns — see litellm PR #22370) + */ + +import type { ResponsesResponse, ResponsesOutputItem } from "./types" + +// Upstream (e.g. litellm) may send `status: null` even though our TypeScript +// types forbid it. Use a separate loose type to represent that reality. +type LooseOutputItem = Omit & { + status?: string | null +} + +/** + * Sanitise a Responses API response object before forwarding to the client. + * + * Guarantees: + * 1. `encrypted_content` on reasoning items is preserved (never stripped). + * 2. `status: null` is removed from all output items. + * 3. All other fields are passed through untouched. + */ +export function sanitiseResponsesOutput( + response: ResponsesResponse, +): ResponsesResponse { + return { + ...response, + output: response.output.map((item) => sanitiseOutputItem(item)), + } +} + +/** + * Sanitise a single output item from an SSE event or non-streaming response. + * Exported so the streaming path can apply the same logic per-event. + */ +export function sanitiseOutputItem( + item: ResponsesOutputItem, +): ResponsesOutputItem { + // Cast to the loose type so the null-status check is valid at compile time. 
+ const loose = item as unknown as LooseOutputItem + if (loose.status === null) { + const { status: _dropped, ...rest } = loose + return rest as unknown as ResponsesOutputItem + } + return item +} diff --git a/src/routes/responses/types.ts b/src/routes/responses/types.ts new file mode 100644 index 000000000..642df7719 --- /dev/null +++ b/src/routes/responses/types.ts @@ -0,0 +1,149 @@ +// Request types +export interface ResponsesPayload { + model: string + input: string | Array + instructions?: string + tools?: Array + tool_choice?: + | "auto" + | "none" + | "required" + | { type: "function"; name: string } + temperature?: number | null + top_p?: number | null + max_output_tokens?: number | null + reasoning?: { + effort?: "low" | "medium" | "high" + summary?: "auto" | "concise" | "detailed" + } | null + previous_response_id?: string | null + store?: boolean | null + include?: Array | null + stream?: boolean | null + metadata?: Record | null + parallel_tool_calls?: boolean | null + service_tier?: "auto" | "default" | null + truncation?: "auto" | "disabled" | null + user?: string | null +} + +// Input item types (union) +export type ResponsesInputItem = + | ResponsesInputMessage + | ResponsesInputFunctionCall + | ResponsesFunctionCallOutput + | ResponsesReasoningItem + +export interface ResponsesInputMessage { + type: "message" + role: "user" | "assistant" | "system" | "developer" + content: string | Array + id?: string + status?: "completed" | "incomplete" | null +} + +export interface ResponsesInputFunctionCall { + type: "function_call" + id?: string + call_id: string + name: string + arguments: string + status?: "completed" | "in_progress" | "incomplete" | null +} + +export interface ResponsesFunctionCallOutput { + type: "function_call_output" + id?: string + call_id: string + output: string +} + +export interface ResponsesReasoningItem { + type: "reasoning" + id: string + encrypted_content?: string + summary?: Array<{ type: "summary_text"; text: string }> + 
status?: "completed" | "in_progress" | "incomplete" | null +} + +export type ResponsesContentPart = + | { type: "input_text"; text: string } + | { + type: "input_image" + image_url?: string | null + file_id?: string | null + detail?: "low" | "high" | "auto" + } + | { + type: "input_file" + /** Pre-uploaded file via Files API */ + file_id?: string + /** Base64-encoded inline file content */ + file_data?: string + filename?: string + } + +// Tool types +export interface ResponsesTool { + type: "function" + name: string + description?: string + parameters?: Record + strict?: boolean +} + +// Output item types (response) +export type ResponsesOutputItem = + | ResponsesOutputMessage + | ResponsesOutputFunctionCall + | ResponsesOutputReasoning + +export interface ResponsesOutputMessage { + type: "message" + id: string + role: "assistant" + content: Array + status: "completed" | "incomplete" | "in_progress" +} + +export interface ResponsesOutputFunctionCall { + type: "function_call" + id: string + call_id: string + name: string + arguments: string + status: "completed" | "incomplete" | "in_progress" +} + +export interface ResponsesOutputReasoning { + type: "reasoning" + id: string + encrypted_content?: string + summary?: Array<{ type: "summary_text"; text: string }> + status: "completed" | "incomplete" | "in_progress" +} + +export type ResponsesOutputContentPart = + | { type: "output_text"; text: string; annotations?: Array } + | { type: "refusal"; refusal: string } + +// Response type +export interface ResponsesResponse { + id: string + object: "response" + created_at: number + model: string + status: "completed" | "incomplete" | "in_progress" | "failed" | "cancelled" + output: Array + usage?: { + input_tokens: number + output_tokens: number + total_tokens: number + input_tokens_details?: { cached_tokens?: number } + output_tokens_details?: { reasoning_tokens?: number } + } + error?: { code: string; message: string } | null + incomplete_details?: { reason: string } | 
null + metadata?: Record | null + service_tier?: "default" | "flex" | (string & {}) +} diff --git a/src/server.ts b/src/server.ts index 462a278f3..6e6b6a878 100644 --- a/src/server.ts +++ b/src/server.ts @@ -6,6 +6,7 @@ import { completionRoutes } from "./routes/chat-completions/route" import { embeddingRoutes } from "./routes/embeddings/route" import { messageRoutes } from "./routes/messages/route" import { modelRoutes } from "./routes/models/route" +import responses from "./routes/responses/route" import { tokenRoute } from "./routes/token/route" import { usageRoute } from "./routes/usage/route" @@ -29,3 +30,7 @@ server.route("/v1/embeddings", embeddingRoutes) // Anthropic compatible endpoints server.route("/v1/messages", messageRoutes) + +// OpenAI Responses API +server.route("/responses", responses) +server.route("/v1/responses", responses) diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 8534151da..ea4eb5d55 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -71,6 +71,7 @@ export interface ChatCompletionChunk { interface Delta { content?: string | null + reasoning_content?: string | null role?: "user" | "assistant" | "system" | "tool" tool_calls?: Array<{ index: number @@ -112,6 +113,7 @@ export interface ChatCompletionResponse { interface ResponseMessage { role: "assistant" content: string | null + reasoning_content?: string | null tool_calls?: Array } @@ -148,6 +150,7 @@ export interface ChatCompletionsPayload { | { type: "function"; function: { name: string } } | null user?: string | null + reasoning_effort?: "low" | "medium" | "high" | null } export interface Tool { diff --git a/src/services/copilot/create-messages-native.ts b/src/services/copilot/create-messages-native.ts new file mode 100644 index 000000000..8b9fb54fd --- /dev/null +++ b/src/services/copilot/create-messages-native.ts @@ -0,0 +1,167 @@ +/** + * Native 
Anthropic pass-through service. + * + * The GitHub Copilot upstream (`api.enterprise.githubcopilot.com`) natively + * speaks the Anthropic Messages API for all Claude 4.5+ models. Routing + * requests directly to `/v1/messages` instead of translating them through + * `/chat/completions` gives us: + * + * - Real thinking blocks with `signature` field (multi-turn reasoning) + * - `cache_creation_input_tokens` in usage + * - `top_k` support + * - No lossy translation round-trip + * + * See research notes: ~/copilot-models-litellm/copilot_models.py + */ + +import consola from "consola" +import { events } from "fetch-event-stream" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" + +import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config" +import { HTTPError } from "~/lib/error" +import { state } from "~/lib/state" + +/** + * Forward an Anthropic-format request directly to Copilot's native `/v1/messages` + * endpoint, preserving all fields (thinking, signature, top_k, cache_control, …). 
+ * + * Returns: + * - For non-streaming: the raw Anthropic JSON response object + * - For streaming: an async iterable of SSE events (fetch-event-stream) + */ +export const createMessagesNative = async ( + payload: AnthropicMessagesPayload, +) => { + if (!state.copilotToken) throw new Error("Copilot token not found") + + const hasVision = messageHasImages(payload) + const headers = buildNativeHeaders(hasVision, Boolean(payload.stream)) + + const upstream = `${copilotBaseUrl(state)}/v1/messages` + consola.debug("Native Anthropic upstream:", upstream) + + // Strip fields that are Copilot-API–specific or unsupported by upstream + const body = buildUpstreamPayload(payload) + + const response = await fetch(upstream, { + method: "POST", + headers, + body: JSON.stringify(body), + }) + + if (!response.ok) { + consola.error("Native Anthropic upstream error", response.status) + throw new HTTPError("Native Anthropic upstream error", response) + } + + if (payload.stream) { + return events(response) + } + + return response.json() +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Build headers for the Anthropic native endpoint. + * + * The upstream requires `anthropic-version` and does NOT want an `openai-intent` + * header. We reuse `copilotHeaders()` for auth/agent headers and then layer the + * Anthropic-specific ones on top. 
+ */ +function buildNativeHeaders( + vision: boolean, + stream: boolean, +): Record { + const base = copilotHeaders(state, vision) + + // Remove headers that are OpenAI-specific and not expected by Anthropic endpoint + const { "openai-intent": _dropped, ...anthropicBase } = base + + return { + ...anthropicBase, + "anthropic-version": "2023-06-01", + // Enable beta features: extended thinking + prompt caching + "anthropic-beta": + "interleaved-thinking-2025-05-14,prompt-caching-2024-07-31", + // Only request SSE streaming format when the caller is streaming + ...(stream ? { accept: "text/event-stream" } : {}), + } +} + +/** + * Produce the payload forwarded to upstream. + * + * We pass through almost everything verbatim. The only transformation is that + * `claude-opus-4.7+` requires the new adaptive thinking format + * (`thinking: { type: "adaptive" }` + `output_config.effort`) rather than the + * legacy `{ type: "enabled", budget_tokens: N }`. If the caller already sent + * the correct format we leave it alone; if they sent the old format and the + * model requires adaptive, we upgrade automatically. + */ +export function buildUpstreamPayload( + payload: AnthropicMessagesPayload, +): AnthropicMessagesPayload { + const { thinking, output_config, ...rest } = payload + + if (!thinking) { + return rest // safe: output_config only valid alongside thinking + } + + if (isAdaptiveThinkingModel(payload.model)) { + // Upgrade legacy enabled → adaptive if needed + if (thinking.type === "enabled") { + consola.debug( + `Upgrading thinking format to adaptive for model ${payload.model}`, + ) + return { + ...rest, + thinking: { type: "adaptive" }, + output_config: + output_config?.effort ? 
output_config : { effort: "medium" }, + } + } + // Already adaptive — forward as-is + return { ...rest, thinking, output_config } + } + + // Non-adaptive model — forward legacy format, drop output_config + return { ...rest, thinking } +} + +/** + * Returns true for models that require the adaptive thinking API + * (`{ type: "adaptive" }` + `output_config.effort`) rather than the + * legacy `{ type: "enabled", budget_tokens: N }`. + * Currently: claude-opus-4.7 and later. + */ +function isAdaptiveThinkingModel(model: string): boolean { + // claude-opus-4.7 and above use adaptive thinking + const match = model.match(/^claude-opus-4[.-](\d+)/) + if (match) { + const minor = Number.parseInt(match[1], 10) + // claude-opus-4.7 and later use the new adaptive thinking API (not legacy budget_tokens) + return minor >= 7 + } + return false +} + +/** + * Check whether the request contains any image blocks (to set vision headers). + */ +function messageHasImages(payload: AnthropicMessagesPayload): boolean { + for (const msg of payload.messages) { + if (typeof msg.content === "string") continue + if (Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block.type === "image") return true + } + } + } + return false +} diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts new file mode 100644 index 000000000..1bf7d4aba --- /dev/null +++ b/src/services/copilot/create-responses.ts @@ -0,0 +1,90 @@ +import type { ServerSentEventMessage } from "fetch-event-stream" + +import consola from "consola" +import { events } from "fetch-event-stream" + +import type { + ResponsesContentPart, + ResponsesInputItem, + ResponsesPayload, + ResponsesResponse, +} from "~/routes/responses/types" + +import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config" +import { HTTPError } from "~/lib/error" +import { state } from "~/lib/state" + +// --------------------------------------------------------------------------- +// Helpers +// 
--------------------------------------------------------------------------- + +/** + * Returns true if any input item contains an `input_image` content part. + * Handles both a top-level string input and an array of input items. + */ +export function inputHasImages(payload: ResponsesPayload): boolean { + if (typeof payload.input === "string") return false + + return payload.input.some((item) => { + if (item.type !== "message") return false + if (typeof item.content === "string") return false + return item.content.some( + (part: ResponsesContentPart) => part.type === "input_image", + ) + }) +} + +/** + * Returns true if this looks like an agent/multi-turn call: + * - any input item has role "assistant", OR + * - any item has type "function_call_output", "function_call", or "reasoning" + * (reasoning items only appear when echoing back prior agentic turn context) + */ +export function isAgentCall(payload: ResponsesPayload): boolean { + if (typeof payload.input === "string") return false + + return payload.input.some( + (item: ResponsesInputItem) => + ("role" in item && item.role === "assistant") + || item.type === "function_call_output" + || item.type === "function_call" + || item.type === "reasoning", + ) +} + +// --------------------------------------------------------------------------- +// Service client +// --------------------------------------------------------------------------- + +export const createResponses = async ( + payload: ResponsesPayload, +): Promise> => { + if (!state.copilotToken) throw new Error("Copilot token not found") + + const enableVision = inputHasImages(payload) + + const initiator = isAgentCall(payload) ? 
"agent" : "user" + + // TODO(#11): add Copilot-Vision-Request header when vision detected + const headers: Record = { + ...copilotHeaders(state, enableVision), + "X-Initiator": initiator, + } + + const response = await fetch(`${copilotBaseUrl(state)}/responses`, { + method: "POST", + headers, + body: JSON.stringify(payload), + }) + + if (!response.ok) { + consola.error("Failed to create responses", response) + throw new HTTPError("Failed to create responses", response) + } + + if (payload.stream) { + return events(response) + } + + return (await response.json()) as ResponsesResponse +} diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts index 3cfa30af0..fb215bce8 100644 --- a/src/services/copilot/get-models.ts +++ b/src/services/copilot/get-models.ts @@ -36,7 +36,8 @@ interface ModelCapabilities { object: string supports: ModelSupports tokenizer: string - type: string + /** Known values: "chat" | "responses". Open string for forward-compat. */ + type: "chat" | "responses" | (string & {}) } export interface Model { diff --git a/src/services/copilot/native-models.ts b/src/services/copilot/native-models.ts new file mode 100644 index 000000000..7d731d01b --- /dev/null +++ b/src/services/copilot/native-models.ts @@ -0,0 +1,36 @@ +/** + * Dynamic detection of which models support native Anthropic pass-through. + * + * The Copilot `/models` endpoint returns a `vendor` field for each model. + * Any model with `vendor === "Anthropic"` is served natively via the + * `/v1/messages` path at `api.enterprise.githubcopilot.com`. + * + * We cache the set of native model IDs after the first `/models` call and + * keep it in sync with `state.models` (which is refreshed periodically by + * the token-rotation logic). + */ + +import { state } from "~/lib/state" + +/** + * Returns true if the given model ID should be routed to the native + * Anthropic pass-through service instead of the OpenAI chat-completions + * translation layer. 
+ * + * Resolution order: + * 1. If `state.models` is populated, check whether the model's vendor is + * "Anthropic" (live, always up-to-date). + * 2. Fall back to a static prefix list for resilience at startup before + * the models list is fetched. + */ +export function isNativeAnthropicModel(modelId: string): boolean { + if (state.models?.data) { + const entry = state.models.data.find((m) => m.id === modelId) + if (entry) { + return entry.vendor === "Anthropic" + } + // Model not found in list — fall through to prefix heuristic + } + + return modelId.startsWith("claude-") +} diff --git a/src/services/get-copilot-chat-version.ts b/src/services/get-copilot-chat-version.ts new file mode 100644 index 000000000..b4278b669 --- /dev/null +++ b/src/services/get-copilot-chat-version.ts @@ -0,0 +1,82 @@ +import consola from "consola" + +import { VERSION_CACHE_TTL_MS, type VersionCache } from "./version-cache" + +export const FALLBACK = "0.26.7" + +let cache: VersionCache | undefined + +async function fetchFromMarketplace(): Promise { + const controller = new AbortController() + const timeout = setTimeout(() => { + controller.abort() + }, 5000) + + try { + const response = await fetch( + "https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery", + { + method: "POST", + headers: { + "Content-Type": "application/json", + Accept: "application/json;api-version=3.0-preview.1", + }, + body: JSON.stringify({ + filters: [ + { + criteria: [{ filterType: 7, value: "GitHub.copilot-chat" }], + }, + ], + flags: 529, + }), + signal: controller.signal, + }, + ) + + /* eslint-disable @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-member-access */ + const data = (await response.json()) as any + const parsed: unknown = + data?.results?.[0]?.extensions?.[0]?.versions?.[0]?.version + /* eslint-enable @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-member-access 
*/ + + if (typeof parsed !== "string" || !parsed) { + throw new Error("Unexpected response shape") + } + + return parsed + } finally { + clearTimeout(timeout) + } +} + +export async function getCopilotChatVersion(): Promise { + if (cache && Date.now() - cache.fetchedAt < VERSION_CACHE_TTL_MS) { + return cache.version + } + + let fetched: string | null = null + + try { + fetched = await fetchFromMarketplace() + } catch { + consola.warn( + "Failed to fetch Copilot Chat version from Marketplace, using fallback", + ) + } + + const version = + fetched !== null && /^\d+\.\d+\.\d+$/.test(fetched) ? fetched : FALLBACK + + if (fetched !== null && version !== FALLBACK) { + // eslint-disable-next-line require-atomic-updates + cache = { version, fetchedAt: Date.now() } + } else if (fetched !== null) { + // Format validation rejected the fetched value + const safeVersion = fetched.slice(0, 40).replaceAll(/[^\x20-\x7E]/g, "?") + consola.warn( + `Invalid version format received: ${safeVersion}, using fallback`, + ) + } + + return version +} diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts index 6078f09b5..bfb92de5e 100644 --- a/src/services/get-vscode-version.ts +++ b/src/services/get-vscode-version.ts @@ -1,6 +1,36 @@ -const FALLBACK = "1.104.3" +import consola from "consola" -export async function getVSCodeVersion() { +import { VERSION_CACHE_TTL_MS, type VersionCache } from "./version-cache" + +export const FALLBACK = "1.104.3" + +let cache: VersionCache | undefined + +async function fetchFromOfficialApi(): Promise { + const controller = new AbortController() + const timeout = setTimeout(() => { + controller.abort() + }, 5000) + + try { + const response = await fetch( + "https://update.code.visualstudio.com/api/releases/stable", + { signal: controller.signal }, + ) + + const versions = (await response.json()) as Array + + if (Array.isArray(versions) && versions.length > 0 && versions[0]) { + return versions[0] + } + + throw new 
Error("Unexpected response shape") + } finally { + clearTimeout(timeout) + } +} + +async function fetchFromAur(): Promise { const controller = new AbortController() const timeout = setTimeout(() => { controller.abort() @@ -9,25 +39,54 @@ export async function getVSCodeVersion() { try { const response = await fetch( "https://aur.archlinux.org/cgit/aur.git/plain/PKGBUILD?h=visual-studio-code-bin", - { - signal: controller.signal, - }, + { signal: controller.signal }, ) const pkgbuild = await response.text() - const pkgverRegex = /pkgver=([0-9.]+)/ - const match = pkgbuild.match(pkgverRegex) + const match = pkgbuild.match(/pkgver=(\d+\.\d+\.\d+)/) - if (match) { + if (match?.[1]) { return match[1] } - return FALLBACK - } catch { - return FALLBACK + throw new Error("Version not found in PKGBUILD") } finally { clearTimeout(timeout) } } -await getVSCodeVersion() +export async function getVSCodeVersion(): Promise { + if (cache && Date.now() - cache.fetchedAt < VERSION_CACHE_TTL_MS) { + return cache.version + } + + let fetched: string | null = null + + try { + fetched = await fetchFromOfficialApi() + } catch { + try { + fetched = await fetchFromAur() + } catch { + consola.warn( + "Failed to fetch VS Code version from all sources, using fallback", + ) + } + } + + const version = + fetched !== null && /^\d+\.\d+\.\d+$/.test(fetched) ? 
fetched : FALLBACK + + if (fetched !== null && version !== FALLBACK) { + // eslint-disable-next-line require-atomic-updates + cache = { version, fetchedAt: Date.now() } + } else if (fetched !== null) { + // Format validation rejected the fetched value + const safeVersion = fetched.slice(0, 40).replaceAll(/[^\x20-\x7E]/g, "?") + consola.warn( + `Invalid version format received: ${safeVersion}, using fallback`, + ) + } + + return version +} diff --git a/src/services/version-cache.ts b/src/services/version-cache.ts new file mode 100644 index 000000000..839d29438 --- /dev/null +++ b/src/services/version-cache.ts @@ -0,0 +1,6 @@ +export const VERSION_CACHE_TTL_MS = 24 * 60 * 60 * 1000 + +export interface VersionCache { + version: string + fetchedAt: number +} diff --git a/src/start.ts b/src/start.ts index 14abbbdff..9fca3b37f 100644 --- a/src/start.ts +++ b/src/start.ts @@ -11,8 +11,10 @@ import { initProxyFromEnv } from "./lib/proxy" import { generateEnvScript } from "./lib/shell" import { state } from "./lib/state" import { setupCopilotToken, setupGitHubToken } from "./lib/token" -import { cacheModels, cacheVSCodeVersion } from "./lib/utils" +import { cacheModels } from "./lib/utils" import { server } from "./server" +import { getCopilotChatVersion } from "./services/get-copilot-chat-version" +import { getVSCodeVersion } from "./services/get-vscode-version" interface RunServerOptions { port: number @@ -48,7 +50,13 @@ export async function runServer(options: RunServerOptions): Promise { state.showToken = options.showToken await ensurePaths() - await cacheVSCodeVersion() + ;[state.vsCodeVersion, state.copilotChatVersion] = await Promise.all([ + getVSCodeVersion(), + getCopilotChatVersion(), + ]) + consola.info( + `VS Code: ${state.vsCodeVersion} Copilot Chat: ${state.copilotChatVersion}`, + ) if (options.githubToken) { state.githubToken = options.githubToken diff --git a/tests/model-routing.test.ts b/tests/model-routing.test.ts new file mode 100644 index 
000000000..8d758643f --- /dev/null +++ b/tests/model-routing.test.ts @@ -0,0 +1,282 @@ +import { + describe, + test, + expect, + afterEach, + beforeEach, + beforeAll, +} from "bun:test" + +import { getModelMode, isResponsesOnlyModel } from "../src/lib/model-routing" +import { state } from "../src/lib/state" +import { server } from "../src/server" + +// --------------------------------------------------------------------------- +// isResponsesOnlyModel — pure unit tests (no state needed) +// --------------------------------------------------------------------------- + +describe("isResponsesOnlyModel", () => { + test("gpt-5-codex → responses-only", () => + expect(isResponsesOnlyModel("gpt-5-codex")).toBe(true)) + test("gpt-5.1-codex → responses-only", () => + expect(isResponsesOnlyModel("gpt-5.1-codex")).toBe(true)) + test("gpt-5.1-codex-max → responses-only", () => + expect(isResponsesOnlyModel("gpt-5.1-codex-max")).toBe(true)) + test("gpt-5.3-codex → responses-only", () => + expect(isResponsesOnlyModel("gpt-5.3-codex")).toBe(true)) + test("o1-pro → responses-only", () => + expect(isResponsesOnlyModel("o1-pro")).toBe(true)) + test("o3-pro → responses-only", () => + expect(isResponsesOnlyModel("o3-pro")).toBe(true)) + test("gpt-4o → chat", () => + expect(isResponsesOnlyModel("gpt-4o")).toBe(false)) + test("gpt-5 → chat", () => expect(isResponsesOnlyModel("gpt-5")).toBe(false)) + test("o1 → chat", () => expect(isResponsesOnlyModel("o1")).toBe(false)) + test("o3 → chat", () => expect(isResponsesOnlyModel("o3")).toBe(false)) + test("claude-sonnet-4-5 → chat", () => + expect(isResponsesOnlyModel("claude-sonnet-4-5")).toBe(false)) + test("o4-mini → chat", () => + expect(isResponsesOnlyModel("o4-mini")).toBe(false)) + test("o4-pro → responses-only", () => + expect(isResponsesOnlyModel("o4-pro")).toBe(true)) + test("o1-pro-2025-04-09 (dated alias) → responses-only", () => + expect(isResponsesOnlyModel("o1-pro-2025-04-09")).toBe(true)) + test("o3-pro-mini → NOT 
responses-only (not a pro variant)", () => + expect(isResponsesOnlyModel("o3-pro-mini")).toBe(false)) +}) + +// --------------------------------------------------------------------------- +// getModelMode — with loaded models list (state mutation) +// --------------------------------------------------------------------------- + +describe("getModelMode — with loaded models list", () => { + let savedModels: typeof state.models + + beforeEach(() => { + savedModels = state.models + }) + + afterEach(() => { + state.models = savedModels + }) + + test("model with capabilities.type=responses in list → responses", () => { + state.models = { + object: "list", + data: [ + { + id: "future-responses-model", + vendor: "OpenAI", + name: "Future Model", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "responses", // upstream sets this + }, + }, + ], + } + expect(getModelMode("future-responses-model")).toBe("responses") + }) + + test("model with explicit capabilities.type=chat in list → chat (upstream authoritative)", () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-5-codex", + vendor: "OpenAI", + name: "Codex", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "chat", + }, + }, + ], + } + // capabilities.type = "chat" is authoritative → returns "chat" even though name contains "codex" + expect(getModelMode("gpt-5-codex")).toBe("chat") + }) + + test("regular chat model → chat", () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-4o", + vendor: "OpenAI", + name: "GPT-4o", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: 
"model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "chat", + }, + }, + ], + } + expect(getModelMode("gpt-4o")).toBe("chat") + }) + + test("state.models undefined → heuristic (codex → responses)", () => { + state.models = undefined + expect(getModelMode("gpt-5-codex")).toBe("responses") + }) + + test("state.models undefined → heuristic (gpt-4o → chat)", () => { + state.models = undefined + expect(getModelMode("gpt-4o")).toBe("chat") + }) +}) + +// --------------------------------------------------------------------------- +// Route-level: POST /v1/chat/completions blocks Responses-only models +// --------------------------------------------------------------------------- + +describe("chat-completions route blocks responses-only models", () => { + let savedModels: typeof state.models + + beforeAll(() => { + state.copilotToken = "test-token" + state.vsCodeVersion = "1.99.0" + state.accountType = "individual" + state.manualApprove = false + }) + + beforeEach(() => { + savedModels = state.models + }) + + afterEach(() => { + state.models = savedModels + }) + + test("gpt-5-codex → 400 with responses_only_model code", async () => { + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5-codex", + messages: [{ role: "user", content: "hello" }], + }), + }) + expect(res.status).toBe(400) + const body = (await res.json()) as { + error: { type: string; code: string; message: string } + } + expect(body.error.code).toBe("responses_only_model") + expect(body.error.type).toBe("invalid_request_error") + expect(body.error.message).toContain("gpt-5-codex") + expect(body.error.message).toContain("/v1/responses") + }) + + test("o1-pro → 400 with responses_only_model code", async () => { + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: 
"o1-pro", + messages: [{ role: "user", content: "hello" }], + }), + }) + expect(res.status).toBe(400) + const body = (await res.json()) as { + error: { code: string } + } + expect(body.error.code).toBe("responses_only_model") + }) + + test("gpt-5.1-codex-max → 400 with responses_only_model code", async () => { + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-5.1-codex-max", + messages: [{ role: "user", content: "hello" }], + }), + }) + expect(res.status).toBe(400) + const body = (await res.json()) as { + error: { code: string } + } + expect(body.error.code).toBe("responses_only_model") + }) + + test("model with capabilities.type=responses in state is blocked at /v1/chat/completions", async () => { + // Set up a model that only the capabilities path would catch (not the heuristic) + state.models = { + object: "list", + data: [ + { + id: "o5-turbo", // no "codex", not "o\d+-pro" + vendor: "OpenAI", + name: "O5 Turbo", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "responses", + }, + }, + ], + } + + const res = await server.request("/v1/chat/completions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "o5-turbo", + messages: [{ role: "user", content: "hi" }], + }), + }) + expect(res.status).toBe(400) + const body = (await res.json()) as { error: { code: string } } + expect(body.error.code).toBe("responses_only_model") + }) + + test("gpt-4o is NOT blocked at /v1/chat/completions (chat model)", async () => { + // gpt-4o is a chat model — should pass the guard (will fail at upstream but not with 400) + // We just need status !== 400 with code responses_only_model + const res = await server.request("/v1/chat/completions", { + 
method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "gpt-4o", + messages: [{ role: "user", content: "hi" }], + }), + }) + // Should NOT return the routing 400 + if (res.status === 400) { + const body = (await res.json()) as { error?: { code?: string } } + expect(body.error?.code).not.toBe("responses_only_model") + } + // Any other status is fine (500 from missing upstream, etc.) + }) +}) diff --git a/tests/native-passthrough.test.ts b/tests/native-passthrough.test.ts new file mode 100644 index 000000000..6e4f09c72 --- /dev/null +++ b/tests/native-passthrough.test.ts @@ -0,0 +1,257 @@ +import { describe, test, expect, beforeEach, afterEach } from "bun:test" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" + +import { state } from "~/lib/state" +import { buildUpstreamPayload } from "~/services/copilot/create-messages-native" +import { isNativeAnthropicModel } from "~/services/copilot/native-models" + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Minimal valid payload base — only the fields required by the type. 
*/
+function basePayload(
+  overrides: Partial<AnthropicMessagesPayload>,
+): AnthropicMessagesPayload {
+  return {
+    model: "claude-sonnet-4-5",
+    messages: [{ role: "user", content: "hi" }],
+    max_tokens: 1024,
+    ...overrides,
+  }
+}
+
+// ---------------------------------------------------------------------------
+// buildUpstreamPayload tests
+// ---------------------------------------------------------------------------
+
+describe("buildUpstreamPayload", () => {
+  // T1 — output_config present but thinking absent → output_config stripped
+  test("T1: strips output_config when thinking is absent", () => {
+    const payload = basePayload({
+      output_config: { effort: "high" },
+    })
+    const result = buildUpstreamPayload(payload)
+    expect(result).not.toHaveProperty("output_config")
+    expect(result).not.toHaveProperty("thinking")
+  })
+
+  // T2 — adaptive upgrade preserves explicit output_config: { effort: "high" }
+  test("T2: adaptive upgrade preserves explicit output_config effort", () => {
+    const payload = basePayload({
+      model: "claude-opus-4.7",
+      thinking: { type: "enabled" },
+      output_config: { effort: "high" },
+    } as Partial<AnthropicMessagesPayload>)
+    const result = buildUpstreamPayload(payload)
+    expect(result.thinking).toEqual({ type: "adaptive" })
+    // Should keep caller's "high", not override to "medium"
+    expect(result.output_config).toEqual({ effort: "high" })
+  })
+
+  // T3 — already adaptive → forwarded as-is
+  test("T3: already-adaptive thinking forwarded as-is", () => {
+    const payload = basePayload({
+      model: "claude-opus-4.7",
+      thinking: { type: "adaptive" },
+      output_config: { effort: "low" },
+    } as Partial<AnthropicMessagesPayload>)
+    const result = buildUpstreamPayload(payload)
+    expect(result.thinking).toEqual({ type: "adaptive" })
+    expect(result.output_config).toEqual({ effort: "low" })
+  })
+
+  // T4 — legacy model with enabled thinking → kept as-is, no adaptive upgrade
+  test("T4: legacy model with enabled thinking kept as-is", () => {
+    const payload = basePayload({
+      model: "claude-sonnet-4-5",
+      thinking: { type: "enabled", budget_tokens: 1024 },
+    } as Partial<AnthropicMessagesPayload>)
+    const result = buildUpstreamPayload(payload)
+    expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 })
+    expect(result).not.toHaveProperty("output_config")
+  })
+
+  // T5 — adaptive upgrade with no output_config → defaults to effort:medium
+  test("T5: adaptive upgrade with no output_config defaults to effort:medium", () => {
+    const payload = basePayload({
+      model: "claude-opus-4.7",
+      thinking: { type: "enabled", budget_tokens: 1024 },
+      // output_config intentionally absent
+    } as Partial<AnthropicMessagesPayload>)
+    const result = buildUpstreamPayload(payload)
+    expect(result.thinking).toEqual({ type: "adaptive" })
+    expect(result.output_config).toEqual({ effort: "medium" })
+  })
+
+  // T6 — output_config: {} also triggers default (not bypassed)
+  test("T6: empty output_config triggers medium effort default", () => {
+    const payload = basePayload({
+      model: "claude-opus-4.7",
+      thinking: { type: "enabled" },
+      output_config: {},
+    } as Partial<AnthropicMessagesPayload>)
+    const result = buildUpstreamPayload(payload)
+    expect(result.thinking).toEqual({ type: "adaptive" })
+    expect(result.output_config).toEqual({ effort: "medium" })
+  })
+})
+
+// ---------------------------------------------------------------------------
+// isNativeAnthropicModel tests
+// ---------------------------------------------------------------------------
+
+// Per-test state isolation
+let savedModels: typeof state.models
+
+beforeEach(() => {
+  savedModels = state.models
+})
+
+afterEach(() => {
+  state.models = savedModels
+})
+
+describe("isNativeAnthropicModel", () => {
+  // T5 — model in loaded list with vendor "Anthropic" → true
+  test("T5: model with vendor Anthropic in loaded list → true", () => {
+    state.models = {
+      object: "list",
+      data: [
+        {
+          id: "claude-sonnet-4-5",
+          vendor: "Anthropic",
+          name: "Claude Sonnet 4.5",
+          object: "model",
+          version: "1",
+          preview: false,
+          model_picker_enabled: true,
+          capabilities: {
+            family: "claude",
+            limits:
{}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "chat", + }, + }, + ], + } + expect(isNativeAnthropicModel("claude-sonnet-4-5")).toBe(true) + }) + + // T6 — model in loaded list with vendor "OpenAI" → false + test("T6: model with vendor OpenAI in loaded list → false", () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-4o", + vendor: "OpenAI", + name: "GPT-4o", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "chat", + }, + }, + ], + } + expect(isNativeAnthropicModel("gpt-4o")).toBe(false) + }) + + // T7 — model NOT in loaded list, starts with "claude-" → true (heuristic) + test("T7: model not in loaded list but starts with claude- → true", () => { + state.models = { object: "list", data: [] } + expect(isNativeAnthropicModel("claude-future-1")).toBe(true) + }) + + // T8 — model NOT in loaded list, starts with "gpt-" → false + test("T8: model not in loaded list and starts with gpt- → false", () => { + state.models = { object: "list", data: [] } + expect(isNativeAnthropicModel("gpt-5")).toBe(false) + }) + + // T9 — state.models undefined → heuristic + test("T9: state.models undefined → heuristic (claude- prefix → true)", () => { + state.models = undefined + expect(isNativeAnthropicModel("claude-something")).toBe(true) + }) +}) + +// --------------------------------------------------------------------------- +// isAdaptiveThinkingModel boundary tests (via buildUpstreamPayload) +// --------------------------------------------------------------------------- + +describe("isAdaptiveThinkingModel boundaries (via buildUpstreamPayload)", () => { + // B1 — claude-opus-4.6 is NOT upgraded (one below threshold) + test("B1: claude-opus-4.6 does NOT get adaptive upgrade", () => { + const payload = basePayload({ + model: "claude-opus-4.6", + thinking: { type: 
"enabled", budget_tokens: 2048 },
+    } as Partial<AnthropicMessagesPayload>)
+    const result = buildUpstreamPayload(payload)
+    expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 2048 })
+    expect(result).not.toHaveProperty("output_config")
+  })
+
+  // B2 — claude-opus-4.7 IS upgraded (exact threshold)
+  test("B2: claude-opus-4.7 (dot separator) IS upgraded to adaptive", () => {
+    const payload = basePayload({
+      model: "claude-opus-4.7",
+      thinking: { type: "enabled" },
+    } as Partial<AnthropicMessagesPayload>)
+    const result = buildUpstreamPayload(payload)
+    expect(result.thinking).toEqual({ type: "adaptive" })
+  })
+
+  // B3 — claude-opus-4-7 (dash separator) IS upgraded
+  test("B3: claude-opus-4-7 (dash separator) IS upgraded to adaptive", () => {
+    const payload = basePayload({
+      model: "claude-opus-4-7",
+      thinking: { type: "enabled" },
+    } as Partial<AnthropicMessagesPayload>)
+    const result = buildUpstreamPayload(payload)
+    expect(result.thinking).toEqual({ type: "adaptive" })
+  })
+
+  // B4 — claude-opus-4-6 (dash separator) is NOT upgraded
+  test("B4: claude-opus-4-6 (dash separator) NOT upgraded", () => {
+    const payload = basePayload({
+      model: "claude-opus-4-6",
+      thinking: { type: "enabled", budget_tokens: 512 },
+    } as Partial<AnthropicMessagesPayload>)
+    const result = buildUpstreamPayload(payload)
+    expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 512 })
+    expect(result).not.toHaveProperty("output_config")
+  })
+
+  // B5 — claude-opus-4.8 (one above threshold) IS upgraded
+  test("B5: claude-opus-4.8 (one above threshold) IS upgraded", () => {
+    const payload = basePayload({
+      model: "claude-opus-4.8",
+      thinking: { type: "enabled" },
+    } as Partial<AnthropicMessagesPayload>)
+    const result = buildUpstreamPayload(payload)
+    expect(result.thinking).toEqual({ type: "adaptive" })
+  })
+
+  // B6 — claude-sonnet-4.7 (non-opus) is NOT upgraded
+  test("B6: claude-sonnet-4.7 (non-opus) NOT upgraded to adaptive", () => {
+    const payload = basePayload({
+      model: "claude-sonnet-4.7",
+      thinking: { type: "enabled", budget_tokens: 1024 },
+    } as Partial<AnthropicMessagesPayload>)
+    const
result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 }) + expect(result).not.toHaveProperty("output_config") + }) +}) diff --git a/tests/responses-route.test.ts b/tests/responses-route.test.ts new file mode 100644 index 000000000..eccc085ca --- /dev/null +++ b/tests/responses-route.test.ts @@ -0,0 +1,299 @@ +import { + describe, + test, + expect, + mock, + beforeAll, + beforeEach, + afterEach, +} from "bun:test" + +import { state } from "../src/lib/state" +import { server } from "../src/server" + +// --------------------------------------------------------------------------- +// Global fetch mock — returns a minimal non-streaming Responses API response +// --------------------------------------------------------------------------- + +const mockResponseBody = { + id: "resp_test", + object: "response", + created_at: 1_700_000_000, + model: "gpt-4o", + status: "completed", + output: [], +} + +const fetchMock = mock(() => + Promise.resolve({ + ok: true, + json: () => Promise.resolve(mockResponseBody), + }), +) + +// @ts-expect-error – mock doesn't implement full fetch signature +;(globalThis as unknown as { fetch: typeof fetch }).fetch = fetchMock + +// Set up copilot token so createResponses doesn't throw +beforeAll(() => { + state.copilotToken = "test-token" + state.vsCodeVersion = "1.99.0" + state.accountType = "individual" + state.manualApprove = false +}) + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("POST /v1/responses — wired handler", () => { + test("non-streaming request returns upstream JSON", async () => { + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [], stream: false }), + }) + expect(res.status).toBe(200) + const body = (await res.json()) 
as typeof mockResponseBody + expect(body.object).toBe("response") + expect(body.id).toBe("resp_test") + }) + + test("same endpoint reachable at bare /responses path", async () => { + const res = await server.request("/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [], stream: false }), + }) + expect(res.status).toBe(200) + }) + + test("invalid JSON body returns 400", async () => { + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: "not-json{{{", + }) + expect(res.status).toBe(400) + const body = (await res.json()) as { + error: { type: string; code: string } + } + expect(body.error.type).toBe("invalid_request_error") + expect(body.error.code).toBe("invalid_json") + }) + + test("missing copilot token returns 500", async () => { + state.copilotToken = undefined + try { + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [] }), + }) + expect(res.status).toBe(500) + } finally { + state.copilotToken = "test-token" + } + }) +}) + +// --------------------------------------------------------------------------- +// createResponses behavior: X-Initiator header and error propagation +// --------------------------------------------------------------------------- + +describe("createResponses behavior", () => { + // Restore state and fetch mock before/after each test in this block + beforeEach(() => { + state.copilotToken = "test-token" + state.vsCodeVersion = "1.99.0" + state.accountType = "individual" + state.manualApprove = false + }) + + afterEach(() => { + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = fetchMock + }) + + test("X-Initiator = agent when assistant message present", async () => { + const captureMock = mock( + (_url: string, opts: { headers: 
Record<string, string> }) =>
+        Promise.resolve({
+          ok: true,
+          json: () => Promise.resolve(mockResponseBody),
+          headers: opts.headers,
+        }),
+    )
+    // @ts-expect-error – mock doesn't implement full fetch signature
+    globalThis.fetch = captureMock
+
+    await server.request("/v1/responses", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-4o",
+        stream: false,
+        input: [
+          { type: "message", role: "user", content: "hello" },
+          { type: "message", role: "assistant", content: "hi there" },
+        ],
+      }),
+    })
+
+    expect(captureMock).toHaveBeenCalled()
+    const sentHeaders = (
+      captureMock.mock.calls[0][1] as { headers: Record<string, string> }
+    ).headers
+    expect(sentHeaders["X-Initiator"]).toBe("agent")
+  })
+
+  test("X-Initiator = user for pure user messages", async () => {
+    const captureMock = mock(
+      (_url: string, opts: { headers: Record<string, string> }) =>
+        Promise.resolve({
+          ok: true,
+          json: () => Promise.resolve(mockResponseBody),
+          headers: opts.headers,
+        }),
+    )
+    // @ts-expect-error – mock doesn't implement full fetch signature
+    globalThis.fetch = captureMock
+
+    await server.request("/v1/responses", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-4o",
+        stream: false,
+        input: [{ type: "message", role: "user", content: "just a user" }],
+      }),
+    })
+
+    expect(captureMock).toHaveBeenCalled()
+    const sentHeaders = (
+      captureMock.mock.calls[0][1] as { headers: Record<string, string> }
+    ).headers
+    expect(sentHeaders["X-Initiator"]).toBe("user")
+  })
+
+  test("X-Initiator = agent for function_call_output item", async () => {
+    const captureMock = mock(
+      (_url: string, opts: { headers: Record<string, string> }) =>
+        Promise.resolve({
+          ok: true,
+          json: () => Promise.resolve(mockResponseBody),
+          headers: opts.headers,
+        }),
+    )
+    // @ts-expect-error – mock doesn't implement full fetch signature
+    globalThis.fetch = captureMock
+
+    await server.request("/v1/responses", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-4o",
+        stream: false,
+        input: [
+          { type: "function_call_output", call_id: "call_1", output: "{}" },
+        ],
+      }),
+    })
+
+    expect(captureMock).toHaveBeenCalled()
+    const sentHeaders = (
+      captureMock.mock.calls[0][1] as { headers: Record<string, string> }
+    ).headers
+    expect(sentHeaders["X-Initiator"]).toBe("agent")
+  })
+
+  test("X-Initiator = agent for reasoning item (multi-turn context echo)", async () => {
+    const captureMock = mock(
+      (_url: string, opts: { headers: Record<string, string> }) =>
+        Promise.resolve({
+          ok: true,
+          json: () => Promise.resolve(mockResponseBody),
+          headers: opts.headers,
+        }),
+    )
+    // @ts-expect-error – mock doesn't implement full fetch signature
+    globalThis.fetch = captureMock
+
+    await server.request("/v1/responses", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({
+        model: "gpt-4o",
+        stream: false,
+        input: [
+          {
+            type: "reasoning",
+            id: "rs_abc",
+            encrypted_content: "opaque-blob",
+            status: "completed",
+          },
+        ],
+      }),
+    })
+
+    expect(captureMock).toHaveBeenCalled()
+    const sentHeaders = (
+      captureMock.mock.calls[0][1] as { headers: Record<string, string> }
+    ).headers
+    expect(sentHeaders["X-Initiator"]).toBe("agent")
+  })
+
+  test("upstream 4xx returns error response", async () => {
+    const errorMock = mock(() =>
+      Promise.resolve({
+        ok: false,
+        status: 429,
+        text: () => Promise.resolve("rate limited"),
+      }),
+    )
+    // @ts-expect-error – mock doesn't implement full fetch signature
+    globalThis.fetch = errorMock
+
+    const res = await server.request("/v1/responses", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ model: "gpt-4o", stream: false, input: [] }),
+    })
+
+    expect(res.status).toBe(429)
+  })
+
+  test("streaming request proxies SSE events and returns text/event-stream", async () => {
+    const sseBody =
+      'event: response.created\ndata: {"type":"response.created"}\n\n' +
+      'event: response.completed\ndata: 
{"type":"response.completed"}\n\n' + + "data: [DONE]\n\n" + + const streamMock = mock(() => + Promise.resolve({ + ok: true, + headers: new Headers({ "content-type": "text/event-stream" }), + body: new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode(sseBody)) + controller.close() + }, + }), + }), + ) + // @ts-expect-error – mock doesn't implement full fetch signature + globalThis.fetch = streamMock + + const res = await server.request("/v1/responses", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "gpt-4o", input: [], stream: true }), + }) + + expect(res.status).toBe(200) + expect(res.headers.get("content-type")).toMatch(/text\/event-stream/) + const text = await res.text() + expect(text).toContain("response.created") + expect(text).toContain("response.completed") + }) +}) diff --git a/tests/responses-translation.test.ts b/tests/responses-translation.test.ts new file mode 100644 index 000000000..b1f4e26c2 --- /dev/null +++ b/tests/responses-translation.test.ts @@ -0,0 +1,158 @@ +import { describe, test, expect } from "bun:test" + +import type { ResponsesResponse } from "../src/routes/responses/types" + +import { sanitiseResponsesOutput } from "../src/routes/responses/translation" + +// Minimal valid response fixture +function makeResponse(output: ResponsesResponse["output"]): ResponsesResponse { + return { + id: "resp_test", + object: "response", + created_at: 1_700_000_000, + model: "gpt-5", + status: "completed", + output, + } +} + +describe("sanitiseResponsesOutput", () => { + test("preserves encrypted_content on reasoning items", () => { + const response = makeResponse([ + { + type: "reasoning", + id: "rs_abc", + encrypted_content: "opaque-blob-xyz", + summary: [{ type: "summary_text", text: "thought about it" }], + status: "completed", + }, + ]) + const result = sanitiseResponsesOutput(response) + const reasoning = result.output[0] as { encrypted_content?: string } + 
expect(reasoning.encrypted_content).toBe("opaque-blob-xyz")
+  })
+
+  test("strips status: null from reasoning items (and preserves encrypted_content)", () => {
+    // Explicitly inject status: null — upstream sends this despite TS types forbidding it
+    const response = makeResponse([
+      {
+        type: "reasoning",
+        id: "rs_null_status",
+        encrypted_content: "blob",
+        status: null,
+      } as unknown as ResponsesResponse["output"][0],
+    ])
+
+    const result = sanitiseResponsesOutput(response)
+    const item = result.output[0] as Record<string, unknown>
+    // status must be stripped
+    expect("status" in item).toBe(false)
+    // encrypted_content must survive
+    expect(item["encrypted_content"]).toBe("blob")
+  })
+
+  test("preserves status: in_progress unchanged", () => {
+    const response = makeResponse([
+      {
+        type: "reasoning",
+        id: "rs_inprogress",
+        status: "in_progress",
+      },
+    ])
+    const result = sanitiseResponsesOutput(response)
+    expect((result.output[0] as { status: string }).status).toBe("in_progress")
+  })
+
+  test("preserves status: incomplete unchanged", () => {
+    const response = makeResponse([
+      {
+        type: "reasoning",
+        id: "rs_incomplete",
+        status: "incomplete",
+      },
+    ])
+    const result = sanitiseResponsesOutput(response)
+    expect((result.output[0] as { status: string }).status).toBe("incomplete")
+  })
+
+  test("preserves non-null status on reasoning items", () => {
+    const response = makeResponse([
+      {
+        type: "reasoning",
+        id: "rs_completed",
+        status: "completed",
+      },
+    ])
+    const result = sanitiseResponsesOutput(response)
+    expect((result.output[0] as { status: string }).status).toBe("completed")
+  })
+
+  test("passes message items through unchanged", () => {
+    const response = makeResponse([
+      {
+        type: "message",
+        id: "msg_1",
+        role: "assistant",
+        content: [{ type: "output_text", text: "hello" }],
+        status: "completed",
+      },
+    ])
+    const result = sanitiseResponsesOutput(response)
+    expect(result.output[0]).toEqual(response.output[0])
+  })
+
+  test("passes function_call items through unchanged", () => {
+    const response = makeResponse([
+      {
+        type: "function_call",
+        id: "fc_1",
+        call_id: "call_abc",
+        name: "get_weather",
+        arguments: '{"city":"London"}',
+        status: "completed",
+      },
+    ])
+    const result = sanitiseResponsesOutput(response)
+    expect(result.output[0]).toEqual(response.output[0])
+  })
+
+  test("handles empty output array", () => {
+    const response = makeResponse([])
+    const result = sanitiseResponsesOutput(response)
+    expect(result.output).toEqual([])
+  })
+
+  test("top-level response fields are preserved", () => {
+    const response = makeResponse([])
+    response.usage = { input_tokens: 100, output_tokens: 50, total_tokens: 150 }
+    const result = sanitiseResponsesOutput(response)
+    expect(result.usage).toEqual(response.usage)
+    expect(result.id).toBe("resp_test")
+    expect(result.model).toBe("gpt-5")
+  })
+
+  test("multiple mixed output items all sanitised", () => {
+    const response = makeResponse([
+      {
+        type: "reasoning",
+        id: "rs_1",
+        encrypted_content: "secret",
+        status: null as unknown as "completed",
+      },
+      {
+        type: "message",
+        id: "msg_1",
+        role: "assistant",
+        content: [{ type: "output_text", text: "answer" }],
+        status: "completed",
+      },
+    ])
+    const result = sanitiseResponsesOutput(response)
+    // First item: status stripped, encrypted_content preserved
+    const first = result.output[0] as Record<string, unknown>
+    expect("status" in first).toBe(false)
+    expect(first["encrypted_content"]).toBe("secret")
+    // Second item: unchanged
+    expect(result.output[1]).toEqual(response.output[1])
+  })
+})
diff --git a/tests/utils.test.ts b/tests/utils.test.ts
new file mode 100644
index 000000000..923d15545
--- /dev/null
+++ b/tests/utils.test.ts
@@ -0,0 +1,45 @@
+import { describe, test, expect, mock, beforeEach } from "bun:test"
+
+import type { ModelsResponse } from "../src/services/copilot/get-models"
+
+// ---------------------------------------------------------------------------
+// cacheModels — integration test against the
real state singleton, +// with the service function mocked. +// --------------------------------------------------------------------------- + +const fakeModels: ModelsResponse = { + object: "list", + data: [], +} + +const mockGetModels = mock(() => Promise.resolve(fakeModels)) + +void mock.module("../src/services/copilot/get-models", () => ({ + getModels: mockGetModels, +})) + +// Import after mocking so the mocks are active +import { state } from "../src/lib/state" +import { cacheModels } from "../src/lib/utils" + +describe("cacheModels", () => { + beforeEach(() => { + state.models = undefined + mockGetModels.mockReset() + }) + + test("sets state.models with value from service", async () => { + mockGetModels.mockResolvedValue(fakeModels) + + expect(state.models).toBeUndefined() + await cacheModels() + expect(state.models).toEqual(fakeModels) + }) + + test("calls getModels exactly once", async () => { + mockGetModels.mockResolvedValue(fakeModels) + + await cacheModels() + expect(mockGetModels).toHaveBeenCalledTimes(1) + }) +}) diff --git a/tests/version-detection.test.ts b/tests/version-detection.test.ts new file mode 100644 index 000000000..a0a4b96e8 --- /dev/null +++ b/tests/version-detection.test.ts @@ -0,0 +1,349 @@ +import { describe, test, expect, mock, beforeEach } from "bun:test" +import { setSystemTime } from "bun:test" + +import type { state as StateType } from "../src/lib/state" +import type { getCopilotChatVersion as GetCopilotChatVersion } from "../src/services/get-copilot-chat-version" +import type { getVSCodeVersion as GetVSCodeVersion } from "../src/services/get-vscode-version" + +// --------------------------------------------------------------------------- +// We test the modules by mocking global `fetch` before importing them. +// Each describe block re-imports after resetting the module registry so we +// get fresh module-level caches for every suite. 
+// --------------------------------------------------------------------------- + +// Helpers ---------------------------------------------------------------- + +function makeFetchMock(handler: (url: string, init?: RequestInit) => Response) { + return mock((url: string | URL | Request, init?: RequestInit) => { + const urlStr = url instanceof Request ? url.url : url.toString() + return Promise.resolve(handler(urlStr, init)) + }) as unknown as typeof fetch +} + +function jsonResponse(body: unknown, status = 200): Response { + return new Response(JSON.stringify(body), { + status, + headers: { "Content-Type": "application/json" }, + }) +} + +function textResponse(body: string, status = 200): Response { + return new Response(body, { + status, + headers: { "Content-Type": "text/plain" }, + }) +} + +// Module type aliases for properly typed dynamic imports +type VSCodeVersionModule = { getVSCodeVersion: typeof GetVSCodeVersion } +type CopilotChatVersionModule = { + getCopilotChatVersion: typeof GetCopilotChatVersion +} +type StateModule = { state: typeof StateType } + +// --------------------------------------------------------------------------- +// getVSCodeVersion tests +// --------------------------------------------------------------------------- + +describe("getVSCodeVersion", () => { + let callCount = 0 + + beforeEach(() => { + callCount = 0 + }) + + test("returns version from official VS Code API (primary path)", async () => { + globalThis.fetch = makeFetchMock((_url) => { + callCount++ + return jsonResponse(["1.99.0", "1.98.0"]) + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now()}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.99.0") + expect(callCount).toBe(1) + }) + + test("falls back to AUR when official API fails", async () => { + let requestIndex = 0 + globalThis.fetch = makeFetchMock((_url) => { + const i = requestIndex++ + if (i === 0) throw new Error("network 
error") + // AUR PKGBUILD response + return textResponse("pkgver=1.88.0\narch=(x86_64)") + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 1}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.88.0") + }) + + test("returns hardcoded fallback when both official API and AUR fail", async () => { + globalThis.fetch = makeFetchMock((_url) => { + throw new Error("offline") + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 2}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.104.3") + }) + + test("cache prevents second fetch within TTL", async () => { + let fetchCallCount = 0 + globalThis.fetch = makeFetchMock((_url) => { + fetchCallCount++ + return jsonResponse(["1.99.5"]) + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 3}` + )) as VSCodeVersionModule + + const v1 = await mod.getVSCodeVersion() + const v2 = await mod.getVSCodeVersion() + + expect(v1).toBe("1.99.5") + expect(v2).toBe("1.99.5") + // fetch should only have been called once + expect(fetchCallCount).toBe(1) + }) + + // T1 — VS Code API returns malformed JSON (non-array body {}): falls back to AUR + test("T1: falls back to AUR when official API returns non-array body", async () => { + let requestIndex = 0 + globalThis.fetch = makeFetchMock((_url) => { + const i = requestIndex++ + if (i === 0) return jsonResponse({}) // non-array — triggers "Unexpected response shape" + // AUR PKGBUILD response + return textResponse("pkgver=1.90.0\narch=(x86_64)") + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 10}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.90.0") + expect(requestIndex).toBe(2) + }) + + // T2 — AUR PKGBUILD missing pkgver= line: returns hardcoded fallback + test("T2: returns 
hardcoded fallback when AUR PKGBUILD has no pkgver line", async () => { + let requestIndex = 0 + globalThis.fetch = makeFetchMock((_url) => { + const i = requestIndex++ + if (i === 0) throw new Error("network error") + // AUR response missing pkgver= + return textResponse("pkgdesc='VSCode'\npkgrel=1\n") + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 11}` + )) as VSCodeVersionModule + const version = await mod.getVSCodeVersion() + expect(version).toBe("1.104.3") + }) + + // T5 — TTL expiry triggers refetch + test("T5: TTL expiry triggers a new fetch", async () => { + const CACHE_TTL = 24 * 60 * 60 * 1000 + let fetchCount = 0 + globalThis.fetch = makeFetchMock((_url) => { + fetchCount++ + return jsonResponse(["1.99.0"]) + }) + + const mod = (await import( + `../src/services/get-vscode-version.ts?t=${Date.now() + 12}` + )) as VSCodeVersionModule + + // First call — populates cache + await mod.getVSCodeVersion() + expect(fetchCount).toBe(1) + + // Advance clock past TTL + setSystemTime(new Date(Date.now() + CACHE_TTL + 1)) + + try { + // Second call — cache expired, should fetch again + await mod.getVSCodeVersion() + expect(fetchCount).toBe(2) + } finally { + // Always reset system time + setSystemTime() + } + }) +}) + +// --------------------------------------------------------------------------- +// getCopilotChatVersion tests +// --------------------------------------------------------------------------- + +describe("getCopilotChatVersion", () => { + const MARKETPLACE_URL = + "https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery" + + const validMarketplaceResponse = { + results: [ + { + extensions: [ + { + versions: [{ version: "0.30.1" }], + }, + ], + }, + ], + } + + test("returns version from Marketplace API", async () => { + globalThis.fetch = makeFetchMock((url) => { + expect(url).toBe(MARKETPLACE_URL) + return jsonResponse(validMarketplaceResponse) + }) + + const mod = (await import( + 
`../src/services/get-copilot-chat-version.ts?t=${Date.now()}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.30.1") + }) + + test("returns hardcoded fallback on network error", async () => { + globalThis.fetch = makeFetchMock((_url) => { + throw new Error("connection refused") + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 1}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) + + test("returns hardcoded fallback when API response has unexpected shape", async () => { + globalThis.fetch = makeFetchMock((_url) => { + return jsonResponse({ results: [] }) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 2}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) + + test("cache prevents second fetch within TTL", async () => { + let fetchCallCount = 0 + globalThis.fetch = makeFetchMock((_url) => { + fetchCallCount++ + return jsonResponse(validMarketplaceResponse) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 3}` + )) as CopilotChatVersionModule + + const v1 = await mod.getCopilotChatVersion() + const v2 = await mod.getCopilotChatVersion() + + expect(v1).toBe("0.30.1") + expect(v2).toBe("0.30.1") + expect(fetchCallCount).toBe(1) + }) + + // T3 — Marketplace returns HTTP 503 + test("T3: returns fallback when Marketplace returns HTTP 503", async () => { + globalThis.fetch = makeFetchMock((_url) => { + return new Response("Service Unavailable", { status: 503 }) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 10}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) + + // T4 — Marketplace returns 
version: "" (empty string) + test("T4: returns fallback when Marketplace version is empty string", async () => { + globalThis.fetch = makeFetchMock((_url) => { + return jsonResponse({ + results: [{ extensions: [{ versions: [{ version: "" }] }] }], + }) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 11}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) + + // T7 — Format validation rejects CRLF-injected version + test("T7: rejects version with CRLF injection and returns fallback", async () => { + globalThis.fetch = makeFetchMock((_url) => { + return jsonResponse({ + results: [ + { + extensions: [{ versions: [{ version: "1.0.0\r\nEvil: header" }] }], + }, + ], + }) + }) + + const mod = (await import( + `../src/services/get-copilot-chat-version.ts?t=${Date.now() + 12}` + )) as CopilotChatVersionModule + const version = await mod.getCopilotChatVersion() + expect(version).toBe("0.26.7") + }) +}) + +// --------------------------------------------------------------------------- +// State interface test — shape check +// --------------------------------------------------------------------------- + +describe("State type includes copilotChatVersion", () => { + test("state object accepts copilotChatVersion field", async () => { + const { state } = (await import( + `../src/lib/state.ts?t=${Date.now()}` + )) as StateModule + // Field must be optionally present (undefined by default) + expect(state.copilotChatVersion).toBeUndefined() + + // Should be assignable without TS errors (runtime check) + state.copilotChatVersion = "0.26.7" + expect(state.copilotChatVersion).toBe("0.26.7") + }) +}) + +// --------------------------------------------------------------------------- +// T6 — api-config header uses fallback when copilotChatVersion is undefined +// --------------------------------------------------------------------------- + +describe("copilotHeaders 
fallback", () => {
+  test("T6: editor-plugin-version uses fallback string when state.copilotChatVersion is undefined", async () => {
+    const { copilotHeaders } = await import("../src/lib/api-config")
+    const minimalState = {
+      accountType: "individual",
+      manualApprove: false,
+      rateLimitWait: false,
+      showToken: false,
+      copilotToken: "tok",
+      vsCodeVersion: "1.99.0",
+      copilotChatVersion: undefined,
+    }
+
+    const headers = copilotHeaders(
+      minimalState as Parameters<typeof copilotHeaders>[0],
+      false,
+    )
+    expect(headers["editor-plugin-version"]).not.toBe("copilot-chat/undefined")
+    expect(headers["editor-plugin-version"]).toBe("copilot-chat/0.26.7")
+  })
+})