From cc11c1d22b3f6e4c805bb314699ba00cc4790708 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 01:50:28 +0800 Subject: [PATCH 1/3] feat(native-anthropic): add pass-through service, dispatch, type fixes (#38-#45) --- docs/prd/native-anthropic-passthrough.md | 74 ++++++++ src/routes/messages/anthropic-types.ts | 39 ++++- src/routes/messages/handler.ts | 75 +++++++- src/routes/messages/non-stream-translation.ts | 26 ++- .../copilot/create-messages-native.ts | 161 ++++++++++++++++++ src/services/copilot/native-models.ts | 57 +++++++ 6 files changed, 412 insertions(+), 20 deletions(-) create mode 100644 docs/prd/native-anthropic-passthrough.md create mode 100644 src/services/copilot/create-messages-native.ts create mode 100644 src/services/copilot/native-models.ts diff --git a/docs/prd/native-anthropic-passthrough.md b/docs/prd/native-anthropic-passthrough.md new file mode 100644 index 000000000..606c98ca8 --- /dev/null +++ b/docs/prd/native-anthropic-passthrough.md @@ -0,0 +1,74 @@ +# Native Anthropic Pass-Through for Claude Models + +## Status +Approved + +## Overview +Route Anthropic `/v1/messages` requests for Claude models directly to the GitHub Copilot upstream's native Anthropic endpoint, bypassing the existing OpenAI translation layer. This preserves thinking blocks with `signature` field, `top_k`, `cache_control`, and richer usage stats — none of which survive the current translation round-trip. + +## Motivation +GitHub Copilot's upstream (`api.enterprise.githubcopilot.com`) natively speaks the Anthropic Messages API for all Claude 4.5+ models. The current code path translates Anthropic → OpenAI → sends → translates back, losing: +- `thinking` blocks (completely dropped) +- `signature` field on thinking blocks (required for multi-turn reasoning) +- `cache_creation_input_tokens` in usage +- `top_k` parameter +- `cache_control` on system/user blocks + +The fix: detect Claude models by `vendor === "Anthropic"` from the `/models` endpoint, and forward requests verbatim to `/v1/messages` upstream. + +## Requirements + +1. **`create-messages-native.ts`** — Service client that POSTs Anthropic payloads directly to `${copilotBaseUrl}/v1/messages` with correct headers (`anthropic-version`, `anthropic-beta`). +2. **Route dispatch** — `handler.ts` checks `isNativeAnthropicModel(model)` and branches to native path for Claude, translation path for everything else. +3. **`native-models.ts`** — `isNativeAnthropicModel(modelId)` checks `state.models` vendor field; falls back to `claude-` prefix heuristic before models load. +4. **Type fixes** — `anthropic-types.ts`: `signature?` on `AnthropicThinkingBlock`; union `thinking` type for adaptive (opus-4.7+); `output_config`; `AnthropicImageBlock` URL source; `AnthropicToolResultBlock.content` widened. +5. **Adaptive thinking upgrade** — `create-messages-native.ts` auto-upgrades `{ type: "enabled" }` → `{ type: "adaptive" }` + `output_config.effort` for `claude-opus-4.7+` models. +6. **SSE proxy** — Streaming responses from native path forwarded verbatim to client (no re-translation needed). + +## Acceptance Criteria + +- Claude models (`vendor === "Anthropic"`) route to native path; non-Claude models route to translation path. +- Thinking blocks with `signature` field returned to client in both streaming and non-streaming. +- Multi-turn conversations with thinking blocks (echoing `signature`) work correctly. 
+- `claude-opus-4.7+` with `{ type: "enabled" }` thinking auto-upgrades to adaptive format; no HTTP 400. +- All existing tests pass; new tests cover native vs. translation dispatch. + +## Technical Approach + +### Model detection +`state.models.data` from `/models` endpoint has `vendor: "Anthropic"` for all Claude models. `isNativeAnthropicModel()` checks this first, falls back to `startsWith("claude-")` heuristic. + +### Headers for native path +``` +anthropic-version: 2023-06-01 +anthropic-beta: interleaved-thinking-2025-05-14,prompt-caching-2024-07-31 +``` +Plus all standard Copilot headers (auth, editor-version, etc.). + +### Streaming proxy +Native upstream sends proper Anthropic SSE events. Parse `event.type` for logging; forward `rawEvent.data` verbatim. No translation needed. + +### Adaptive thinking (opus-4.7+) +If model matches `/^claude-opus-4[.-](\d+)/` with minor ≥ 7, auto-upgrade `{ type: "enabled", budget_tokens: N }` → `{ type: "adaptive" }` + `output_config: { effort: "medium" }`. + +## File Changes + +**New:** +- `src/services/copilot/create-messages-native.ts` +- `src/services/copilot/native-models.ts` + +**Modified:** +- `src/routes/messages/anthropic-types.ts` — type fixes +- `src/routes/messages/handler.ts` — dispatch logic +- `src/routes/messages/non-stream-translation.ts` — remove stale comment; fix image source narrowing + +## Testing Strategy +- Unit: `isNativeAnthropicModel()` with populated vs empty `state.models` +- Unit: `buildUpstreamPayload()` adaptive thinking upgrade +- Integration: handler routes Claude models to native, GPT models to translation +- Existing translation tests must still pass + +## Out of Scope +- Persistent caching of native responses +- URL image sources (rejected by upstream; type kept for fidelity) +- Responses API (#1 epic) diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 881fffcc8..dda7657b3 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -18,9 +18,16 @@ export interface AnthropicMessagesPayload { type: "auto" | "any" | "tool" | "none" name?: string } - thinking?: { - type: "enabled" - budget_tokens?: number + /** + * Thinking config. + * - Legacy (claude-3.7 / claude-4.5): `{ type: "enabled", budget_tokens: N }` + * - New adaptive (claude-opus-4.7+): `{ type: "adaptive" }` paired with + * `output_config.effort` in the request body. + */ + thinking?: { type: "enabled"; budget_tokens?: number } | { type: "adaptive" } + /** Used together with `thinking: { type: "adaptive" }` on opus-4.7+. */ + output_config?: { + effort?: "low" | "medium" | "high" } service_tier?: "auto" | "standard_only" } @@ -32,17 +39,24 @@ export interface AnthropicTextBlock { export interface AnthropicImageBlock { type: "image" - source: { - type: "base64" - media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" - data: string - } + source: + | { + type: "base64" + media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" + data: string + } + | { + /** URL images are rejected by Copilot upstream — kept for type fidelity only. */ + type: "url" + url: string + } } export interface AnthropicToolResultBlock { type: "tool_result" tool_use_id: string - content: string + /** May be a plain string or an array of content blocks. 
*/ + content: string | Array is_error?: boolean } @@ -56,6 +70,12 @@ export interface AnthropicToolUseBlock { export interface AnthropicThinkingBlock { type: "thinking" thinking: string + /** + * Opaque signature returned by the upstream for extended thinking blocks. + * Must be echoed back in subsequent turns to enable multi-turn reasoning. + * Present on native pass-through responses; absent on translated responses. + */ + signature?: string } export type AnthropicUserContentBlock = @@ -106,6 +126,7 @@ export interface AnthropicResponse { output_tokens: number cache_creation_input_tokens?: number cache_read_input_tokens?: number + /** Present on native pass-through responses. */ service_tier?: "standard" | "priority" | "batch" } } diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf6243..cf691ffd7 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -11,9 +11,12 @@ import { type ChatCompletionChunk, type ChatCompletionResponse, } from "~/services/copilot/create-chat-completions" +import { createMessagesNative } from "~/services/copilot/create-messages-native" +import { isNativeAnthropicModel } from "~/services/copilot/native-models" import { type AnthropicMessagesPayload, + type AnthropicStreamEventData, type AnthropicStreamState, } from "./anthropic-types" import { @@ -28,16 +31,80 @@ export async function handleCompletion(c: Context) { const anthropicPayload = await c.req.json() consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) + if (state.manualApprove) { + await awaitApproval() + } + + // Route to native Anthropic pass-through for Claude models to preserve + // thinking blocks (with signature), top_k, cache_control, and richer usage. + if (isNativeAnthropicModel(anthropicPayload.model)) { + return handleNative(c, anthropicPayload) + } + + return handleTranslated(c, anthropicPayload) +} + +// --------------------------------------------------------------------------- +// Native Anthropic pass-through (Claude 4.5+ models) +// --------------------------------------------------------------------------- + +async function handleNative( + c: Context, + payload: AnthropicMessagesPayload, +): Promise { + consola.debug("Using native Anthropic pass-through for", payload.model) + + const response = await createMessagesNative(payload) + + if (!payload.stream) { + // Non-streaming: upstream already returned a complete Anthropic response + consola.debug( + "Native non-streaming response:", + JSON.stringify(response).slice(0, 400), + ) + return c.json(response) + } + + // Streaming: proxy the SSE events directly to the client + consola.debug("Native streaming response — proxying SSE events") + return streamSSE(c, async (stream) => { + for await (const rawEvent of response as AsyncIterable<{ + data?: string + event?: string + }>) { + if (rawEvent.data === "[DONE]") break + if (!rawEvent.data) continue + + // Parse to log but forward the original JSON verbatim + try { + const parsed = JSON.parse(rawEvent.data) as AnthropicStreamEventData + consola.debug("Native SSE event:", parsed.type) + await stream.writeSSE({ + event: parsed.type, + data: rawEvent.data, + }) + } catch { + // Malformed chunk — skip + consola.warn("Could not parse native SSE chunk:", rawEvent.data) + } + } + }) +} + +// --------------------------------------------------------------------------- +// Translation path (non-Claude models via /chat/completions) +// --------------------------------------------------------------------------- + 
+async function handleTranslated( + c: Context, + anthropicPayload: AnthropicMessagesPayload, +): Promise { const openAIPayload = translateToOpenAI(anthropicPayload) consola.debug( "Translated OpenAI request payload:", JSON.stringify(openAIPayload), ) - if (state.manualApprove) { - await awaitApproval() - } - const response = await createChatCompletions(openAIPayload) if (isNonStreaming(response)) { diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index dc41e6382..e154c3714 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -1,3 +1,5 @@ +import consola from "consola" + import { type ChatCompletionResponse, type ChatCompletionsPayload, @@ -213,12 +215,20 @@ function mapContent( break } case "image": { - contentParts.push({ - type: "image_url", - image_url: { - url: `data:${block.source.media_type};base64,${block.source.data}`, - }, - }) + if (block.source.type === "base64") { + contentParts.push({ + type: "image_url", + image_url: { + url: `data:${block.source.media_type};base64,${block.source.data}`, + }, + }) + } else { + // URL images are rejected by Copilot upstream — skip silently + // (type kept for fidelity when round-tripping through native path) + consola.warn( + "URL image source not supported in translation path — skipping", + ) + } break } @@ -302,7 +312,9 @@ export function translateToAnthropic( } } - // Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses + // Note: the translation path routes Claude models via /chat/completions which + // does not return thinking blocks. For thinking block support use the native + // Anthropic pass-through path (create-messages-native.ts). return { id: response.id, diff --git a/src/services/copilot/create-messages-native.ts b/src/services/copilot/create-messages-native.ts new file mode 100644 index 000000000..0fef793bf --- /dev/null +++ b/src/services/copilot/create-messages-native.ts @@ -0,0 +1,161 @@ +/** + * Native Anthropic pass-through service. + * + * The GitHub Copilot upstream (`api.enterprise.githubcopilot.com`) natively + * speaks the Anthropic Messages API for all Claude 4.5+ models. Routing + * requests directly to `/v1/messages` instead of translating them through + * `/chat/completions` gives us: + * + * - Real thinking blocks with `signature` field (multi-turn reasoning) + * - `cache_creation_input_tokens` in usage + * - `top_k` support + * - No lossy translation round-trip + * + * See research notes: ~/copilot-models-litellm/copilot_models.py + */ + +import consola from "consola" +import { events } from "fetch-event-stream" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" + +import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config" +import { HTTPError } from "~/lib/error" +import { state } from "~/lib/state" + +/** + * Forward an Anthropic-format request directly to Copilot's native `/v1/messages` + * endpoint, preserving all fields (thinking, signature, top_k, cache_control, …). 
+ * + * Returns: + * - For non-streaming: the raw Anthropic JSON response object + * - For streaming: an async iterable of SSE events (fetch-event-stream) + */ +export const createMessagesNative = async ( + payload: AnthropicMessagesPayload, +) => { + if (!state.copilotToken) throw new Error("Copilot token not found") + + const hasVision = messageHasImages(payload) + const headers = buildNativeHeaders(hasVision) + + const upstream = `${copilotBaseUrl(state)}/v1/messages` + consola.debug("Native Anthropic upstream:", upstream) + + // Strip fields that are Copilot-API–specific or unsupported by upstream + const body = buildUpstreamPayload(payload) + + const response = await fetch(upstream, { + method: "POST", + headers, + body: JSON.stringify(body), + }) + + if (!response.ok) { + consola.error("Native Anthropic upstream error", response.status) + throw new HTTPError("Native Anthropic upstream error", response) + } + + if (payload.stream) { + return events(response) + } + + return response.json() +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Build headers for the Anthropic native endpoint. + * + * The upstream requires `anthropic-version` and does NOT want an `openai-intent` + * header. We reuse `copilotHeaders()` for auth/agent headers and then layer the + * Anthropic-specific ones on top. + */ +function buildNativeHeaders(vision: boolean): Record { + const base = copilotHeaders(state, vision) + + // The native /v1/messages endpoint expects these Anthropic headers + return { + ...base, + "anthropic-version": "2023-06-01", + // Enable beta features: extended thinking + prompt caching + "anthropic-beta": + "interleaved-thinking-2025-05-14,prompt-caching-2024-07-31", + // Accept Anthropic streaming format + accept: "text/event-stream", + // The upstream doesn't use openai-intent for the messages path + // but leaving it does no harm; keep for header consistency + } +} + +/** + * Produce the payload forwarded to upstream. + * + * We pass through almost everything verbatim. The only transformation is that + * `claude-opus-4.7+` requires the new adaptive thinking format + * (`thinking: { type: "adaptive" }` + `output_config.effort`) rather than the + * legacy `{ type: "enabled", budget_tokens: N }`. If the caller already sent + * the correct format we leave it alone; if they sent the old format and the + * model requires adaptive, we upgrade automatically. + */ +function buildUpstreamPayload( + payload: AnthropicMessagesPayload, +): AnthropicMessagesPayload { + const { thinking, output_config, ...rest } = payload + + if (!thinking) { + return payload + } + + if (isAdaptiveThinkingModel(payload.model)) { + // Upgrade legacy enabled → adaptive if needed + if (thinking.type === "enabled") { + consola.debug( + `Upgrading thinking format to adaptive for model ${payload.model}`, + ) + return { + ...rest, + thinking: { type: "adaptive" }, + output_config: output_config ?? { effort: "medium" }, + } + } + // Already adaptive — forward as-is + return { ...rest, thinking, output_config } + } + + // Non-adaptive model — forward legacy format, drop output_config + return { ...rest, thinking } +} + +/** + * Models that require the new adaptive thinking API. + * Populated dynamically at dispatch time via `isNativeAnthropicModel()`. + * This hard-coded check is the fallback. 
+ */ +function isAdaptiveThinkingModel(model: string): boolean { + // claude-opus-4.7 and above use adaptive thinking + const match = model.match(/^claude-opus-4[.-](\d+)/) + if (match) { + const minor = Number.parseInt(match[1], 10) + return minor >= 7 + } + return false +} + +/** + * Check whether the request contains any image blocks (to set vision headers). + */ +function messageHasImages(payload: AnthropicMessagesPayload): boolean { + for (const msg of payload.messages) { + if (typeof msg.content === "string") continue + if (Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block.type === "image") return true + } + } + } + return false +} diff --git a/src/services/copilot/native-models.ts b/src/services/copilot/native-models.ts new file mode 100644 index 000000000..e411b34e4 --- /dev/null +++ b/src/services/copilot/native-models.ts @@ -0,0 +1,57 @@ +/** + * Dynamic detection of which models support native Anthropic pass-through. + * + * The Copilot `/models` endpoint returns a `vendor` field for each model. + * Any model with `vendor === "Anthropic"` is served natively via the + * `/v1/messages` path at `api.enterprise.githubcopilot.com`. + * + * We cache the set of native model IDs after the first `/models` call and + * keep it in sync with `state.models` (which is refreshed periodically by + * the token-rotation logic). + */ + +import { state } from "~/lib/state" + +/** + * Returns true if the given model ID should be routed to the native + * Anthropic pass-through service instead of the OpenAI chat-completions + * translation layer. + * + * Resolution order: + * 1. If `state.models` is populated, check whether the model's vendor is + * "Anthropic" (live, always up-to-date). + * 2. Fall back to a static prefix list for resilience at startup before + * the models list is fetched. + */ +export function isNativeAnthropicModel(modelId: string): boolean { + if (state.models?.data) { + const entry = state.models.data.find((m) => m.id === modelId) + if (entry) { + return entry.vendor === "Anthropic" + } + // Model not found in list — fall through to prefix heuristic + } + + return matchesAnthropicPrefix(modelId) +} + +/** + * Static prefix heuristic used before `state.models` is populated. + * Covers all current Claude variants served by Copilot. + */ +function matchesAnthropicPrefix(modelId: string): boolean { + return modelId.startsWith("claude-") || modelId.startsWith("claude_") +} + +/** + * Return the full list of model IDs that support native Anthropic pass-through, + * derived from `state.models`. Useful for logging / diagnostics. + * + * Falls back to an empty array if the models list has not been fetched yet. 
+ */ +export function nativeAnthropicModelIds(): ReadonlyArray { + if (!state.models?.data) return [] + return state.models.data + .filter((m) => m.vendor === "Anthropic") + .map((m) => m.id) +} From dce9e6c3355e686ea28ddbecb5d6891adb255ce8 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 01:57:22 +0800 Subject: [PATCH 2/3] fix: address review round 1 feedback for native pass-through (#38) - H1: Remove dead [DONE] sentinel from native SSE loop (Anthropic terminates via connection close) - H3: Conditionally set accept: text/event-stream only when streaming - M1: buildUpstreamPayload returns rest (not payload) when thinking absent, stripping output_config - M2: Truncate raw SSE data to 200 chars in warn log to prevent log injection - L2: Remove claude_ underscore prefix heuristic (no known Anthropic model uses it) - L3: Document >= 7 threshold comment in isAdaptiveThinkingModel - L4: Replace verbose JSDoc on nativeAnthropicModelIds with concise standard form - Export buildUpstreamPayload for direct unit testing - Add tests/native-passthrough.test.ts with T1-T9 covering payload transform and model routing Co-Authored-By: Claude Sonnet 4.6 --- src/routes/messages/handler.ts | 6 +- .../copilot/create-messages-native.ts | 19 ++- src/services/copilot/native-models.ts | 8 +- tests/native-passthrough.test.ts | 160 ++++++++++++++++++ 4 files changed, 178 insertions(+), 15 deletions(-) create mode 100644 tests/native-passthrough.test.ts diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index cf691ffd7..2bf1005ea 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -72,7 +72,6 @@ async function handleNative( data?: string event?: string }>) { - if (rawEvent.data === "[DONE]") break if (!rawEvent.data) continue // Parse to log but forward the original JSON verbatim @@ -85,7 +84,10 @@ async function handleNative( }) } catch { // Malformed chunk — skip - consola.warn("Could not parse native SSE chunk:", rawEvent.data) + consola.warn( + "Could not parse native SSE chunk:", + rawEvent.data.slice(0, 200), + ) } } }) diff --git a/src/services/copilot/create-messages-native.ts b/src/services/copilot/create-messages-native.ts index 0fef793bf..e6c41cb72 100644 --- a/src/services/copilot/create-messages-native.ts +++ b/src/services/copilot/create-messages-native.ts @@ -37,7 +37,7 @@ export const createMessagesNative = async ( if (!state.copilotToken) throw new Error("Copilot token not found") const hasVision = messageHasImages(payload) - const headers = buildNativeHeaders(hasVision) + const headers = buildNativeHeaders(hasVision, Boolean(payload.stream)) const upstream = `${copilotBaseUrl(state)}/v1/messages` consola.debug("Native Anthropic upstream:", upstream) @@ -74,7 +74,10 @@ export const createMessagesNative = async ( * header. We reuse `copilotHeaders()` for auth/agent headers and then layer the * Anthropic-specific ones on top. 
*/ -function buildNativeHeaders(vision: boolean): Record { +function buildNativeHeaders( + vision: boolean, + stream: boolean, +): Record { const base = copilotHeaders(state, vision) // The native /v1/messages endpoint expects these Anthropic headers @@ -84,10 +87,9 @@ function buildNativeHeaders(vision: boolean): Record { // Enable beta features: extended thinking + prompt caching "anthropic-beta": "interleaved-thinking-2025-05-14,prompt-caching-2024-07-31", - // Accept Anthropic streaming format - accept: "text/event-stream", - // The upstream doesn't use openai-intent for the messages path - // but leaving it does no harm; keep for header consistency + // Only request SSE streaming format when the caller is streaming; + // non-streaming calls should use the default application/json accept + ...(stream ? { accept: "text/event-stream" } : {}), } } @@ -101,13 +103,13 @@ function buildNativeHeaders(vision: boolean): Record { * the correct format we leave it alone; if they sent the old format and the * model requires adaptive, we upgrade automatically. */ -function buildUpstreamPayload( +export function buildUpstreamPayload( payload: AnthropicMessagesPayload, ): AnthropicMessagesPayload { const { thinking, output_config, ...rest } = payload if (!thinking) { - return payload + return rest // safe: output_config only valid alongside thinking } if (isAdaptiveThinkingModel(payload.model)) { @@ -140,6 +142,7 @@ function isAdaptiveThinkingModel(model: string): boolean { const match = model.match(/^claude-opus-4[.-](\d+)/) if (match) { const minor = Number.parseInt(match[1], 10) + // claude-opus-4.7 and later use the new adaptive thinking API (not legacy budget_tokens) return minor >= 7 } return false diff --git a/src/services/copilot/native-models.ts b/src/services/copilot/native-models.ts index e411b34e4..7c3d4f59b 100644 --- a/src/services/copilot/native-models.ts +++ b/src/services/copilot/native-models.ts @@ -40,14 +40,12 @@ export function isNativeAnthropicModel(modelId: string): boolean { * Covers all current Claude variants served by Copilot. */ function matchesAnthropicPrefix(modelId: string): boolean { - return modelId.startsWith("claude-") || modelId.startsWith("claude_") + return modelId.startsWith("claude-") } /** - * Return the full list of model IDs that support native Anthropic pass-through, - * derived from `state.models`. Useful for logging / diagnostics. - * - * Falls back to an empty array if the models list has not been fetched yet. + * Returns all model IDs that support native Anthropic pass-through. + * Used for diagnostics and startup logging. */ export function nativeAnthropicModelIds(): ReadonlyArray { if (!state.models?.data) return [] diff --git a/tests/native-passthrough.test.ts b/tests/native-passthrough.test.ts new file mode 100644 index 000000000..759df813d --- /dev/null +++ b/tests/native-passthrough.test.ts @@ -0,0 +1,160 @@ +import { describe, test, expect, afterEach } from "bun:test" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" + +import { state } from "~/lib/state" +import { buildUpstreamPayload } from "~/services/copilot/create-messages-native" +import { isNativeAnthropicModel } from "~/services/copilot/native-models" + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Minimal valid payload base — only the fields required by the type. 
*/ +function basePayload( + overrides: Partial, +): AnthropicMessagesPayload { + return { + model: "claude-sonnet-4-5", + messages: [{ role: "user", content: "hi" }], + max_tokens: 1024, + ...overrides, + } +} + +// --------------------------------------------------------------------------- +// buildUpstreamPayload tests +// --------------------------------------------------------------------------- + +describe("buildUpstreamPayload", () => { + // T1 — output_config present but thinking absent → output_config stripped + test("T1: strips output_config when thinking is absent", () => { + const payload = basePayload({ + output_config: { effort: "high" }, + }) + const result = buildUpstreamPayload(payload) + expect(result).not.toHaveProperty("output_config") + expect(result).not.toHaveProperty("thinking") + }) + + // T2 — adaptive upgrade preserves explicit output_config: { effort: "high" } + test("T2: adaptive upgrade preserves explicit output_config effort", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled" }, + output_config: { effort: "high" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + // Should keep caller's "high", not override to "medium" + expect(result.output_config).toEqual({ effort: "high" }) + }) + + // T3 — already adaptive → forwarded as-is + test("T3: already-adaptive thinking forwarded as-is", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "adaptive" }, + output_config: { effort: "low" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + expect(result.output_config).toEqual({ effort: "low" }) + }) + + // T4 — legacy model with enabled thinking → kept as-is, no adaptive upgrade + test("T4: legacy model with enabled thinking kept as-is", () => { + const payload = basePayload({ + model: "claude-sonnet-4-5", + thinking: { type: "enabled", budget_tokens: 1024 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 }) + expect(result).not.toHaveProperty("output_config") + }) +}) + +// --------------------------------------------------------------------------- +// isNativeAnthropicModel tests +// --------------------------------------------------------------------------- + +// Save original models state and restore after each test +const originalModels = state.models + +afterEach(() => { + state.models = originalModels +}) + +describe("isNativeAnthropicModel", () => { + // T5 — model in loaded list with vendor "Anthropic" → true + test("T5: model with vendor Anthropic in loaded list → true", () => { + state.models = { + object: "list", + data: [ + { + id: "claude-sonnet-4-5", + vendor: "Anthropic", + name: "Claude Sonnet 4.5", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "claude", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "chat", + }, + }, + ], + } + expect(isNativeAnthropicModel("claude-sonnet-4-5")).toBe(true) + }) + + // T6 — model in loaded list with vendor "OpenAI" → false + test("T6: model with vendor OpenAI in loaded list → false", () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-4o", + vendor: "OpenAI", + name: "GPT-4o", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { 
+ family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "chat", + }, + }, + ], + } + expect(isNativeAnthropicModel("gpt-4o")).toBe(false) + }) + + // T7 — model NOT in loaded list, starts with "claude-" → true (heuristic) + test("T7: model not in loaded list but starts with claude- → true", () => { + state.models = { object: "list", data: [] } + expect(isNativeAnthropicModel("claude-future-1")).toBe(true) + }) + + // T8 — model NOT in loaded list, starts with "gpt-" → false + test("T8: model not in loaded list and starts with gpt- → false", () => { + state.models = { object: "list", data: [] } + expect(isNativeAnthropicModel("gpt-5")).toBe(false) + }) + + // T9 — state.models undefined → heuristic + test("T9: state.models undefined → heuristic (claude- prefix → true)", () => { + state.models = undefined + expect(isNativeAnthropicModel("claude-something")).toBe(true) + }) +}) From 6c92355168144fbcdc60cb11ec3dc56464aeab49 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:11:37 +0800 Subject: [PATCH 3/3] fix: address review round 2 feedback for native pass-through (#38) --- src/routes/messages/handler.ts | 15 +-- .../copilot/create-messages-native.ts | 19 ++-- src/services/copilot/native-models.ts | 19 ---- tests/native-passthrough.test.ts | 105 +++++++++++++++++- 4 files changed, 120 insertions(+), 38 deletions(-) diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 2bf1005ea..e383b2dc0 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -74,18 +74,19 @@ async function handleNative( }>) { if (!rawEvent.data) continue - // Parse to log but forward the original JSON verbatim + // Forward verbatim — never block on parse failure + await stream.writeSSE({ + event: rawEvent.event, + data: rawEvent.data, + }) + + // Parse only for debug logging try { const parsed = JSON.parse(rawEvent.data) as AnthropicStreamEventData consola.debug("Native SSE event:", parsed.type) - await stream.writeSSE({ - event: parsed.type, - data: rawEvent.data, - }) } catch { - // Malformed chunk — skip consola.warn( - "Could not parse native SSE chunk:", + "Could not parse native SSE chunk for logging:", rawEvent.data.slice(0, 200), ) } diff --git a/src/services/copilot/create-messages-native.ts b/src/services/copilot/create-messages-native.ts index e6c41cb72..8b9fb54fd 100644 --- a/src/services/copilot/create-messages-native.ts +++ b/src/services/copilot/create-messages-native.ts @@ -80,15 +80,16 @@ function buildNativeHeaders( ): Record { const base = copilotHeaders(state, vision) - // The native /v1/messages endpoint expects these Anthropic headers + // Remove headers that are OpenAI-specific and not expected by Anthropic endpoint + const { "openai-intent": _dropped, ...anthropicBase } = base + return { - ...base, + ...anthropicBase, "anthropic-version": "2023-06-01", // Enable beta features: extended thinking + prompt caching "anthropic-beta": "interleaved-thinking-2025-05-14,prompt-caching-2024-07-31", - // Only request SSE streaming format when the caller is streaming; - // non-streaming calls should use the default application/json accept + // Only request SSE streaming format when the caller is streaming ...(stream ? { accept: "text/event-stream" } : {}), } } @@ -121,7 +122,8 @@ export function buildUpstreamPayload( return { ...rest, thinking: { type: "adaptive" }, - output_config: output_config ?? 
{ effort: "medium" }, + output_config: + output_config?.effort ? output_config : { effort: "medium" }, } } // Already adaptive — forward as-is @@ -133,9 +135,10 @@ export function buildUpstreamPayload( } /** - * Models that require the new adaptive thinking API. - * Populated dynamically at dispatch time via `isNativeAnthropicModel()`. - * This hard-coded check is the fallback. + * Returns true for models that require the adaptive thinking API + * (`{ type: "adaptive" }` + `output_config.effort`) rather than the + * legacy `{ type: "enabled", budget_tokens: N }`. + * Currently: claude-opus-4.7 and later. */ function isAdaptiveThinkingModel(model: string): boolean { // claude-opus-4.7 and above use adaptive thinking diff --git a/src/services/copilot/native-models.ts b/src/services/copilot/native-models.ts index 7c3d4f59b..7d731d01b 100644 --- a/src/services/copilot/native-models.ts +++ b/src/services/copilot/native-models.ts @@ -32,24 +32,5 @@ export function isNativeAnthropicModel(modelId: string): boolean { // Model not found in list — fall through to prefix heuristic } - return matchesAnthropicPrefix(modelId) -} - -/** - * Static prefix heuristic used before `state.models` is populated. - * Covers all current Claude variants served by Copilot. - */ -function matchesAnthropicPrefix(modelId: string): boolean { return modelId.startsWith("claude-") } - -/** - * Returns all model IDs that support native Anthropic pass-through. - * Used for diagnostics and startup logging. - */ -export function nativeAnthropicModelIds(): ReadonlyArray { - if (!state.models?.data) return [] - return state.models.data - .filter((m) => m.vendor === "Anthropic") - .map((m) => m.id) -} diff --git a/tests/native-passthrough.test.ts b/tests/native-passthrough.test.ts index 759df813d..6e4f09c72 100644 --- a/tests/native-passthrough.test.ts +++ b/tests/native-passthrough.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, afterEach } from "bun:test" +import { describe, test, expect, beforeEach, afterEach } from "bun:test" import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" @@ -72,17 +72,45 @@ describe("buildUpstreamPayload", () => { expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 }) expect(result).not.toHaveProperty("output_config") }) + + // T5 — adaptive upgrade with no output_config → defaults to effort:medium + test("T5: adaptive upgrade with no output_config defaults to effort:medium", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled", budget_tokens: 1024 }, + // output_config intentionally absent + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + expect(result.output_config).toEqual({ effort: "medium" }) + }) + + // T6 — output_config: {} also triggers default (not bypassed) + test("T6: empty output_config triggers medium effort default", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled" }, + output_config: {}, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + expect(result.output_config).toEqual({ effort: "medium" }) + }) }) // --------------------------------------------------------------------------- // isNativeAnthropicModel tests // --------------------------------------------------------------------------- -// Save original models state and restore after each test -const originalModels = state.models +// Per-test 
state isolation +let savedModels: typeof state.models + +beforeEach(() => { + savedModels = state.models +}) afterEach(() => { - state.models = originalModels + state.models = savedModels }) describe("isNativeAnthropicModel", () => { @@ -158,3 +186,72 @@ describe("isNativeAnthropicModel", () => { expect(isNativeAnthropicModel("claude-something")).toBe(true) }) }) + +// --------------------------------------------------------------------------- +// isAdaptiveThinkingModel boundary tests (via buildUpstreamPayload) +// --------------------------------------------------------------------------- + +describe("isAdaptiveThinkingModel boundaries (via buildUpstreamPayload)", () => { + // B1 — claude-opus-4.6 is NOT upgraded (one below threshold) + test("B1: claude-opus-4.6 does NOT get adaptive upgrade", () => { + const payload = basePayload({ + model: "claude-opus-4.6", + thinking: { type: "enabled", budget_tokens: 2048 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 2048 }) + expect(result).not.toHaveProperty("output_config") + }) + + // B2 — claude-opus-4.7 IS upgraded (exact threshold) + test("B2: claude-opus-4.7 (dot separator) IS upgraded to adaptive", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + }) + + // B3 — claude-opus-4-7 (dash separator) IS upgraded + test("B3: claude-opus-4-7 (dash separator) IS upgraded to adaptive", () => { + const payload = basePayload({ + model: "claude-opus-4-7", + thinking: { type: "enabled" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + }) + + // B4 — claude-opus-4-6 (dash separator) is NOT upgraded + test("B4: claude-opus-4-6 (dash separator) NOT upgraded", () => { + const payload = basePayload({ + model: "claude-opus-4-6", + thinking: { type: "enabled", budget_tokens: 512 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 512 }) + expect(result).not.toHaveProperty("output_config") + }) + + // B5 — claude-opus-4.8 (one above threshold) IS upgraded + test("B5: claude-opus-4.8 (one above threshold) IS upgraded", () => { + const payload = basePayload({ + model: "claude-opus-4.8", + thinking: { type: "enabled" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + }) + + // B6 — claude-sonnet-4.7 (non-opus) is NOT upgraded + test("B6: claude-sonnet-4.7 (non-opus) NOT upgraded to adaptive", () => { + const payload = basePayload({ + model: "claude-sonnet-4.7", + thinking: { type: "enabled", budget_tokens: 1024 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 }) + expect(result).not.toHaveProperty("output_config") + }) +})
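
For reference, a minimal usage sketch of the payload transform that the boundary tests above exercise. This is illustrative only: it assumes the `buildUpstreamPayload` export and the `AnthropicMessagesPayload` shape introduced in this patch series, and the literal values mirror test T5 (legacy thinking on an adaptive-only model).

```ts
import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"

import { buildUpstreamPayload } from "~/services/copilot/create-messages-native"

// Caller sends the legacy thinking format to an adaptive-only model.
const request: AnthropicMessagesPayload = {
  model: "claude-opus-4.7",
  max_tokens: 2048,
  messages: [{ role: "user", content: "Summarize this diff" }],
  thinking: { type: "enabled", budget_tokens: 1024 },
}

// The native pass-through upgrades the request before forwarding upstream:
//   thinking      -> { type: "adaptive" }
//   output_config -> { effort: "medium" } (default when the caller sent none)
const upstreamBody = buildUpstreamPayload(request)
```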