From cc11c1d22b3f6e4c805bb314699ba00cc4790708 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 01:50:28 +0800 Subject: [PATCH 1/3] feat(native-anthropic): add pass-through service, dispatch, type fixes (#38-#45) --- docs/prd/native-anthropic-passthrough.md | 74 ++++++++ src/routes/messages/anthropic-types.ts | 39 ++++- src/routes/messages/handler.ts | 75 +++++++- src/routes/messages/non-stream-translation.ts | 26 ++- .../copilot/create-messages-native.ts | 161 ++++++++++++++++++ src/services/copilot/native-models.ts | 57 +++++++ 6 files changed, 412 insertions(+), 20 deletions(-) create mode 100644 docs/prd/native-anthropic-passthrough.md create mode 100644 src/services/copilot/create-messages-native.ts create mode 100644 src/services/copilot/native-models.ts diff --git a/docs/prd/native-anthropic-passthrough.md b/docs/prd/native-anthropic-passthrough.md new file mode 100644 index 000000000..606c98ca8 --- /dev/null +++ b/docs/prd/native-anthropic-passthrough.md @@ -0,0 +1,74 @@ +# Native Anthropic Pass-Through for Claude Models + +## Status +Approved + +## Overview +Route Anthropic `/v1/messages` requests for Claude models directly to the GitHub Copilot upstream's native Anthropic endpoint, bypassing the existing OpenAI translation layer. This preserves thinking blocks with `signature` field, `top_k`, `cache_control`, and richer usage stats — none of which survive the current translation round-trip. + +## Motivation +GitHub Copilot's upstream (`api.enterprise.githubcopilot.com`) natively speaks the Anthropic Messages API for all Claude 4.5+ models. The current code path translates Anthropic → OpenAI → sends → translates back, losing: +- `thinking` blocks (completely dropped) +- `signature` field on thinking blocks (required for multi-turn reasoning) +- `cache_creation_input_tokens` in usage +- `top_k` parameter +- `cache_control` on system/user blocks + +The fix: detect Claude models by `vendor === "Anthropic"` from the `/models` endpoint, and forward requests verbatim to `/v1/messages` upstream. + +## Requirements + +1. **`create-messages-native.ts`** — Service client that POSTs Anthropic payloads directly to `${copilotBaseUrl}/v1/messages` with correct headers (`anthropic-version`, `anthropic-beta`). +2. **Route dispatch** — `handler.ts` checks `isNativeAnthropicModel(model)` and branches to native path for Claude, translation path for everything else. +3. **`native-models.ts`** — `isNativeAnthropicModel(modelId)` checks `state.models` vendor field; falls back to `claude-` prefix heuristic before models load. +4. **Type fixes** — `anthropic-types.ts`: `signature?` on `AnthropicThinkingBlock`; union `thinking` type for adaptive (opus-4.7+); `output_config`; `AnthropicImageBlock` URL source; `AnthropicToolResultBlock.content` widened. +5. **Adaptive thinking upgrade** — `create-messages-native.ts` auto-upgrades `{ type: "enabled" }` → `{ type: "adaptive" }` + `output_config.effort` for `claude-opus-4.7+` models. +6. **SSE proxy** — Streaming responses from native path forwarded verbatim to client (no re-translation needed). + +## Acceptance Criteria + +- Claude models (`vendor === "Anthropic"`) route to native path; non-Claude models route to translation path. +- Thinking blocks with `signature` field returned to client in both streaming and non-streaming. +- Multi-turn conversations with thinking blocks (echoing `signature`) work correctly. 
+- `claude-opus-4.7+` with `{ type: "enabled" }` thinking auto-upgrades to adaptive format; no HTTP 400. +- All existing tests pass; new tests cover native vs. translation dispatch. + +## Technical Approach + +### Model detection +`state.models.data` from `/models` endpoint has `vendor: "Anthropic"` for all Claude models. `isNativeAnthropicModel()` checks this first, falls back to `startsWith("claude-")` heuristic. + +### Headers for native path +``` +anthropic-version: 2023-06-01 +anthropic-beta: interleaved-thinking-2025-05-14,prompt-caching-2024-07-31 +``` +Plus all standard Copilot headers (auth, editor-version, etc.). + +### Streaming proxy +Native upstream sends proper Anthropic SSE events. Parse `event.type` for logging; forward `rawEvent.data` verbatim. No translation needed. + +### Adaptive thinking (opus-4.7+) +If model matches `/^claude-opus-4[.-](\d+)/` with minor ≥ 7, auto-upgrade `{ type: "enabled", budget_tokens: N }` → `{ type: "adaptive" }` + `output_config: { effort: "medium" }`. + +## File Changes + +**New:** +- `src/services/copilot/create-messages-native.ts` +- `src/services/copilot/native-models.ts` + +**Modified:** +- `src/routes/messages/anthropic-types.ts` — type fixes +- `src/routes/messages/handler.ts` — dispatch logic +- `src/routes/messages/non-stream-translation.ts` — remove stale comment; fix image source narrowing + +## Testing Strategy +- Unit: `isNativeAnthropicModel()` with populated vs empty `state.models` +- Unit: `buildUpstreamPayload()` adaptive thinking upgrade +- Integration: handler routes Claude models to native, GPT models to translation +- Existing translation tests must still pass + +## Out of Scope +- Persistent caching of native responses +- URL image sources (rejected by upstream; type kept for fidelity) +- Responses API (#1 epic) diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 881fffcc8..dda7657b3 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -18,9 +18,16 @@ export interface AnthropicMessagesPayload { type: "auto" | "any" | "tool" | "none" name?: string } - thinking?: { - type: "enabled" - budget_tokens?: number + /** + * Thinking config. + * - Legacy (claude-3.7 / claude-4.5): `{ type: "enabled", budget_tokens: N }` + * - New adaptive (claude-opus-4.7+): `{ type: "adaptive" }` paired with + * `output_config.effort` in the request body. + */ + thinking?: { type: "enabled"; budget_tokens?: number } | { type: "adaptive" } + /** Used together with `thinking: { type: "adaptive" }` on opus-4.7+. */ + output_config?: { + effort?: "low" | "medium" | "high" } service_tier?: "auto" | "standard_only" } @@ -32,17 +39,24 @@ export interface AnthropicTextBlock { export interface AnthropicImageBlock { type: "image" - source: { - type: "base64" - media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" - data: string - } + source: + | { + type: "base64" + media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" + data: string + } + | { + /** URL images are rejected by Copilot upstream — kept for type fidelity only. */ + type: "url" + url: string + } } export interface AnthropicToolResultBlock { type: "tool_result" tool_use_id: string - content: string + /** May be a plain string or an array of content blocks. 
*/ + content: string | Array is_error?: boolean } @@ -56,6 +70,12 @@ export interface AnthropicToolUseBlock { export interface AnthropicThinkingBlock { type: "thinking" thinking: string + /** + * Opaque signature returned by the upstream for extended thinking blocks. + * Must be echoed back in subsequent turns to enable multi-turn reasoning. + * Present on native pass-through responses; absent on translated responses. + */ + signature?: string } export type AnthropicUserContentBlock = @@ -106,6 +126,7 @@ export interface AnthropicResponse { output_tokens: number cache_creation_input_tokens?: number cache_read_input_tokens?: number + /** Present on native pass-through responses. */ service_tier?: "standard" | "priority" | "batch" } } diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf6243..cf691ffd7 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -11,9 +11,12 @@ import { type ChatCompletionChunk, type ChatCompletionResponse, } from "~/services/copilot/create-chat-completions" +import { createMessagesNative } from "~/services/copilot/create-messages-native" +import { isNativeAnthropicModel } from "~/services/copilot/native-models" import { type AnthropicMessagesPayload, + type AnthropicStreamEventData, type AnthropicStreamState, } from "./anthropic-types" import { @@ -28,16 +31,80 @@ export async function handleCompletion(c: Context) { const anthropicPayload = await c.req.json() consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) + if (state.manualApprove) { + await awaitApproval() + } + + // Route to native Anthropic pass-through for Claude models to preserve + // thinking blocks (with signature), top_k, cache_control, and richer usage. + if (isNativeAnthropicModel(anthropicPayload.model)) { + return handleNative(c, anthropicPayload) + } + + return handleTranslated(c, anthropicPayload) +} + +// --------------------------------------------------------------------------- +// Native Anthropic pass-through (Claude 4.5+ models) +// --------------------------------------------------------------------------- + +async function handleNative( + c: Context, + payload: AnthropicMessagesPayload, +): Promise { + consola.debug("Using native Anthropic pass-through for", payload.model) + + const response = await createMessagesNative(payload) + + if (!payload.stream) { + // Non-streaming: upstream already returned a complete Anthropic response + consola.debug( + "Native non-streaming response:", + JSON.stringify(response).slice(0, 400), + ) + return c.json(response) + } + + // Streaming: proxy the SSE events directly to the client + consola.debug("Native streaming response — proxying SSE events") + return streamSSE(c, async (stream) => { + for await (const rawEvent of response as AsyncIterable<{ + data?: string + event?: string + }>) { + if (rawEvent.data === "[DONE]") break + if (!rawEvent.data) continue + + // Parse to log but forward the original JSON verbatim + try { + const parsed = JSON.parse(rawEvent.data) as AnthropicStreamEventData + consola.debug("Native SSE event:", parsed.type) + await stream.writeSSE({ + event: parsed.type, + data: rawEvent.data, + }) + } catch { + // Malformed chunk — skip + consola.warn("Could not parse native SSE chunk:", rawEvent.data) + } + } + }) +} + +// --------------------------------------------------------------------------- +// Translation path (non-Claude models via /chat/completions) +// --------------------------------------------------------------------------- + 
+async function handleTranslated( + c: Context, + anthropicPayload: AnthropicMessagesPayload, +): Promise { const openAIPayload = translateToOpenAI(anthropicPayload) consola.debug( "Translated OpenAI request payload:", JSON.stringify(openAIPayload), ) - if (state.manualApprove) { - await awaitApproval() - } - const response = await createChatCompletions(openAIPayload) if (isNonStreaming(response)) { diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index dc41e6382..e154c3714 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -1,3 +1,5 @@ +import consola from "consola" + import { type ChatCompletionResponse, type ChatCompletionsPayload, @@ -213,12 +215,20 @@ function mapContent( break } case "image": { - contentParts.push({ - type: "image_url", - image_url: { - url: `data:${block.source.media_type};base64,${block.source.data}`, - }, - }) + if (block.source.type === "base64") { + contentParts.push({ + type: "image_url", + image_url: { + url: `data:${block.source.media_type};base64,${block.source.data}`, + }, + }) + } else { + // URL images are rejected by Copilot upstream — skip silently + // (type kept for fidelity when round-tripping through native path) + consola.warn( + "URL image source not supported in translation path — skipping", + ) + } break } @@ -302,7 +312,9 @@ export function translateToAnthropic( } } - // Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses + // Note: the translation path routes Claude models via /chat/completions which + // does not return thinking blocks. For thinking block support use the native + // Anthropic pass-through path (create-messages-native.ts). return { id: response.id, diff --git a/src/services/copilot/create-messages-native.ts b/src/services/copilot/create-messages-native.ts new file mode 100644 index 000000000..0fef793bf --- /dev/null +++ b/src/services/copilot/create-messages-native.ts @@ -0,0 +1,161 @@ +/** + * Native Anthropic pass-through service. + * + * The GitHub Copilot upstream (`api.enterprise.githubcopilot.com`) natively + * speaks the Anthropic Messages API for all Claude 4.5+ models. Routing + * requests directly to `/v1/messages` instead of translating them through + * `/chat/completions` gives us: + * + * - Real thinking blocks with `signature` field (multi-turn reasoning) + * - `cache_creation_input_tokens` in usage + * - `top_k` support + * - No lossy translation round-trip + * + * See research notes: ~/copilot-models-litellm/copilot_models.py + */ + +import consola from "consola" +import { events } from "fetch-event-stream" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" + +import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config" +import { HTTPError } from "~/lib/error" +import { state } from "~/lib/state" + +/** + * Forward an Anthropic-format request directly to Copilot's native `/v1/messages` + * endpoint, preserving all fields (thinking, signature, top_k, cache_control, …). 
+ * + * Returns: + * - For non-streaming: the raw Anthropic JSON response object + * - For streaming: an async iterable of SSE events (fetch-event-stream) + */ +export const createMessagesNative = async ( + payload: AnthropicMessagesPayload, +) => { + if (!state.copilotToken) throw new Error("Copilot token not found") + + const hasVision = messageHasImages(payload) + const headers = buildNativeHeaders(hasVision) + + const upstream = `${copilotBaseUrl(state)}/v1/messages` + consola.debug("Native Anthropic upstream:", upstream) + + // Strip fields that are Copilot-API–specific or unsupported by upstream + const body = buildUpstreamPayload(payload) + + const response = await fetch(upstream, { + method: "POST", + headers, + body: JSON.stringify(body), + }) + + if (!response.ok) { + consola.error("Native Anthropic upstream error", response.status) + throw new HTTPError("Native Anthropic upstream error", response) + } + + if (payload.stream) { + return events(response) + } + + return response.json() +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Build headers for the Anthropic native endpoint. + * + * The upstream requires `anthropic-version` and does NOT want an `openai-intent` + * header. We reuse `copilotHeaders()` for auth/agent headers and then layer the + * Anthropic-specific ones on top. + */ +function buildNativeHeaders(vision: boolean): Record { + const base = copilotHeaders(state, vision) + + // The native /v1/messages endpoint expects these Anthropic headers + return { + ...base, + "anthropic-version": "2023-06-01", + // Enable beta features: extended thinking + prompt caching + "anthropic-beta": + "interleaved-thinking-2025-05-14,prompt-caching-2024-07-31", + // Accept Anthropic streaming format + accept: "text/event-stream", + // The upstream doesn't use openai-intent for the messages path + // but leaving it does no harm; keep for header consistency + } +} + +/** + * Produce the payload forwarded to upstream. + * + * We pass through almost everything verbatim. The only transformation is that + * `claude-opus-4.7+` requires the new adaptive thinking format + * (`thinking: { type: "adaptive" }` + `output_config.effort`) rather than the + * legacy `{ type: "enabled", budget_tokens: N }`. If the caller already sent + * the correct format we leave it alone; if they sent the old format and the + * model requires adaptive, we upgrade automatically. + */ +function buildUpstreamPayload( + payload: AnthropicMessagesPayload, +): AnthropicMessagesPayload { + const { thinking, output_config, ...rest } = payload + + if (!thinking) { + return payload + } + + if (isAdaptiveThinkingModel(payload.model)) { + // Upgrade legacy enabled → adaptive if needed + if (thinking.type === "enabled") { + consola.debug( + `Upgrading thinking format to adaptive for model ${payload.model}`, + ) + return { + ...rest, + thinking: { type: "adaptive" }, + output_config: output_config ?? { effort: "medium" }, + } + } + // Already adaptive — forward as-is + return { ...rest, thinking, output_config } + } + + // Non-adaptive model — forward legacy format, drop output_config + return { ...rest, thinking } +} + +/** + * Models that require the new adaptive thinking API. + * Populated dynamically at dispatch time via `isNativeAnthropicModel()`. + * This hard-coded check is the fallback. 
+ */ +function isAdaptiveThinkingModel(model: string): boolean { + // claude-opus-4.7 and above use adaptive thinking + const match = model.match(/^claude-opus-4[.-](\d+)/) + if (match) { + const minor = Number.parseInt(match[1], 10) + return minor >= 7 + } + return false +} + +/** + * Check whether the request contains any image blocks (to set vision headers). + */ +function messageHasImages(payload: AnthropicMessagesPayload): boolean { + for (const msg of payload.messages) { + if (typeof msg.content === "string") continue + if (Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block.type === "image") return true + } + } + } + return false +} diff --git a/src/services/copilot/native-models.ts b/src/services/copilot/native-models.ts new file mode 100644 index 000000000..e411b34e4 --- /dev/null +++ b/src/services/copilot/native-models.ts @@ -0,0 +1,57 @@ +/** + * Dynamic detection of which models support native Anthropic pass-through. + * + * The Copilot `/models` endpoint returns a `vendor` field for each model. + * Any model with `vendor === "Anthropic"` is served natively via the + * `/v1/messages` path at `api.enterprise.githubcopilot.com`. + * + * We cache the set of native model IDs after the first `/models` call and + * keep it in sync with `state.models` (which is refreshed periodically by + * the token-rotation logic). + */ + +import { state } from "~/lib/state" + +/** + * Returns true if the given model ID should be routed to the native + * Anthropic pass-through service instead of the OpenAI chat-completions + * translation layer. + * + * Resolution order: + * 1. If `state.models` is populated, check whether the model's vendor is + * "Anthropic" (live, always up-to-date). + * 2. Fall back to a static prefix list for resilience at startup before + * the models list is fetched. + */ +export function isNativeAnthropicModel(modelId: string): boolean { + if (state.models?.data) { + const entry = state.models.data.find((m) => m.id === modelId) + if (entry) { + return entry.vendor === "Anthropic" + } + // Model not found in list — fall through to prefix heuristic + } + + return matchesAnthropicPrefix(modelId) +} + +/** + * Static prefix heuristic used before `state.models` is populated. + * Covers all current Claude variants served by Copilot. + */ +function matchesAnthropicPrefix(modelId: string): boolean { + return modelId.startsWith("claude-") || modelId.startsWith("claude_") +} + +/** + * Return the full list of model IDs that support native Anthropic pass-through, + * derived from `state.models`. Useful for logging / diagnostics. + * + * Falls back to an empty array if the models list has not been fetched yet. 
+ */ +export function nativeAnthropicModelIds(): ReadonlyArray { + if (!state.models?.data) return [] + return state.models.data + .filter((m) => m.vendor === "Anthropic") + .map((m) => m.id) +} From dce9e6c3355e686ea28ddbecb5d6891adb255ce8 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 01:57:22 +0800 Subject: [PATCH 2/3] fix: address review round 1 feedback for native pass-through (#38) - H1: Remove dead [DONE] sentinel from native SSE loop (Anthropic terminates via connection close) - H3: Conditionally set accept: text/event-stream only when streaming - M1: buildUpstreamPayload returns rest (not payload) when thinking absent, stripping output_config - M2: Truncate raw SSE data to 200 chars in warn log to prevent log injection - L2: Remove claude_ underscore prefix heuristic (no known Anthropic model uses it) - L3: Document >= 7 threshold comment in isAdaptiveThinkingModel - L4: Replace verbose JSDoc on nativeAnthropicModelIds with concise standard form - Export buildUpstreamPayload for direct unit testing - Add tests/native-passthrough.test.ts with T1-T9 covering payload transform and model routing Co-Authored-By: Claude Sonnet 4.6 --- src/routes/messages/handler.ts | 6 +- .../copilot/create-messages-native.ts | 19 ++- src/services/copilot/native-models.ts | 8 +- tests/native-passthrough.test.ts | 160 ++++++++++++++++++ 4 files changed, 178 insertions(+), 15 deletions(-) create mode 100644 tests/native-passthrough.test.ts diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index cf691ffd7..2bf1005ea 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -72,7 +72,6 @@ async function handleNative( data?: string event?: string }>) { - if (rawEvent.data === "[DONE]") break if (!rawEvent.data) continue // Parse to log but forward the original JSON verbatim @@ -85,7 +84,10 @@ async function handleNative( }) } catch { // Malformed chunk — skip - consola.warn("Could not parse native SSE chunk:", rawEvent.data) + consola.warn( + "Could not parse native SSE chunk:", + rawEvent.data.slice(0, 200), + ) } } }) diff --git a/src/services/copilot/create-messages-native.ts b/src/services/copilot/create-messages-native.ts index 0fef793bf..e6c41cb72 100644 --- a/src/services/copilot/create-messages-native.ts +++ b/src/services/copilot/create-messages-native.ts @@ -37,7 +37,7 @@ export const createMessagesNative = async ( if (!state.copilotToken) throw new Error("Copilot token not found") const hasVision = messageHasImages(payload) - const headers = buildNativeHeaders(hasVision) + const headers = buildNativeHeaders(hasVision, Boolean(payload.stream)) const upstream = `${copilotBaseUrl(state)}/v1/messages` consola.debug("Native Anthropic upstream:", upstream) @@ -74,7 +74,10 @@ export const createMessagesNative = async ( * header. We reuse `copilotHeaders()` for auth/agent headers and then layer the * Anthropic-specific ones on top. 
*/ -function buildNativeHeaders(vision: boolean): Record { +function buildNativeHeaders( + vision: boolean, + stream: boolean, +): Record { const base = copilotHeaders(state, vision) // The native /v1/messages endpoint expects these Anthropic headers @@ -84,10 +87,9 @@ function buildNativeHeaders(vision: boolean): Record { // Enable beta features: extended thinking + prompt caching "anthropic-beta": "interleaved-thinking-2025-05-14,prompt-caching-2024-07-31", - // Accept Anthropic streaming format - accept: "text/event-stream", - // The upstream doesn't use openai-intent for the messages path - // but leaving it does no harm; keep for header consistency + // Only request SSE streaming format when the caller is streaming; + // non-streaming calls should use the default application/json accept + ...(stream ? { accept: "text/event-stream" } : {}), } } @@ -101,13 +103,13 @@ function buildNativeHeaders(vision: boolean): Record { * the correct format we leave it alone; if they sent the old format and the * model requires adaptive, we upgrade automatically. */ -function buildUpstreamPayload( +export function buildUpstreamPayload( payload: AnthropicMessagesPayload, ): AnthropicMessagesPayload { const { thinking, output_config, ...rest } = payload if (!thinking) { - return payload + return rest // safe: output_config only valid alongside thinking } if (isAdaptiveThinkingModel(payload.model)) { @@ -140,6 +142,7 @@ function isAdaptiveThinkingModel(model: string): boolean { const match = model.match(/^claude-opus-4[.-](\d+)/) if (match) { const minor = Number.parseInt(match[1], 10) + // claude-opus-4.7 and later use the new adaptive thinking API (not legacy budget_tokens) return minor >= 7 } return false diff --git a/src/services/copilot/native-models.ts b/src/services/copilot/native-models.ts index e411b34e4..7c3d4f59b 100644 --- a/src/services/copilot/native-models.ts +++ b/src/services/copilot/native-models.ts @@ -40,14 +40,12 @@ export function isNativeAnthropicModel(modelId: string): boolean { * Covers all current Claude variants served by Copilot. */ function matchesAnthropicPrefix(modelId: string): boolean { - return modelId.startsWith("claude-") || modelId.startsWith("claude_") + return modelId.startsWith("claude-") } /** - * Return the full list of model IDs that support native Anthropic pass-through, - * derived from `state.models`. Useful for logging / diagnostics. - * - * Falls back to an empty array if the models list has not been fetched yet. + * Returns all model IDs that support native Anthropic pass-through. + * Used for diagnostics and startup logging. */ export function nativeAnthropicModelIds(): ReadonlyArray { if (!state.models?.data) return [] diff --git a/tests/native-passthrough.test.ts b/tests/native-passthrough.test.ts new file mode 100644 index 000000000..759df813d --- /dev/null +++ b/tests/native-passthrough.test.ts @@ -0,0 +1,160 @@ +import { describe, test, expect, afterEach } from "bun:test" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" + +import { state } from "~/lib/state" +import { buildUpstreamPayload } from "~/services/copilot/create-messages-native" +import { isNativeAnthropicModel } from "~/services/copilot/native-models" + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Minimal valid payload base — only the fields required by the type. 
*/ +function basePayload( + overrides: Partial, +): AnthropicMessagesPayload { + return { + model: "claude-sonnet-4-5", + messages: [{ role: "user", content: "hi" }], + max_tokens: 1024, + ...overrides, + } +} + +// --------------------------------------------------------------------------- +// buildUpstreamPayload tests +// --------------------------------------------------------------------------- + +describe("buildUpstreamPayload", () => { + // T1 — output_config present but thinking absent → output_config stripped + test("T1: strips output_config when thinking is absent", () => { + const payload = basePayload({ + output_config: { effort: "high" }, + }) + const result = buildUpstreamPayload(payload) + expect(result).not.toHaveProperty("output_config") + expect(result).not.toHaveProperty("thinking") + }) + + // T2 — adaptive upgrade preserves explicit output_config: { effort: "high" } + test("T2: adaptive upgrade preserves explicit output_config effort", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled" }, + output_config: { effort: "high" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + // Should keep caller's "high", not override to "medium" + expect(result.output_config).toEqual({ effort: "high" }) + }) + + // T3 — already adaptive → forwarded as-is + test("T3: already-adaptive thinking forwarded as-is", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "adaptive" }, + output_config: { effort: "low" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + expect(result.output_config).toEqual({ effort: "low" }) + }) + + // T4 — legacy model with enabled thinking → kept as-is, no adaptive upgrade + test("T4: legacy model with enabled thinking kept as-is", () => { + const payload = basePayload({ + model: "claude-sonnet-4-5", + thinking: { type: "enabled", budget_tokens: 1024 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 }) + expect(result).not.toHaveProperty("output_config") + }) +}) + +// --------------------------------------------------------------------------- +// isNativeAnthropicModel tests +// --------------------------------------------------------------------------- + +// Save original models state and restore after each test +const originalModels = state.models + +afterEach(() => { + state.models = originalModels +}) + +describe("isNativeAnthropicModel", () => { + // T5 — model in loaded list with vendor "Anthropic" → true + test("T5: model with vendor Anthropic in loaded list → true", () => { + state.models = { + object: "list", + data: [ + { + id: "claude-sonnet-4-5", + vendor: "Anthropic", + name: "Claude Sonnet 4.5", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { + family: "claude", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "chat", + }, + }, + ], + } + expect(isNativeAnthropicModel("claude-sonnet-4-5")).toBe(true) + }) + + // T6 — model in loaded list with vendor "OpenAI" → false + test("T6: model with vendor OpenAI in loaded list → false", () => { + state.models = { + object: "list", + data: [ + { + id: "gpt-4o", + vendor: "OpenAI", + name: "GPT-4o", + object: "model", + version: "1", + preview: false, + model_picker_enabled: true, + capabilities: { 
+ family: "gpt", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "cl100k_base", + type: "chat", + }, + }, + ], + } + expect(isNativeAnthropicModel("gpt-4o")).toBe(false) + }) + + // T7 — model NOT in loaded list, starts with "claude-" → true (heuristic) + test("T7: model not in loaded list but starts with claude- → true", () => { + state.models = { object: "list", data: [] } + expect(isNativeAnthropicModel("claude-future-1")).toBe(true) + }) + + // T8 — model NOT in loaded list, starts with "gpt-" → false + test("T8: model not in loaded list and starts with gpt- → false", () => { + state.models = { object: "list", data: [] } + expect(isNativeAnthropicModel("gpt-5")).toBe(false) + }) + + // T9 — state.models undefined → heuristic + test("T9: state.models undefined → heuristic (claude- prefix → true)", () => { + state.models = undefined + expect(isNativeAnthropicModel("claude-something")).toBe(true) + }) +}) From 6c92355168144fbcdc60cb11ec3dc56464aeab49 Mon Sep 17 00:00:00 2001 From: HXYerror <48976608+HXYerror@users.noreply.github.com> Date: Mon, 11 May 2026 11:11:37 +0800 Subject: [PATCH 3/3] fix: address review round 2 feedback for native pass-through (#38) --- src/routes/messages/handler.ts | 15 +-- .../copilot/create-messages-native.ts | 19 ++-- src/services/copilot/native-models.ts | 19 ---- tests/native-passthrough.test.ts | 105 +++++++++++++++++- 4 files changed, 120 insertions(+), 38 deletions(-) diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 2bf1005ea..e383b2dc0 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -74,18 +74,19 @@ async function handleNative( }>) { if (!rawEvent.data) continue - // Parse to log but forward the original JSON verbatim + // Forward verbatim — never block on parse failure + await stream.writeSSE({ + event: rawEvent.event, + data: rawEvent.data, + }) + + // Parse only for debug logging try { const parsed = JSON.parse(rawEvent.data) as AnthropicStreamEventData consola.debug("Native SSE event:", parsed.type) - await stream.writeSSE({ - event: parsed.type, - data: rawEvent.data, - }) } catch { - // Malformed chunk — skip consola.warn( - "Could not parse native SSE chunk:", + "Could not parse native SSE chunk for logging:", rawEvent.data.slice(0, 200), ) } diff --git a/src/services/copilot/create-messages-native.ts b/src/services/copilot/create-messages-native.ts index e6c41cb72..8b9fb54fd 100644 --- a/src/services/copilot/create-messages-native.ts +++ b/src/services/copilot/create-messages-native.ts @@ -80,15 +80,16 @@ function buildNativeHeaders( ): Record { const base = copilotHeaders(state, vision) - // The native /v1/messages endpoint expects these Anthropic headers + // Remove headers that are OpenAI-specific and not expected by Anthropic endpoint + const { "openai-intent": _dropped, ...anthropicBase } = base + return { - ...base, + ...anthropicBase, "anthropic-version": "2023-06-01", // Enable beta features: extended thinking + prompt caching "anthropic-beta": "interleaved-thinking-2025-05-14,prompt-caching-2024-07-31", - // Only request SSE streaming format when the caller is streaming; - // non-streaming calls should use the default application/json accept + // Only request SSE streaming format when the caller is streaming ...(stream ? { accept: "text/event-stream" } : {}), } } @@ -121,7 +122,8 @@ export function buildUpstreamPayload( return { ...rest, thinking: { type: "adaptive" }, - output_config: output_config ?? 
{ effort: "medium" }, + output_config: + output_config?.effort ? output_config : { effort: "medium" }, } } // Already adaptive — forward as-is @@ -133,9 +135,10 @@ export function buildUpstreamPayload( } /** - * Models that require the new adaptive thinking API. - * Populated dynamically at dispatch time via `isNativeAnthropicModel()`. - * This hard-coded check is the fallback. + * Returns true for models that require the adaptive thinking API + * (`{ type: "adaptive" }` + `output_config.effort`) rather than the + * legacy `{ type: "enabled", budget_tokens: N }`. + * Currently: claude-opus-4.7 and later. */ function isAdaptiveThinkingModel(model: string): boolean { // claude-opus-4.7 and above use adaptive thinking diff --git a/src/services/copilot/native-models.ts b/src/services/copilot/native-models.ts index 7c3d4f59b..7d731d01b 100644 --- a/src/services/copilot/native-models.ts +++ b/src/services/copilot/native-models.ts @@ -32,24 +32,5 @@ export function isNativeAnthropicModel(modelId: string): boolean { // Model not found in list — fall through to prefix heuristic } - return matchesAnthropicPrefix(modelId) -} - -/** - * Static prefix heuristic used before `state.models` is populated. - * Covers all current Claude variants served by Copilot. - */ -function matchesAnthropicPrefix(modelId: string): boolean { return modelId.startsWith("claude-") } - -/** - * Returns all model IDs that support native Anthropic pass-through. - * Used for diagnostics and startup logging. - */ -export function nativeAnthropicModelIds(): ReadonlyArray { - if (!state.models?.data) return [] - return state.models.data - .filter((m) => m.vendor === "Anthropic") - .map((m) => m.id) -} diff --git a/tests/native-passthrough.test.ts b/tests/native-passthrough.test.ts index 759df813d..6e4f09c72 100644 --- a/tests/native-passthrough.test.ts +++ b/tests/native-passthrough.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, afterEach } from "bun:test" +import { describe, test, expect, beforeEach, afterEach } from "bun:test" import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" @@ -72,17 +72,45 @@ describe("buildUpstreamPayload", () => { expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 }) expect(result).not.toHaveProperty("output_config") }) + + // T5 — adaptive upgrade with no output_config → defaults to effort:medium + test("T5: adaptive upgrade with no output_config defaults to effort:medium", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled", budget_tokens: 1024 }, + // output_config intentionally absent + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + expect(result.output_config).toEqual({ effort: "medium" }) + }) + + // T6 — output_config: {} also triggers default (not bypassed) + test("T6: empty output_config triggers medium effort default", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled" }, + output_config: {}, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + expect(result.output_config).toEqual({ effort: "medium" }) + }) }) // --------------------------------------------------------------------------- // isNativeAnthropicModel tests // --------------------------------------------------------------------------- -// Save original models state and restore after each test -const originalModels = state.models +// Per-test 
state isolation +let savedModels: typeof state.models + +beforeEach(() => { + savedModels = state.models +}) afterEach(() => { - state.models = originalModels + state.models = savedModels }) describe("isNativeAnthropicModel", () => { @@ -158,3 +186,72 @@ describe("isNativeAnthropicModel", () => { expect(isNativeAnthropicModel("claude-something")).toBe(true) }) }) + +// --------------------------------------------------------------------------- +// isAdaptiveThinkingModel boundary tests (via buildUpstreamPayload) +// --------------------------------------------------------------------------- + +describe("isAdaptiveThinkingModel boundaries (via buildUpstreamPayload)", () => { + // B1 — claude-opus-4.6 is NOT upgraded (one below threshold) + test("B1: claude-opus-4.6 does NOT get adaptive upgrade", () => { + const payload = basePayload({ + model: "claude-opus-4.6", + thinking: { type: "enabled", budget_tokens: 2048 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 2048 }) + expect(result).not.toHaveProperty("output_config") + }) + + // B2 — claude-opus-4.7 IS upgraded (exact threshold) + test("B2: claude-opus-4.7 (dot separator) IS upgraded to adaptive", () => { + const payload = basePayload({ + model: "claude-opus-4.7", + thinking: { type: "enabled" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + }) + + // B3 — claude-opus-4-7 (dash separator) IS upgraded + test("B3: claude-opus-4-7 (dash separator) IS upgraded to adaptive", () => { + const payload = basePayload({ + model: "claude-opus-4-7", + thinking: { type: "enabled" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + }) + + // B4 — claude-opus-4-6 (dash separator) is NOT upgraded + test("B4: claude-opus-4-6 (dash separator) NOT upgraded", () => { + const payload = basePayload({ + model: "claude-opus-4-6", + thinking: { type: "enabled", budget_tokens: 512 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 512 }) + expect(result).not.toHaveProperty("output_config") + }) + + // B5 — claude-opus-4.8 (one above threshold) IS upgraded + test("B5: claude-opus-4.8 (one above threshold) IS upgraded", () => { + const payload = basePayload({ + model: "claude-opus-4.8", + thinking: { type: "enabled" }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "adaptive" }) + }) + + // B6 — claude-sonnet-4.7 (non-opus) is NOT upgraded + test("B6: claude-sonnet-4.7 (non-opus) NOT upgraded to adaptive", () => { + const payload = basePayload({ + model: "claude-sonnet-4.7", + thinking: { type: "enabled", budget_tokens: 1024 }, + } as Partial) + const result = buildUpstreamPayload(payload) + expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 1024 }) + expect(result).not.toHaveProperty("output_config") + }) +})
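
For reference, a minimal usage sketch of the payload transform that the boundary tests above exercise. This is illustrative only: it assumes the `buildUpstreamPayload` export and the `AnthropicMessagesPayload` shape introduced in this patch series, and the literal values mirror test T5 (legacy thinking on an adaptive-only model).

```ts
import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"

import { buildUpstreamPayload } from "~/services/copilot/create-messages-native"

// Caller sends the legacy thinking format to an adaptive-only model.
const request: AnthropicMessagesPayload = {
  model: "claude-opus-4.7",
  max_tokens: 2048,
  messages: [{ role: "user", content: "Summarize this diff" }],
  thinking: { type: "enabled", budget_tokens: 1024 },
}

// The native pass-through upgrades the request before forwarding upstream:
//   thinking      -> { type: "adaptive" }
//   output_config -> { effort: "medium" } (default when the caller sent none)
const upstreamBody = buildUpstreamPayload(request)
```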