diff --git a/README.md b/README.md
index 0d36c13c9..f80926d57 100644
--- a/README.md
+++ b/README.md
@@ -188,9 +188,12 @@ These endpoints mimic the OpenAI API structure.
 | Endpoint                    | Method | Description                                               |
 | --------------------------- | ------ | --------------------------------------------------------- |
 | `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. |
+| `POST /v1/responses`        | `POST` | Creates a model response using the Responses API format.  |
 | `GET /v1/models`            | `GET`  | Lists the currently available models.                     |
 | `POST /v1/embeddings`       | `POST` | Creates an embedding vector representing the input text.  |
 
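+For GPT-family models (model IDs starting with `gpt-`), `/v1/chat/completions` is a compatibility layer: each request is translated to the Responses API and the result is translated back to the Chat Completions format. If you need native Responses features, or want the best chance of preserving model-specific reasoning metadata, call `POST /v1/responses` directly.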
+
 ### Anthropic Compatible Endpoints
 
 These endpoints are designed to be compatible with the Anthropic Messages API.
diff --git a/src/lib/model-level.ts b/src/lib/model-level.ts
new file mode 100644
index 000000000..42144569f
--- /dev/null
+++ b/src/lib/model-level.ts
@@ -0,0 +1,75 @@
+/** Level names recognized as a "(level)" suffix on a model name. */
+export const MODEL_LEVELS = ["low", "medium", "high", "xhigh"] as const
+
+export type ModelLevel = (typeof MODEL_LEVELS)[number]
+
+/** Levels offered for each Claude model that has level variants. */
+const CLAUDE_MODEL_LEVEL_VARIANTS = {
+  "claude-opus-4.6": ["low", "medium", "high"],
+  "claude-opus-4.6-fast": ["low", "medium", "high"],
+  "claude-sonnet-4.6": ["low", "medium", "high"],
+} as const satisfies Record<string, ReadonlyArray<ModelLevel>>
+
+/**
+ * Splits a trailing "(low|medium|high|xhigh)" suffix off a model name.
+ *
+ * @param model - Model name as sent by the client, e.g. "gpt-5(high)".
+ * @returns The name without the suffix and the parsed level; names without a
+ * recognized suffix are returned unchanged with `level` undefined.
+ */
+export const parseModelNameWithLevel = (
+  model: string,
+): {
+  baseModel: string
+  level: ModelLevel | undefined
+} => {
+  const match = model.match(/^(.+)\((low|medium|high|xhigh)\)$/)
+  if (!match) {
+    return {
+      baseModel: model,
+      level: undefined,
+    }
+  }
+
+  return {
+    baseModel: match[1],
+    level: match[2] as ModelLevel,
+  }
+}
+
+/** True when the model id starts with "gpt-". */
+export const isGptResponsesModel = (model: string): boolean =>
+  model.startsWith("gpt-")
+
+/** True when the model id starts with "gpt-5". */
+export const supportsGptReasoningEffort = (model: string): boolean =>
+  model.startsWith("gpt-5")
+
+/**
+ * Returns the levels accepted for `model`, or undefined when none apply.
+ *
+ * @param model - Base model id (without a "(level)" suffix).
+ */
+export const getModelLevelsForModel = (
+  model: string,
+): ReadonlyArray<ModelLevel> | undefined => {
+  if (supportsGptReasoningEffort(model)) {
+    return MODEL_LEVELS
+  }
+
+  // Own-property check: indexing the table with an arbitrary string would
+  // otherwise return inherited members such as "constructor" or "toString".
+  if (!Object.hasOwn(CLAUDE_MODEL_LEVEL_VARIANTS, model)) {
+    return undefined
+  }
+
+  return CLAUDE_MODEL_LEVEL_VARIANTS[
+    model as keyof typeof CLAUDE_MODEL_LEVEL_VARIANTS
+  ]
+}
+
+/** True for the Claude model ids compared against below. */
+export const isClaudeThinkingModel = (model: string): boolean =>
+  model === "claude-opus-4.6"
+  || model === "claude-opus-4.6-fast"
+  || model === "claude-sonnet-4.6"
diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts
index 04a5ae9ed..f72414781 100644
--- a/src/routes/chat-completions/handler.ts
+++ b/src/routes/chat-completions/handler.ts
@@ -4,6 +4,7 @@ import consola from "consola"
 import { streamSSE, type SSEMessage } from "hono/streaming"
 
 import { awaitApproval } from "~/lib/approval"
+import { isGptResponsesModel, parseModelNameWithLevel } from "~/lib/model-level"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
 import { getTokenCount } from "~/lib/tokenizer"
@@ -12,17 +13,29 @@ import {
   createChatCompletions,
   type ChatCompletionResponse,
   type ChatCompletionsPayload,
+  normalizeChatCompletionsPayloadModel,
 } from "~/services/copilot/create-chat-completions"
+import {
+  createResponses,
+  type ResponsesApiResponse,
+} from "~/services/copilot/create-responses"
+
+import {
+  translateChatCompletionsToResponses,
+  translateResponsesStreamToChatStream,
+  translateResponsesToChatCompletions,
+} from "./responses-translation"
 
 export async function handleCompletion(c: Context) {
   await checkRateLimit(state)
 
   let payload = await c.req.json<ChatCompletionsPayload>()
+  const { baseModel } = parseModelNameWithLevel(payload.model)
   consola.debug("Request payload:", JSON.stringify(payload).slice(-400))
 
   // Find the selected model
   const selectedModel = state.models?.data.find(
-    (model) => model.id === payload.model,
+    (model) => model.id === baseModel,
   )
 
   // Calculate and display token count
@@ -47,7 +60,33 @@ export async function handleCompletion(c: Context) {
     consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
   }
 
-  const response = await createChatCompletions(payload)
+  const normalizedPayload = normalizeChatCompletionsPayloadModel(payload)
+
+  if (isGptResponsesModel(baseModel)) {
+    const responsesPayload =
+      translateChatCompletionsToResponses(normalizedPayload)
+    const responses = await createResponses(responsesPayload)
+
+    if (isNonStreamingResponse(responses)) {
+      const completionResponse = translateResponsesToChatCompletions(responses)
+      consola.debug(
+        "GPT translated response:",
+        JSON.stringify(completionResponse).slice(-400),
+      )
+      return c.json(completionResponse)
+    }
+
+    return streamSSE(c, async (stream) => {
+      for await (const chunk of translateResponsesStreamToChatStream(
+        responses,
+        normalizedPayload.model,
+      )) {
+        await stream.writeSSE(chunk)
+      }
+    })
+  }
+
+  const response = await createChatCompletions(normalizedPayload)
 
   if (isNonStreaming(response)) {
     consola.debug("Non-streaming response:", JSON.stringify(response))
@@ -63,6 +102,10 @@ export async function handleCompletion(c: Context) {
   })
 }
 
+const isNonStreamingResponse = (
+  response: Awaited<ReturnType<typeof createResponses>>,
+): response is ResponsesApiResponse => !(Symbol.asyncIterator in response)
+
 const isNonStreaming = (
   response: Awaited<ReturnType<typeof createChatCompletions>>,
 ): response is ChatCompletionResponse => Object.hasOwn(response, "choices")
diff --git a/src/routes/chat-completions/responses-translation.ts b/src/routes/chat-completions/responses-translation.ts
new file mode 100644
index 000000000..b7edfc7e2
--- /dev/null
+++ b/src/routes/chat-completions/responses-translation.ts
@@ -0,0 +1,570 @@
+import type { SSEMessage } from "hono/streaming"
+
+import { randomUUID } from "node:crypto"
+
+import type {
+  ChatCompletionChunk,
+  ChatCompletionResponse,
+  ChatCompletionsPayload,
+  ContentPart,
+  Message,
+  ToolCall,
+} from "~/services/copilot/create-chat-completions"
+import type {
+  ResponseInputContentPart,
+  ResponseInputMessage,
+  ResponsesApiResponse,
+  ResponsesFunctionCall,
+  ResponsesOutputContentPart,
+  ResponsesOutputItem,
+  ResponsesPayload,
+} from "~/services/copilot/create-responses"
+
+export function translateChatCompletionsToResponses(
+  payload: ChatCompletionsPayload,
+): ResponsesPayload {
+  return {
+    model: payload.model,
+    input: payload.messages.map((message) => translateMessage(message)),
+    stream: payload.stream,
+    temperature: payload.temperature,
+    top_p: payload.top_p,
+    max_output_tokens: payload.max_tokens,
+    stop: payload.stop,
+    tools: payload.tools as Array<unknown> | null | undefined,
+    tool_choice: payload.tool_choice,
+    user: payload.user,
+    reasoning_effort: payload.reasoning_effort,
+    reasoning:
+      payload.reasoning
+      ?? (payload.reasoning_effort ?
+        {
+          effort: payload.reasoning_effort,
+        }
+      : undefined),
+  }
+}
+
+export function translateResponsesToChatCompletions(
+  response: ResponsesApiResponse,
+): ChatCompletionResponse {
+  const outputItems = response.output ?? []
+  const messageContent = extractOutputText(outputItems, response.output_text)
+  const toolCalls = extractToolCalls(outputItems)
+  const completionTokens = response.usage?.output_tokens ?? 0
+  const promptTokens = response.usage?.input_tokens ?? 0
+
+  return {
+    id: response.id,
+    object: "chat.completion",
+    created: response.created_at ?? Math.floor(Date.now() / 1000),
+    model: response.model,
+    choices: [
+      {
+        index: 0,
+        message: {
+          role: "assistant",
+          content: messageContent.length > 0 ? messageContent : null,
+          ...(toolCalls.length > 0 && { tool_calls: toolCalls }),
+        },
+        logprobs: null,
+        finish_reason: toolCalls.length > 0 ? "tool_calls" : "stop",
+      },
+    ],
+    usage: {
+      prompt_tokens: promptTokens,
+      completion_tokens: completionTokens,
+      total_tokens:
+        response.usage?.total_tokens ?? promptTokens + completionTokens,
+    },
+  }
+}
+
+/**
+ * Re-emits a Responses API event stream as Chat Completions chunks: text and
+ * function-call events become deltas, then one chunk carrying finish_reason
+ * and a final "[DONE]" message are sent once the upstream stream is over.
+ */
+// eslint-disable-next-line max-lines-per-function, complexity
+export async function* translateResponsesStreamToChatStream(
+  responseStream: AsyncIterable<{ data?: string }>,
+  model: string,
+): AsyncGenerator<SSEMessage> {
+  const completionId = randomUUID()
+  const created = Math.floor(Date.now() / 1000)
+  let hasEmittedRole = false
+  let finishReason: ChatChunkFinishReason = "stop"
+  let nextToolCallIndex = 0
+  const toolCallStates = new Map<string, StreamToolCallState>()
+
+  for await (const rawEvent of responseStream) {
+    if (rawEvent.data === "[DONE]") {
+      break
+    }
+
+    if (!rawEvent.data) {
+      continue
+    }
+
+    const parsedEvent = JSON.parse(rawEvent.data) as {
+      type?: string
+      [key: string]: unknown
+    }
+
+    if (
+      parsedEvent.type === "response.output_text.delta"
+      && typeof parsedEvent.delta === "string"
+    ) {
+      const delta = withAssistantRole(hasEmittedRole, {
+        content: parsedEvent.delta,
+      })
+      hasEmittedRole = true
+      yield {
+        data: JSON.stringify(
+          createChatChunk(completionId, created, model, delta, null),
+        ),
+      }
+      continue
+    }
+
+    if (
+      parsedEvent.type === "response.output_item.added"
+      && typeof parsedEvent.output_index === "number"
+      && isStreamOutputItem(parsedEvent.item)
+      && isResponsesFunctionCallItem(parsedEvent.item)
+    ) {
+      finishReason = "tool_calls"
+      const state = getOrCreateToolCallState(
+        toolCallStates,
+        parsedEvent.item.id
+          ?? parsedEvent.item.call_id
+          ?? String(parsedEvent.output_index),
+        parsedEvent.output_index,
+        parsedEvent.item,
+        () => nextToolCallIndex++,
+      )
+      const delta = withAssistantRole(hasEmittedRole, {
+        tool_calls: [
+          {
+            index: state.chatIndex,
+            id: state.id,
+            type: "function",
+            function: {
+              name: state.name,
+              arguments: "",
+            },
+          },
+        ],
+      })
+      hasEmittedRole = true
+      state.hasEmittedInitialChunk = true
+      yield {
+        data: JSON.stringify(
+          createChatChunk(completionId, created, model, delta, null),
+        ),
+      }
+      continue
+    }
+
+    if (
+      parsedEvent.type === "response.function_call_arguments.delta"
+      && typeof parsedEvent.item_id === "string"
+      && typeof parsedEvent.output_index === "number"
+      && typeof parsedEvent.call_id === "string"
+      && typeof parsedEvent.delta === "string"
+    ) {
+      finishReason = "tool_calls"
+      const state = getOrCreateToolCallState(
+        toolCallStates,
+        parsedEvent.item_id,
+        parsedEvent.output_index,
+        {
+          type: "function_call",
+          call_id: parsedEvent.call_id,
+        },
+        () => nextToolCallIndex++,
+      )
+      state.arguments += parsedEvent.delta
+      const delta = withAssistantRole(hasEmittedRole, {
+        tool_calls: [
+          {
+            index: state.chatIndex,
+            function: {
+              arguments: parsedEvent.delta,
+            },
+          },
+        ],
+      })
+      hasEmittedRole = true
+      yield {
+        data: JSON.stringify(
+          createChatChunk(completionId, created, model, delta, null),
+        ),
+      }
+      continue
+    }
+
+    if (
+      parsedEvent.type === "response.function_call_arguments.done"
+      && typeof parsedEvent.item_id === "string"
+      && typeof parsedEvent.output_index === "number"
+      && typeof parsedEvent.call_id === "string"
+      && typeof parsedEvent.arguments === "string"
+    ) {
+      finishReason = "tool_calls"
+      const state = getOrCreateToolCallState(
+        toolCallStates,
+        parsedEvent.item_id,
+        parsedEvent.output_index,
+        {
+          type: "function_call",
+          call_id: parsedEvent.call_id,
+          arguments: parsedEvent.arguments,
+        },
+        () => nextToolCallIndex++,
+      )
+      const remainder =
+        parsedEvent.arguments.startsWith(state.arguments) ?
+          parsedEvent.arguments.slice(state.arguments.length)
+        : parsedEvent.arguments
+      state.arguments = parsedEvent.arguments
+      if (remainder.length === 0) {
+        continue
+      }
+
+      const delta = withAssistantRole(hasEmittedRole, {
+        tool_calls: [
+          {
+            index: state.chatIndex,
+            function: {
+              arguments: remainder,
+            },
+          },
+        ],
+      })
+      hasEmittedRole = true
+      yield {
+        data: JSON.stringify(
+          createChatChunk(completionId, created, model, delta, null),
+        ),
+      }
+      continue
+    }
+
+    if (
+      parsedEvent.type === "response.output_item.done"
+      && typeof parsedEvent.output_index === "number"
+      && isStreamOutputItem(parsedEvent.item)
+      && isResponsesFunctionCallItem(parsedEvent.item)
+    ) {
+      finishReason = "tool_calls"
+      const itemId =
+        parsedEvent.item.id
+        ?? parsedEvent.item.call_id
+        ?? String(parsedEvent.output_index)
+      const state = getOrCreateToolCallState(
+        toolCallStates,
+        itemId,
+        parsedEvent.output_index,
+        parsedEvent.item,
+        () => nextToolCallIndex++,
+      )
+      if (state.name === undefined && parsedEvent.item.name) {
+        state.name = parsedEvent.item.name
+      }
+
+      if (!state.hasEmittedInitialChunk) {
+        const delta = withAssistantRole(hasEmittedRole, {
+          tool_calls: [
+            {
+              index: state.chatIndex,
+              id: state.id,
+              type: "function",
+              function: {
+                name: state.name,
+                arguments: "",
+              },
+            },
+          ],
+        })
+        hasEmittedRole = true
+        state.hasEmittedInitialChunk = true
+        yield {
+          data: JSON.stringify(
+            createChatChunk(completionId, created, model, delta, null),
+          ),
+        }
+      }
+
+      const finalArguments = parsedEvent.item.arguments ?? ""
+      const remainder =
+        finalArguments.startsWith(state.arguments) ?
+          finalArguments.slice(state.arguments.length)
+        : finalArguments
+      state.arguments = finalArguments
+      if (remainder.length === 0) {
+        continue
+      }
+
+      const delta = withAssistantRole(hasEmittedRole, {
+        tool_calls: [
+          {
+            index: state.chatIndex,
+            function: {
+              arguments: remainder,
+            },
+          },
+        ],
+      })
+      hasEmittedRole = true
+      yield {
+        data: JSON.stringify(
+          createChatChunk(completionId, created, model, delta, null),
+        ),
+      }
+      continue
+    }
+
+    if (parsedEvent.type === "response.completed") {
+      break
+    }
+  }
+
+  // Always terminate here: an upstream that ends after "response.completed"
+  // (or just ends) without "[DONE]" would otherwise leave clients waiting.
+  yield {
+    data: JSON.stringify(
+      createChatChunk(completionId, created, model, {}, finishReason),
+    ),
+  }
+  yield { data: "[DONE]" }
+}
+
+function translateMessage(message: Message): ResponseInputMessage {
+  let content: ResponseInputMessage["content"]
+  if (typeof message.content === "string") {
+    content = message.content
+  } else if (message.content === null) {
+    content = ""
+  } else {
+    content = message.content.map((part) => translateContentPart(part))
+  }
+
+  return {
+    role: message.role,
+    content,
+    ...(message.name ? { name: message.name } : {}),
+    ...(message.tool_call_id ? { tool_call_id: message.tool_call_id } : {}),
+    ...(message.tool_calls ?
+      {
+        tool_calls: message.tool_calls.map((toolCall) =>
+          translateToolCall(toolCall),
+        ),
+      }
+    : {}),
+  }
+}
+
+function translateContentPart(part: ContentPart): ResponseInputContentPart {
+  if (part.type === "text") {
+    return {
+      type: "input_text",
+      text: part.text,
+    }
+  }
+
+  return {
+    type: "input_image",
+    image_url: part.image_url.url,
+    detail: part.image_url.detail,
+  }
+}
+
+function translateToolCall(
+  toolCall: ToolCall,
+): NonNullable<ResponseInputMessage["tool_calls"]>[number] {
+  return {
+    ...toolCall,
+    function: {
+      ...toolCall.function,
+    },
+  }
+}
+
+/** Uses `outputText` when non-empty, else joins the text of "message" items. */
+function extractOutputText(
+  outputItems: Array<ResponsesOutputItem>,
+  outputText: string | undefined,
+): string {
+  if (outputText) {
+    return outputText
+  }
+
+  const pieces: Array<string> = []
+  for (const item of outputItems) {
+    if (item.type !== "message") {
+      continue
+    }
+    if (typeof item.content === "string") {
+      pieces.push(item.content)
+      continue
+    }
+    if (!Array.isArray(item.content)) {
+      continue
+    }
+    // Only text-bearing parts contribute; other part types are ignored.
+    for (const part of item.content) {
+      if (part.type === "output_text" || part.type === "text") {
+        pieces.push(part.text)
+      }
+    }
+  }
+  return pieces.join("")
+}
+
+function extractToolCalls(
+  outputItems: Array<ResponsesOutputItem>,
+): Array<ToolCall> {
+  const toolCalls: Array<ToolCall> = []
+  for (const item of outputItems) {
+    if (item.type === "function_call") {
+      toolCalls.push(translateFunctionCall(item))
+      continue
+    }
+
+    if (
+      item.type !== "message"
+      || typeof item.content === "string"
+      || !Array.isArray(item.content)
+    ) {
+      continue
+    }
+
+    for (const contentPart of item.content) {
+      if (isResponsesFunctionCall(contentPart)) {
+        toolCalls.push(translateFunctionCall(contentPart))
+      }
+    }
+  }
+
+  return toolCalls
+}
+
+function translateFunctionCall(functionCall: ResponsesFunctionCall): ToolCall {
+  return {
+    id: functionCall.call_id ?? functionCall.id ?? randomUUID(),
+    type: "function",
+    function: {
+      name: functionCall.name,
+      arguments: functionCall.arguments ?? "",
+    },
+  }
+}
+
+function isResponsesFunctionCall(
+  value: ResponsesOutputContentPart,
+): value is ResponsesFunctionCall {
+  return (
+    value.type === "function_call"
+    && "name" in value
+    && typeof value.name === "string"
+  )
+}
+
+function isResponsesFunctionCallItem(
+  value: ResponsesOutputItem | StreamOutputItem,
+): value is ResponsesFunctionCall {
+  return value.type === "function_call" && typeof value.name === "string"
+}
+
+function isStreamOutputItem(value: unknown): value is StreamOutputItem {
+  return (
+    typeof value === "object"
+    && value !== null
+    && "type" in value
+    && typeof value.type === "string"
+  )
+}
+
+// eslint-disable-next-line max-params
+function createChatChunk(
+  id: string,
+  created: number,
+  model: string,
+  delta: ChatChunkDelta,
+  finishReason: ChatChunkFinishReason,
+): ChatCompletionChunk {
+  return {
+    id,
+    object: "chat.completion.chunk",
+    created,
+    model,
+    choices: [
+      {
+        index: 0,
+        delta,
+        finish_reason: finishReason,
+        logprobs: null,
+      },
+    ],
+  }
+}
+
+function withAssistantRole(
+  hasEmittedRole: boolean,
+  delta: ChatChunkDelta,
+): ChatChunkDelta {
+  return hasEmittedRole ? delta : { role: "assistant", ...delta }
+}
+
+// eslint-disable-next-line max-params
+function getOrCreateToolCallState(
+  toolCallStates: Map<string, StreamToolCallState>,
+  key: string,
+  outputIndex: number,
+  item: Partial<ResponsesFunctionCall>,
+  getNextToolCallIndex: () => number,
+): StreamToolCallState {
+  const existingState = toolCallStates.get(key)
+  if (existingState) {
+    if (existingState.name === undefined && item.name) {
+      existingState.name = item.name
+    }
+    return existingState
+  }
+
+  const nextState: StreamToolCallState = {
+    arguments: item.arguments ?? "",
+    chatIndex: getNextToolCallIndex(),
+    hasEmittedInitialChunk: false,
+    id: item.call_id ?? item.id ?? randomUUID(),
+    name: item.name,
+    outputIndex,
+  }
+  toolCallStates.set(key, nextState)
+  return nextState
+}
+
+type ChatChunkDelta = ChatCompletionChunk["choices"][number]["delta"]
+type ChatChunkFinishReason =
+  ChatCompletionChunk["choices"][number]["finish_reason"]
+
+interface StreamToolCallState {
+  arguments: string
+  chatIndex: number
+  hasEmittedInitialChunk: boolean
+  id: string
+  name?: string
+  outputIndex: number
+}
+
+type StreamOutputItem =
+  | ResponsesFunctionCall
+  | {
+      type: "message"
+      id?: string
+      role?: "assistant" | "user" | "system" | "tool"
+      content?: Array<unknown>
+      object?: string
+      status?: string
+    }
diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts
index 881fffcc8..1395bda6a 100644
--- a/src/routes/messages/anthropic-types.ts
+++ b/src/routes/messages/anthropic-types.ts
@@ -56,6 +56,7 @@ export interface AnthropicToolUseBlock {
 export interface AnthropicThinkingBlock {
   type: "thinking"
   thinking: string
+  signature?: string
 }
 
 export type AnthropicUserContentBlock =
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 85dbf6243..54092778c 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -11,6 +11,10 @@ import {
   type ChatCompletionChunk,
   type ChatCompletionResponse,
 } from "~/services/copilot/create-chat-completions"
+import {
+  createResponses,
+  type ResponsesApiResponse,
+} from "~/services/copilot/create-responses"
 
 import {
   type AnthropicMessagesPayload,
@@ -20,8 +24,23 @@ import {
   translateToAnthropic,
   translateToOpenAI,
 } from "./non-stream-translation"
+import {
+  createResponsesStreamState,
+  translateResponsesStreamEvent,
+} from "./responses-stream-translation"
+import {
+  translateAnthropicToResponses,
+  translateResponsesToAnthropic,
+} from "./responses-translation"
 import { translateChunkToAnthropicEvents } from "./stream-translation"
 
+const DEBUG_LOG_LENGTH = 400
+
+type ResponsesStreamEvent = {
+  type?: string
+  [key: string]: unknown
+}
+
 export async function handleCompletion(c: Context) {
   await checkRateLimit(state)
 
@@ -38,6 +57,58 @@ export async function handleCompletion(c: Context) {
     await awaitApproval()
   }
 
+  if (shouldUseResponsesApi(anthropicPayload)) {
+    const responsesPayload = translateAnthropicToResponses(anthropicPayload)
+    consola.debug(
+      "Translated Responses payload:",
+      stringifyResponsesDebug(responsesPayload),
+    )
+
+    const response = await createResponses(responsesPayload)
+
+    if (isNonStreamingResponse(response)) {
+      consola.debug(
+        "Non-streaming response from Responses API:",
+        stringifyResponsesDebug(response),
+      )
+      const anthropicResponse = translateResponsesToAnthropic(response)
+      consola.debug(
+        "Translated Anthropic response:",
+        stringifyResponsesDebug(anthropicResponse),
+      )
+      return c.json(anthropicResponse)
+    }
+
+    consola.debug("Streaming response from Responses API")
+    return streamSSE(c, async (stream) => {
+      const streamState = createResponsesStreamState()
+
+      for await (const rawEvent of response) {
+        if (!rawEvent.data || rawEvent.data === "[DONE]") {
+          continue
+        }
+
+        const parsedEvent = JSON.parse(rawEvent.data) as ResponsesStreamEvent
+        consola.debug(
+          "Responses raw stream event:",
+          stringifyResponsesDebug(parsedEvent),
+        )
+        const events = translateResponsesStreamEvent(parsedEvent, streamState)
+
+        for (const event of events) {
+          consola.debug(
+            "Translated Anthropic event:",
+            stringifyResponsesDebug(event),
+          )
+          await stream.writeSSE({
+            event: event.type,
+            data: JSON.stringify(event),
+          })
+        }
+      }
+    })
+  }
+
   const response = await createChatCompletions(openAIPayload)
 
   if (isNonStreaming(response)) {
@@ -89,3 +160,80 @@ export async function handleCompletion(c: Context) {
 const isNonStreaming = (
   response: Awaited<ReturnType<typeof createChatCompletions>>,
 ): response is ChatCompletionResponse => Object.hasOwn(response, "choices")
+
+/**
+ * Type guard: true when the createResponses result is a complete response
+ * object rather than an async-iterable stream of events.
+ */
+const isNonStreamingResponse = (
+  response: Awaited<ReturnType<typeof createResponses>>,
+): response is ResponsesApiResponse => !(Symbol.asyncIterator in response)
+
+/**
+ * Decides whether a request is sent through the Responses API path: true when
+ * the payload sets `thinking`, or when any assistant message with array
+ * content contains a "thinking" block.
+ */
+function shouldUseResponsesApi(payload: AnthropicMessagesPayload): boolean {
+  return (
+    Boolean(payload.thinking)
+    || payload.messages.some(
+      (message) =>
+        message.role === "assistant"
+        && Array.isArray(message.content)
+        && message.content.some((block) => block.type === "thinking"),
+    )
+  )
+}
+
+/**
+ * Serializes a value for debug logging.
+ *
+ * Values under the keys `encrypted_content`, `signature` and
+ * `thinking_signature` are redacted, `image_url` strings holding a data URL
+ * are replaced with a placeholder, and both individual strings and the final
+ * output are cut to DEBUG_LOG_LENGTH characters (with a trailing "…").
+ *
+ * @param value - Value to log.
+ * @returns The redacted, truncated text.
+ */
+function stringifyResponsesDebug(value: unknown): string {
+  // JSON.stringify returns undefined (not a string) for undefined, functions
+  // and symbols, so the result is typed explicitly and given a fallback.
+  const json: string | undefined = JSON.stringify(
+    value,
+    (key: string, nestedValue: unknown): unknown => {
+      if (
+        key === "encrypted_content"
+        || key === "signature"
+        || key === "thinking_signature"
+      ) {
+        return "[REDACTED]"
+      }
+
+      if (
+        key === "image_url"
+        && typeof nestedValue === "string"
+        && nestedValue.startsWith("data:")
+      ) {
+        return "[REDACTED_DATA_URL]"
+      }
+
+      if (
+        typeof nestedValue === "string"
+        && nestedValue.length > DEBUG_LOG_LENGTH
+      ) {
+        return `${nestedValue.slice(0, DEBUG_LOG_LENGTH)}…`
+      }
+
+      return nestedValue
+    },
+  )
+  const serialized = json ?? String(value)
+
+  if (serialized.length <= DEBUG_LOG_LENGTH) {
+    return serialized
+  }
+
+  return `${serialized.slice(0, DEBUG_LOG_LENGTH)}…`
+}
diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts
index dc41e6382..407b2b7d6 100644
--- a/src/routes/messages/non-stream-translation.ts
+++ b/src/routes/messages/non-stream-translation.ts
@@ -40,17 +40,24 @@ export function translateToOpenAI(
     stream: payload.stream,
     temperature: payload.temperature,
     top_p: payload.top_p,
+    thinking: payload.thinking,
     user: payload.metadata?.user_id,
     tools: translateAnthropicToolsToOpenAI(payload.tools),
     tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice),
   }
 }
 
-function translateModelName(model: string): string {
+export function translateModelName(model: string): string {
   // Subagent requests use a specific model number which Copilot doesn't support
-  if (model.startsWith("claude-sonnet-4-")) {
+  if (
+    model.startsWith("claude-sonnet-4-")
+    && !model.startsWith("claude-sonnet-4.6")
+  ) {
     return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4")
-  } else if (model.startsWith("claude-opus-")) {
+  } else if (
+    model.startsWith("claude-opus-4-")
+    && !model.startsWith("claude-opus-4.6")
+  ) {
     return model.replace(/^claude-opus-4-.*/, "claude-opus-4")
   }
   return model
diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
new file mode 100644
index 000000000..3cde9e58b
--- /dev/null
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -0,0 +1,472 @@
+import type { ResponsesApiResponse } from "~/services/copilot/create-responses"
+
+import type { AnthropicStreamEventData } from "./anthropic-types"
+
+import { THINKING_TEXT } from "./responses-translation"
+
+export interface ResponsesStreamState {
+  messageStartSent: boolean
+  nextContentBlockIndex: number
+  openBlockKey?: string
+  openBlockIndex?: number
+  blockHasDelta: Set<string>
+  toolBlockByKey: Map<string, ToolBlockState>
+}
+
+interface ToolBlockState {
+  id: string
+  name: string
+}
+
+/** Creates the empty state a stream starts with: no message, no blocks. */
+export function createResponsesStreamState(): ResponsesStreamState {
+  const blockHasDelta = new Set<string>()
+  const toolBlockByKey = new Map<string, ToolBlockState>()
+  const initial = { messageStartSent: false, nextContentBlockIndex: 0 }
+
+  return { ...initial, blockHasDelta, toolBlockByKey }
+}
+
+export function translateResponsesStreamEvent(
+  parsedEvent: { type?: string; [key: string]: unknown },
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> {
+  switch (parsedEvent.type) {
+    case "response.created": {
+      return handleCreated(parsedEvent, state)
+    }
+    case "response.reasoning_summary_text.delta": {
+      return handleThinkingDelta(parsedEvent, state)
+    }
+    case "response.output_item.done": {
+      return handleOutputItemDone(parsedEvent, state)
+    }
+    case "response.output_text.delta": {
+      return handleTextDelta(parsedEvent, state)
+    }
+    case "response.output_item.added": {
+      return handleToolAdded(parsedEvent, state)
+    }
+    case "response.function_call_arguments.delta": {
+      return handleToolArgumentsDelta(parsedEvent, state)
+    }
+    case "response.function_call_arguments.done": {
+      return handleToolArgumentsDone(parsedEvent, state)
+    }
+    case "response.completed": {
+      return handleCompleted(parsedEvent, state)
+    }
+    default: {
+      return []
+    }
+  }
+}
+
+function handleCreated(
+  parsedEvent: { response?: unknown },
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> {
+  if (state.messageStartSent || !isResponseObject(parsedEvent.response)) {
+    return []
+  }
+
+  state.messageStartSent = true
+  const cachedTokens =
+    parsedEvent.response.usage?.input_tokens_details?.cached_tokens
+
+  return [
+    {
+      type: "message_start",
+      message: {
+        id: parsedEvent.response.id,
+        type: "message",
+        role: "assistant",
+        content: [],
+        model: parsedEvent.response.model,
+        stop_reason: null,
+        stop_sequence: null,
+        usage: {
+          input_tokens:
+            (parsedEvent.response.usage?.input_tokens ?? 0)
+            - (cachedTokens ?? 0),
+          output_tokens: 0,
+          ...(cachedTokens !== undefined && {
+            cache_read_input_tokens: cachedTokens,
+          }),
+        },
+      },
+    },
+  ]
+}
+
+function handleThinkingDelta(
+  parsedEvent: { output_index?: unknown; delta?: unknown },
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> {
+  if (
+    typeof parsedEvent.output_index !== "number"
+    || typeof parsedEvent.delta !== "string"
+  ) {
+    return []
+  }
+
+  const events: Array<AnthropicStreamEventData> = []
+  const key = getThinkingKey(parsedEvent.output_index)
+  const blockIndex = openBlock({
+    state,
+    key,
+    contentBlock: {
+      type: "thinking",
+      thinking: "",
+    },
+    events,
+  })
+
+  events.push({
+    type: "content_block_delta",
+    index: blockIndex,
+    delta: {
+      type: "thinking_delta",
+      thinking: parsedEvent.delta,
+    },
+  })
+  state.blockHasDelta.add(key)
+  return events
+}
+
+function handleOutputItemDone(
+  parsedEvent: { output_index?: unknown; item?: unknown },
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> {
+  if (
+    typeof parsedEvent.output_index !== "number"
+    || !isRecord(parsedEvent.item)
+  ) {
+    return []
+  }
+
+  if (
+    parsedEvent.item.type !== "reasoning"
+    || typeof parsedEvent.item.encrypted_content !== "string"
+  ) {
+    return []
+  }
+
+  const events: Array<AnthropicStreamEventData> = []
+  const key = getThinkingKey(parsedEvent.output_index)
+  const blockIndex = openBlock({
+    state,
+    key,
+    contentBlock: {
+      type: "thinking",
+      thinking: "",
+    },
+    events,
+  })
+
+  if (!state.blockHasDelta.has(key)) {
+    events.push({
+      type: "content_block_delta",
+      index: blockIndex,
+      delta: {
+        type: "thinking_delta",
+        thinking: THINKING_TEXT,
+      },
+    })
+  }
+
+  events.push({
+    type: "content_block_delta",
+    index: blockIndex,
+    delta: {
+      type: "signature_delta",
+      signature:
+        typeof parsedEvent.item.id === "string" ?
+          `${parsedEvent.item.encrypted_content}@${parsedEvent.item.id}`
+        : parsedEvent.item.encrypted_content,
+    },
+  })
+  state.blockHasDelta.add(key)
+  return events
+}
+
+function handleTextDelta(
+  parsedEvent: {
+    output_index?: unknown
+    content_index?: unknown
+    delta?: unknown
+  },
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> {
+  if (
+    typeof parsedEvent.output_index !== "number"
+    || typeof parsedEvent.content_index !== "number"
+    || typeof parsedEvent.delta !== "string"
+  ) {
+    return []
+  }
+
+  const events: Array<AnthropicStreamEventData> = []
+  const key = getTextKey(parsedEvent.output_index, parsedEvent.content_index)
+  const blockIndex = openBlock({
+    state,
+    key,
+    contentBlock: { type: "text", text: "" },
+    events,
+  })
+
+  events.push({
+    type: "content_block_delta",
+    index: blockIndex,
+    delta: {
+      type: "text_delta",
+      text: parsedEvent.delta,
+    },
+  })
+  state.blockHasDelta.add(key)
+  return events
+}
+
+function handleToolAdded(
+  parsedEvent: {
+    output_index?: unknown
+    item?: unknown
+  },
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> {
+  if (
+    typeof parsedEvent.output_index !== "number"
+    || !isRecord(parsedEvent.item)
+    || parsedEvent.item.type !== "function_call"
+    || typeof parsedEvent.item.call_id !== "string"
+    || typeof parsedEvent.item.name !== "string"
+  ) {
+    return []
+  }
+
+  const events: Array<AnthropicStreamEventData> = []
+  const key = getToolKey(parsedEvent.output_index)
+  state.toolBlockByKey.set(key, {
+    id: parsedEvent.item.call_id,
+    name: parsedEvent.item.name,
+  })
+  openBlock({
+    state,
+    key,
+    contentBlock: {
+      type: "tool_use",
+      id: parsedEvent.item.call_id,
+      name: parsedEvent.item.name,
+      input: {},
+    },
+    events,
+  })
+  return events
+}
+
+function handleToolArgumentsDelta(
+  parsedEvent: {
+    output_index?: unknown
+    delta?: unknown
+  },
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> {
+  if (
+    typeof parsedEvent.output_index !== "number"
+    || typeof parsedEvent.delta !== "string"
+  ) {
+    return []
+  }
+
+  const key = getToolKey(parsedEvent.output_index)
+  const toolBlock = state.toolBlockByKey.get(key)
+  if (!toolBlock) {
+    return []
+  }
+
+  const events: Array<AnthropicStreamEventData> = []
+  const blockIndex = openBlock({
+    state,
+    key,
+    contentBlock: {
+      type: "tool_use",
+      id: toolBlock.id,
+      name: toolBlock.name,
+      input: {},
+    },
+    events,
+  })
+
+  events.push({
+    type: "content_block_delta",
+    index: blockIndex,
+    delta: {
+      type: "input_json_delta",
+      partial_json: parsedEvent.delta,
+    },
+  })
+  state.blockHasDelta.add(key)
+
+  return events
+}
+
+function handleToolArgumentsDone(
+  parsedEvent: {
+    output_index?: unknown
+    arguments?: unknown
+  },
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> {
+  if (
+    typeof parsedEvent.output_index !== "number"
+    || typeof parsedEvent.arguments !== "string"
+  ) {
+    return []
+  }
+
+  const key = getToolKey(parsedEvent.output_index)
+  const toolBlock = state.toolBlockByKey.get(key)
+  if (!toolBlock || state.blockHasDelta.has(key)) {
+    return []
+  }
+
+  const events: Array<AnthropicStreamEventData> = []
+  const blockIndex = openBlock({
+    state,
+    key,
+    contentBlock: {
+      type: "tool_use",
+      id: toolBlock.id,
+      name: toolBlock.name,
+      input: {},
+    },
+    events,
+  })
+
+  state.blockHasDelta.add(key)
+  events.push({
+    type: "content_block_delta",
+    index: blockIndex,
+    delta: {
+      type: "input_json_delta",
+      partial_json: parsedEvent.arguments,
+    },
+  })
+  return events
+}
+
+function handleCompleted(
+  parsedEvent: { response?: unknown },
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> {
+  if (!isResponseObject(parsedEvent.response)) {
+    return []
+  }
+
+  const events: Array<AnthropicStreamEventData> = []
+  closeOpenBlock(state, events)
+  const cachedTokens =
+    parsedEvent.response.usage?.input_tokens_details?.cached_tokens
+
+  events.push(
+    {
+      type: "message_delta",
+      delta: {
+        stop_reason:
+          (
+            parsedEvent.response.output?.some(
+              (item) => item.type === "function_call",
+            )
+          ) ?
+            "tool_use"
+          : "end_turn",
+        stop_sequence: null,
+      },
+      usage: {
+        input_tokens:
+          (parsedEvent.response.usage?.input_tokens ?? 0) - (cachedTokens ?? 0),
+        output_tokens: parsedEvent.response.usage?.output_tokens ?? 0,
+        ...(cachedTokens !== undefined && {
+          cache_read_input_tokens: cachedTokens,
+        }),
+      },
+    },
+    {
+      type: "message_stop",
+    },
+  )
+
+  return events
+}
+
+/**
+ * Returns the content-block index for `key`. If `key` is not the block that
+ * is currently open, any open block is closed and a new one is started.
+ */
+function openBlock(params: {
+  state: ResponsesStreamState
+  key: string
+  contentBlock:
+    | { type: "thinking"; thinking: string }
+    | { type: "text"; text: string }
+    | {
+        type: "tool_use"
+        id: string
+        name: string
+        input: Record<string, unknown>
+      }
+  events: Array<AnthropicStreamEventData>
+}): number {
+  const { state, events } = params
+  const reusable =
+    state.openBlockKey === params.key ? state.openBlockIndex : undefined
+  if (reusable !== undefined) return reusable
+
+  closeOpenBlock(state, events)
+  const index = state.nextContentBlockIndex++
+  const block = params.contentBlock
+  events.push({ type: "content_block_start", index, content_block: block })
+  state.openBlockKey = params.key
+  state.openBlockIndex = index
+  return index
+}
+
+/**
+ * Emits `content_block_stop` for the currently open block, if any, and
+ * clears the open-block bookkeeping on `state`. No-op when nothing is open.
+ */
+function closeOpenBlock(
+  state: ResponsesStreamState,
+  events: Array<AnthropicStreamEventData>,
+): void {
+  const index = state.openBlockIndex
+  if (index !== undefined) {
+    events.push({ type: "content_block_stop", index })
+    state.openBlockKey = undefined
+    state.openBlockIndex = undefined
+  }
+}
+
+function getThinkingKey(outputIndex: number): string {
+  return ["thinking", outputIndex].join(":") // e.g. "thinking:3"
+}
+
+function getTextKey(outputIndex: number, contentIndex: number): string {
+  return ["text", outputIndex, contentIndex].join(":") // e.g. "text:1:0"
+}
+
+function getToolKey(outputIndex: number): string {
+  return ["tool", outputIndex].join(":") // e.g. "tool:2"
+}
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return value !== null && typeof value === "object" // arrays pass too
+}
+
+function isResponseObject(value: unknown): value is ResponsesApiResponse {
+  if (!isRecord(value)) {
+    return false
+  }
+  // Only `id` and `model` are verified; other fields are taken on trust.
+  return typeof value.id === "string" && typeof value.model === "string"
+}
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
new file mode 100644
index 000000000..0ac3d004a
--- /dev/null
+++ b/src/routes/messages/responses-translation.ts
@@ -0,0 +1,409 @@
+import type {
+  ResponseInputItem,
+  ResponseInputContentPart,
+  ResponsesApiResponse,
+  ResponsesOutputItem,
+  ResponsesPayload,
+  ResponsesReasoningItem,
+} from "~/services/copilot/create-responses"
+
+import type {
+  AnthropicAssistantContentBlock,
+  AnthropicAssistantMessage,
+  AnthropicMessage,
+  AnthropicMessagesPayload,
+  AnthropicResponse,
+  AnthropicTextBlock,
+  AnthropicThinkingBlock,
+  AnthropicTool,
+  AnthropicToolResultBlock,
+  AnthropicToolUseBlock,
+  AnthropicUserContentBlock,
+  AnthropicUserMessage,
+} from "./anthropic-types"
+
+import { translateModelName } from "./non-stream-translation"
+
+export const THINKING_TEXT = "Thinking..."
+
+type InputItem = ResponseInputItem
+
+export function translateAnthropicToResponses(
+  payload: AnthropicMessagesPayload,
+): ResponsesPayload {
+  const input = [
+    ...translateSystemPrompt(payload.system),
+    ...payload.messages.flatMap((message) => translateMessage(message)),
+  ]
+
+  return {
+    model: translateModelName(payload.model),
+    input,
+    stream: payload.stream,
+    temperature: payload.temperature,
+    top_p: payload.top_p,
+    max_output_tokens: payload.max_tokens,
+    stop: payload.stop_sequences,
+    tools: translateAnthropicTools(payload.tools) as Array<unknown> | undefined,
+    tool_choice: translateAnthropicToolChoice(payload.tool_choice),
+    user: payload.metadata?.user_id,
+    reasoning: {
+      summary: "detailed",
+    },
+    include: ["reasoning.encrypted_content"],
+  }
+}
+
+export function translateResponsesToAnthropic(
+  response: ResponsesApiResponse,
+): AnthropicResponse {
+  const content = response.output?.flatMap((item) => mapOutputItem(item)) ?? []
+
+  const anthropicContent =
+    content.length > 0 || !response.output_text ?
+      content
+    : [{ type: "text", text: response.output_text }]
+
+  return {
+    id: response.id,
+    type: "message",
+    role: "assistant",
+    content: anthropicContent,
+    model: response.model,
+    stop_reason: getStopReason(response),
+    stop_sequence: null,
+    usage: {
+      input_tokens:
+        (response.usage?.input_tokens ?? 0)
+        - (response.usage?.input_tokens_details?.cached_tokens ?? 0),
+      output_tokens: response.usage?.output_tokens ?? 0,
+      ...(response.usage?.input_tokens_details?.cached_tokens !== undefined && {
+        cache_read_input_tokens:
+          response.usage.input_tokens_details.cached_tokens,
+      }),
+    },
+  }
+}
+
+/**
+ * Converts the Anthropic `system` field into at most one `system` input
+ * item. A falsy prompt yields no item; an array of blocks has its `text`
+ * values joined with blank lines.
+ */
+function translateSystemPrompt(
+  system: AnthropicMessagesPayload["system"],
+): Array<InputItem> {
+  if (!system) {
+    return []
+  }
+
+  const parts =
+    typeof system === "string" ? [system] : system.map((block) => block.text)
+
+  return [{ role: "system", content: parts.join("\n\n") }]
+}
+
+function translateMessage(message: AnthropicMessage): Array<InputItem> {
+  if (message.role === "user") return translateUserMessage(message)
+  // Anything that is not a user turn is handled as an assistant turn.
+  return translateAssistantMessage(message)
+}
+
+function translateUserMessage(message: AnthropicUserMessage): Array<InputItem> {
+  if (typeof message.content === "string") {
+    return [{ role: "user", content: message.content }]
+  }
+
+  const toolResults = message.content.filter(
+    (block): block is AnthropicToolResultBlock => block.type === "tool_result",
+  )
+  const otherBlocks = message.content.filter(
+    (block) => block.type !== "tool_result",
+  )
+
+  const translated = toolResults.map((block) => ({
+    role: "tool" as const,
+    tool_call_id: block.tool_use_id,
+    content: typeof block.content === "string" ? block.content : "",
+  }))
+
+  if (otherBlocks.length === 0) {
+    return translated
+  }
+
+  return [
+    ...translated,
+    {
+      role: "user",
+      content: mapUserContent(otherBlocks),
+    },
+  ]
+}
+
+function translateAssistantMessage(
+  message: AnthropicAssistantMessage,
+): Array<InputItem> {
+  if (typeof message.content === "string") {
+    return [{ role: "assistant", content: message.content }]
+  }
+
+  const reasoningItems = message.content.flatMap((block) =>
+    block.type === "thinking" && block.signature ?
+      [translateThinkingBlock(block)]
+    : [],
+  )
+  const text = message.content
+    .filter(
+      (block): block is AnthropicTextBlock | AnthropicThinkingBlock =>
+        block.type === "text"
+        || (block.type === "thinking" && !block.signature),
+    )
+    .map((block) => (block.type === "text" ? block.text : block.thinking))
+    .join("\n\n")
+  const toolCalls = message.content
+    .filter(
+      (block): block is AnthropicToolUseBlock => block.type === "tool_use",
+    )
+    .map((block) => ({
+      id: block.id,
+      type: "function" as const,
+      function: {
+        name: block.name,
+        arguments: JSON.stringify(block.input),
+      },
+    }))
+
+  if (text.length === 0 && toolCalls.length === 0) {
+    return reasoningItems
+  }
+
+  return [
+    ...reasoningItems,
+    {
+      role: "assistant",
+      content: text,
+      ...(toolCalls.length > 0 && { tool_calls: toolCalls }),
+    },
+  ]
+}
+
+function mapUserContent(
+  content: Array<AnthropicUserContentBlock>,
+): string | Array<ResponseInputContentPart> {
+  const hasImage = content.some((block) => block.type === "image")
+  if (!hasImage) {
+    return content
+      .filter((block): block is AnthropicTextBlock => block.type === "text")
+      .map((block) => block.text)
+      .join("\n\n")
+  }
+
+  return content.flatMap((block) => {
+    if (block.type === "text") {
+      return [{ type: "input_text" as const, text: block.text }]
+    }
+
+    if (block.type === "image") {
+      return [
+        {
+          type: "input_image" as const,
+          image_url: `data:${block.source.media_type};base64,${block.source.data}`,
+          detail: "auto" as const,
+        },
+      ]
+    }
+
+    return []
+  })
+}
+
+function translateThinkingBlock(
+  block: AnthropicThinkingBlock & { signature: string },
+) {
+  const { encryptedContent, id } = parseThinkingSignature(block.signature)
+  return {
+    type: "reasoning" as const,
+    encrypted_content: encryptedContent,
+    ...(id ? { id } : {}),
+    summary:
+      block.thinking && block.thinking !== THINKING_TEXT ?
+        [{ type: "summary_text" as const, text: block.thinking }]
+      : [],
+  }
+}
+
+function mapOutputItem(
+  item: ResponsesOutputItem,
+): Array<AnthropicAssistantContentBlock> {
+  if (item.type === "reasoning") {
+    return mapReasoningItem(item)
+  }
+
+  if (item.type === "function_call") {
+    if (!item.call_id || !item.name) {
+      return []
+    }
+
+    return [
+      {
+        type: "tool_use",
+        id: item.call_id,
+        name: item.name,
+        input: parseFunctionArguments(item.arguments),
+      },
+    ]
+  }
+
+  const text = extractMessageText(item.content)
+  return text.length > 0 ? [{ type: "text", text }] : []
+}
+
+function mapReasoningItem(
+  item: ResponsesReasoningItem,
+): Array<AnthropicAssistantContentBlock> {
+  const signature = buildThinkingSignature(item.encrypted_content, item.id)
+  if (!signature) {
+    return []
+  }
+
+  return [
+    {
+      type: "thinking",
+      thinking: extractThinkingText(item.summary),
+      signature,
+    },
+  ]
+}
+
+// Splits `<encrypted>@<id>` at the last "@". With no "@", or an empty part
+// on either side, the whole string is returned as the encrypted content.
+function parseThinkingSignature(signature: string): {
+  encryptedContent: string
+  id: string | undefined
+} {
+  const at = signature.lastIndexOf("@")
+  const id = signature.slice(at + 1)
+  if (at > 0 && id.length > 0) {
+    return { encryptedContent: signature.slice(0, at), id }
+  }
+
+  return { encryptedContent: signature, id: undefined }
+}
+
+/** Joins the summary texts; falls back to THINKING_TEXT when that is empty. */
+function extractThinkingText(
+  summary: Array<{ type: string; text?: string }> | undefined,
+): string {
+  let joined = ""
+  for (const block of summary ?? []) {
+    if (typeof block.text === "string") joined += block.text
+  }
+  const trimmed = joined.trim()
+  return trimmed.length > 0 ? trimmed : THINKING_TEXT
+}
+
+/** Builds `<encrypted>@<id>` (id optional); undefined without content. */
+function buildThinkingSignature(
+  encryptedContent: string | undefined,
+  id: string | undefined,
+): string | undefined {
+  if (encryptedContent && id) {
+    return `${encryptedContent}@${id}`
+  }
+  return encryptedContent || undefined
+}
+
+/** Returns a string as-is, else joins the `output_text`/`text` part texts. */
+function extractMessageText(
+  content: string | Array<{ type: string; text?: string }>,
+): string {
+  if (typeof content === "string") return content
+  let text = ""
+  for (const part of content) {
+    const isText = part.type === "output_text" || part.type === "text"
+    if (isText) text += part.text ?? ""
+  }
+  return text
+}
+
+// Parses a function call's JSON arguments into a tool input object. Missing,
+// malformed, or non-object JSON (`null`, arrays, primitives) yields `{}`.
+function parseFunctionArguments(
+  argumentsText: string | undefined,
+): Record<string, unknown> {
+  try {
+    const parsed: unknown = JSON.parse(argumentsText ?? "")
+    if (typeof parsed !== "object" || Array.isArray(parsed)) return {}
+    return (parsed ?? {}) as Record<string, unknown>
+  } catch {
+    return {}
+  }
+}
+
+/**
+ * Converts Anthropic tool definitions into `function` tool descriptors:
+ * `name` and `description` are kept, `input_schema` becomes `parameters`.
+ *
+ * @returns `undefined` when no tool list was supplied.
+ */
+function translateAnthropicTools(
+  anthropicTools: Array<AnthropicTool> | undefined,
+):
+  | Array<{
+      type: "function"
+      function: {
+        name: string
+        description?: string
+        parameters: Record<string, unknown>
+      }
+    }>
+  | undefined {
+  if (!anthropicTools) return undefined
+
+  return anthropicTools.map(({ name, description, input_schema }) => ({
+    type: "function",
+    function: { name, description, parameters: input_schema },
+  }))
+}
+
+/**
+ * Maps an Anthropic `tool_choice` onto its OpenAI-style counterpart.
+ *
+ * - `auto` → "auto"
+ * - `any` → "required"
+ * - `none` → "none"
+ * - `tool` → a function selector for `name`, or "auto" when `name` is falsy
+ *
+ * A missing choice, or an unrecognised `type`, yields `undefined`.
+ */
+function translateAnthropicToolChoice(
+  anthropicToolChoice: AnthropicMessagesPayload["tool_choice"],
+):
+  | "auto"
+  | "required"
+  | "none"
+  | { type: "function"; function: { name: string } }
+  | undefined {
+  if (!anthropicToolChoice) return undefined
+
+  if (anthropicToolChoice.type === "auto") return "auto"
+  if (anthropicToolChoice.type === "any") return "required"
+  if (anthropicToolChoice.type === "none") return "none"
+
+  if (anthropicToolChoice.type === "tool") {
+    const { name } = anthropicToolChoice
+    return name ? { type: "function", function: { name } } : "auto"
+  }
+
+  // Unrecognised choice types are dropped rather than forwarded.
+  return undefined
+}
+
+function getStopReason(
+  response: ResponsesApiResponse,
+): AnthropicResponse["stop_reason"] {
+  // A function_call item anywhere in the output maps to "tool_use".
+  const hasCall = response.output?.some((o) => o.type === "function_call")
+  return hasCall === true ? "tool_use" : "end_turn"
+}
diff --git a/src/routes/models/route.ts b/src/routes/models/route.ts
index 5254e2af7..475310c1b 100644
--- a/src/routes/models/route.ts
+++ b/src/routes/models/route.ts
@@ -1,6 +1,9 @@
 import { Hono } from "hono"
 
+import type { Model } from "~/services/copilot/get-models"
+
 import { forwardError } from "~/lib/error"
+import { getModelLevelsForModel } from "~/lib/model-level"
 import { state } from "~/lib/state"
 import { cacheModels } from "~/lib/utils"
 
@@ -13,15 +16,7 @@ modelRoutes.get("/", async (c) => {
       await cacheModels()
     }
 
-    const models = state.models?.data.map((model) => ({
-      id: model.id,
-      object: "model",
-      type: "model",
-      created: 0, // No date available from source
-      created_at: new Date(0).toISOString(), // No date available from source
-      owned_by: model.vendor,
-      display_name: model.name,
-    }))
+    const models = expandModelList(state.models?.data ?? [])
 
     return c.json({
       object: "list",
@@ -32,3 +27,31 @@ modelRoutes.get("/", async (c) => {
     return await forwardError(c, error)
   }
 })
+
+/**
+ * Expands the model list: each model is followed by one `id(level)` entry
+ * per level that `getModelLevelsForModel` reports for it.
+ */
+export function expandModelList(models: Array<Model>) {
+  return models.flatMap((model) => {
+    const ids = [model.id]
+
+    for (const level of getModelLevelsForModel(model.id) || []) {
+      ids.push(`${model.id}(${level})`)
+    }
+
+    return ids.map((id) => toModelItem(model, id))
+  })
+}
+
+/**
+ * Shapes `model` as a list entry published under `id`. The source has no
+ * creation date, so both timestamps are pinned to the Unix epoch.
+ */
+function toModelItem(model: Model, id: string) {
+  const { vendor, name } = model
+  const epochIso = new Date(0).toISOString()
+
+  const base = { id, object: "model", type: "model", created: 0 }
+  return { ...base, created_at: epochIso, owned_by: vendor, display_name: name }
+}
diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts
new file mode 100644
index 000000000..0462f77c8
--- /dev/null
+++ b/src/routes/responses/route.ts
@@ -0,0 +1,40 @@
+import { Hono } from "hono"
+import { streamSSE, type SSEMessage } from "hono/streaming"
+
+import { awaitApproval } from "~/lib/approval"
+import { forwardError } from "~/lib/error"
+import { checkRateLimit } from "~/lib/rate-limit"
+import { state } from "~/lib/state"
+import {
+  createResponses,
+  type ResponsesApiResponse,
+  type ResponsesPayload,
+} from "~/services/copilot/create-responses"
+
+export const responsesRoutes = new Hono()
+
+responsesRoutes.post("/", async (c) => {
+  try {
+    await checkRateLimit(state)
+
+    const payload = await c.req.json<ResponsesPayload>()
+    if (state.manualApprove) await awaitApproval()
+
+    const response = await createResponses(payload)
+    if (isNonStreamingResponse(response)) {
+      return c.json(response)
+    }
+
+    return streamSSE(c, async (stream) => {
+      for await (const chunk of response) {
+        await stream.writeSSE(chunk as SSEMessage)
+      }
+    })
+  } catch (error) {
+    return await forwardError(c, error)
+  }
+})
+
+const isNonStreamingResponse = (
+  response: Awaited<ReturnType<typeof createResponses>>,
+): response is ResponsesApiResponse => !(Symbol.asyncIterator in response)
diff --git a/src/server.ts b/src/server.ts
index 462a278f3..7b9387e64 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -6,6 +6,7 @@ import { completionRoutes } from "./routes/chat-completions/route"
 import { embeddingRoutes } from "./routes/embeddings/route"
 import { messageRoutes } from "./routes/messages/route"
 import { modelRoutes } from "./routes/models/route"
+import { responsesRoutes } from "./routes/responses/route"
 import { tokenRoute } from "./routes/token/route"
 import { usageRoute } from "./routes/usage/route"
 
@@ -21,11 +22,13 @@ server.route("/models", modelRoutes)
 server.route("/embeddings", embeddingRoutes)
 server.route("/usage", usageRoute)
 server.route("/token", tokenRoute)
+server.route("/responses", responsesRoutes)
 
 // Compatibility with tools that expect v1/ prefix
 server.route("/v1/chat/completions", completionRoutes)
 server.route("/v1/models", modelRoutes)
 server.route("/v1/embeddings", embeddingRoutes)
+server.route("/v1/responses", responsesRoutes)
 
 // Anthropic compatible endpoints
 server.route("/v1/messages", messageRoutes)
diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts
index 8534151da..7fefd6de2 100644
--- a/src/services/copilot/create-chat-completions.ts
+++ b/src/services/copilot/create-chat-completions.ts
@@ -3,14 +3,22 @@ import { events } from "fetch-event-stream"
 
 import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
+import {
+  isClaudeThinkingModel,
+  parseModelNameWithLevel,
+  supportsGptReasoningEffort,
+  type ModelLevel,
+} from "~/lib/model-level"
 import { state } from "~/lib/state"
 
 export const createChatCompletions = async (
   payload: ChatCompletionsPayload,
 ) => {
+  const normalizedPayload = normalizeChatCompletionsPayloadModel(payload)
+
   if (!state.copilotToken) throw new Error("Copilot token not found")
 
-  const enableVision = payload.messages.some(
+  const enableVision = normalizedPayload.messages.some(
     (x) =>
       typeof x.content !== "string"
       && x.content?.some((x) => x.type === "image_url"),
@@ -18,7 +26,7 @@ export const createChatCompletions = async (
 
   // Agent/user check for X-Initiator header
   // Determine if any message is from an agent ("assistant" or "tool")
-  const isAgentCall = payload.messages.some((msg) =>
+  const isAgentCall = normalizedPayload.messages.some((msg) =>
     ["assistant", "tool"].includes(msg.role),
   )
 
@@ -31,7 +39,7 @@ export const createChatCompletions = async (
   const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
     method: "POST",
     headers,
-    body: JSON.stringify(payload),
+    body: JSON.stringify(normalizedPayload),
   })
 
   if (!response.ok) {
@@ -39,13 +47,51 @@ export const createChatCompletions = async (
     throw new HTTPError("Failed to create chat completions", response)
   }
 
-  if (payload.stream) {
+  if (normalizedPayload.stream) {
     return events(response)
   }
 
   return (await response.json()) as ChatCompletionResponse
 }
 
+/**
+ * Strips a `(level)` suffix from `payload.model` and applies the level as
+ * request settings. Payloads without a level suffix are returned as-is.
+ */
+export const normalizeChatCompletionsPayloadModel = (
+  payload: ChatCompletionsPayload,
+): ChatCompletionsPayload => {
+  const { baseModel, level } = parseModelNameWithLevel(payload.model)
+  if (!level) return payload
+
+  const renamed: ChatCompletionsPayload = { ...payload, model: baseModel }
+
+  // Models with GPT reasoning-effort support only get `reasoning_effort`.
+  if (supportsGptReasoningEffort(baseModel)) {
+    return { ...renamed, reasoning_effort: level }
+  }
+
+  // Other models keep just the base name, unless this is a Claude thinking
+  // model with a level other than "xhigh".
+  if (!isClaudeThinkingModel(baseModel) || level === "xhigh") {
+    return renamed
+  }
+
+  const thinking: ThinkingConfig =
+    isRecord(payload.thinking) ? payload.thinking : {}
+  // A caller-supplied string `thinking.type` wins over the "enabled" default.
+  const type = typeof thinking.type === "string" ? thinking.type : "enabled"
+
+  return {
+    ...renamed,
+    reasoning_effort: level,
+    thinking: { ...thinking, type, effort: level },
+  }
+}
+
+const isRecord = (value: unknown): value is Record<string, unknown> =>
+  value !== null && typeof value === "object" && !Array.isArray(value)
+
 // Streaming types
 
 export interface ChatCompletionChunk {
@@ -148,6 +194,17 @@ export interface ChatCompletionsPayload {
     | { type: "function"; function: { name: string } }
     | null
   user?: string | null
+  reasoning_effort?: ModelLevel | null
+  reasoning?: {
+    effort?: ModelLevel
+  } | null
+  thinking?: ThinkingConfig | null
+}
+
+interface ThinkingConfig {
+  type?: string
+  effort?: ModelLevel
+  [key: string]: unknown
 }
 
 export interface Tool {
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
new file mode 100644
index 000000000..e07b7415c
--- /dev/null
+++ b/src/services/copilot/create-responses.ts
@@ -0,0 +1,185 @@
+import consola from "consola"
+import { events } from "fetch-event-stream"
+
+import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config"
+import { HTTPError } from "~/lib/error"
+import { type ModelLevel } from "~/lib/model-level"
+import { state } from "~/lib/state"
+
+export const createResponses = async (payload: ResponsesPayload) => { // POSTs the payload to the Copilot /v1/responses endpoint
+  if (!state.copilotToken) throw new Error("Copilot token not found")
+
+  const headers: Record<string, string> = {
+    ...copilotHeaders(state, hasVisionInput(payload.input)), // second argument is true when an input message has an input_image part
+    "X-Initiator": hasAgentInput(payload.input) ? "agent" : "user", // "agent" when the input holds an assistant or tool message
+  }
+
+  const response = await fetch(`${copilotBaseUrl(state)}/v1/responses`, {
+    method: "POST",
+    headers,
+    body: JSON.stringify(payload), // payload is forwarded unchanged
+  })
+
+  if (!response.ok) {
+    consola.error("Failed to create response", response)
+    throw new HTTPError("Failed to create response", response) // the Response object is handed to HTTPError
+  }
+
+  if (payload.stream) { // truthy stream flag: return the event iterator instead of parsed JSON
+    return events(response)
+  }
+
+  return (await response.json()) as ResponsesApiResponse // unchecked cast; the body is not validated here
+}
+
+export interface ResponsesPayload { // request body that createResponses serializes with JSON.stringify
+  model: string
+  input: string | Array<ResponseInputItem> // hasVisionInput / hasAgentInput only inspect the array form
+  stream?: boolean | null // a truthy value makes createResponses return an event stream
+  temperature?: number | null
+  top_p?: number | null
+  max_output_tokens?: number | null
+  stop?: string | Array<string> | null
+  tools?: Array<unknown> | null // tool definitions are not typed in this file
+  tool_choice?: unknown
+  user?: string | null
+  reasoning_effort?: ModelLevel | null
+  reasoning?: {
+    effort?: ModelLevel
+    summary?: string
+  } | null
+  include?: Array<string> | null
+}
+
+export type ResponseInputItem = ResponseInputMessage | ResponseInputReasoning // isResponseInputMessage tells them apart by the "role" key
+
+export interface ResponseInputMessage {
+  role: "user" | "assistant" | "system" | "tool" | "developer" // "assistant" and "tool" make hasAgentInput return true
+  content: string | Array<ResponseInputContentPart> // only the array form is scanned by hasVisionInput
+  name?: string
+  tool_call_id?: string
+  tool_calls?: Array<ResponseInputToolCall>
+}
+
+export interface ResponseInputReasoning { // declares no "role" field, unlike ResponseInputMessage
+  type: "reasoning"
+  encrypted_content: string // required here; optional on ResponsesReasoningItem
+  id?: string
+  summary?: Array<{
+    type: "summary_text"
+    text: string
+  }>
+}
+
+export interface ResponseInputContentPart {
+  type: "input_text" | "input_image" // hasVisionInput looks for "input_image"
+  text?: string // text and image_url are both optional; the type does not tie them to `type`
+  image_url?: string
+  detail?: "low" | "high" | "auto"
+}
+
+export interface ResponseInputToolCall {
+  id: string
+  type: "function" // the only call type this interface allows
+  function: {
+    name: string
+    arguments: string // presumably JSON-encoded text — TODO confirm against callers
+  }
+}
+
+export interface ResponsesApiResponse { // createResponses casts the non-streaming JSON body to this type
+  id: string
+  object: string
+  created_at?: number
+  model: string
+  output?: Array<ResponsesOutputItem>
+  output_text?: string
+  usage?: { // every counter below is optional
+    input_tokens?: number
+    output_tokens?: number
+    total_tokens?: number
+    input_tokens_details?: {
+      cached_tokens?: number
+    }
+  }
+}
+
+export type ResponsesOutputItem = // union discriminated by the literal `type` of each member
+  | ResponsesOutputMessage
+  | ResponsesFunctionCall
+  | ResponsesReasoningItem
+
+export interface ResponsesOutputMessage {
+  type: "message"
+  role: "assistant" | "user" | "system" | "tool" // not restricted to "assistant"
+  content: string | Array<ResponsesOutputContentPart> // plain text or a list of parts
+  id?: string
+  status?: string
+  object?: string
+}
+
+export type ResponsesOutputContentPart =
+  | {
+      type: "output_text"
+      text: string
+    }
+  | {
+      type: "text"
+      text: string
+    }
+  | ResponsesFunctionCall // a function call may also appear as a content part
+  | {
+      type: string // catch-all member; `string` also overlaps the literal types above
+      [key: string]: unknown
+    }
+
+export interface ResponsesFunctionCall {
+  type: "function_call"
+  name: string
+  arguments?: string // presumably JSON-encoded text — TODO confirm
+  call_id?: string // declared separately from `id`; both are optional
+  id?: string
+  object?: string
+  status?: string
+}
+
+export interface ResponsesReasoningItem {
+  type: "reasoning"
+  id?: string
+  encrypted_content?: string // optional here, required on ResponseInputReasoning
+  summary?: Array<{
+    type: string // looser than the "summary_text" literal on the input-side type
+    text?: string
+  }>
+}
+
+function hasVisionInput(input: ResponsesPayload["input"]): boolean { // true when any input message has an input_image part
+  if (!Array.isArray(input)) { // plain string input cannot carry images
+    return false
+  }
+
+  return input.some(
+    (message) =>
+      isResponseInputMessage(message) // items without a role are skipped
+      && Array.isArray(message.content) // string content has no parts to inspect
+      && message.content.some((part) => part.type === "input_image"),
+  )
+}
+
+function hasAgentInput(input: ResponsesPayload["input"]): boolean { // true when any input message has role assistant or tool
+  if (!Array.isArray(input)) { // plain string input yields false
+    return false
+  }
+
+  return input.some(
+    (message) =>
+      isResponseInputMessage(message) // items without a role never count
+      && ["assistant", "tool"].includes(message.role),
+  )
+}
+
+function isResponseInputMessage(
+  value: ResponseInputItem,
+): value is ResponseInputMessage {
+  return "role" in value // key-presence check only; the role value is not inspected
+}
diff --git a/tests/create-chat-completions.test.ts b/tests/create-chat-completions.test.ts
index d18e741aa..7c19027ae 100644
--- a/tests/create-chat-completions.test.ts
+++ b/tests/create-chat-completions.test.ts
@@ -1,9 +1,11 @@
 import { test, expect, mock } from "bun:test"
 
 import type { ChatCompletionsPayload } from "../src/services/copilot/create-chat-completions"
+import type { ResponsesPayload } from "../src/services/copilot/create-responses"
 
 import { state } from "../src/lib/state"
 import { createChatCompletions } from "../src/services/copilot/create-chat-completions"
+import { createResponses } from "../src/services/copilot/create-responses"
 
 // Mock state
 state.copilotToken = "test-token"
@@ -34,7 +36,7 @@ test("sets X-Initiator to agent if tool/assistant present", async () => {
   await createChatCompletions(payload)
   expect(fetchMock).toHaveBeenCalled()
   const headers = (
-    fetchMock.mock.calls[0][1] as { headers: Record<string, string> }
+    fetchMock.mock.calls.at(-1)?.[1] as { headers: Record<string, string> }
   ).headers
   expect(headers["X-Initiator"]).toBe("agent")
 })
@@ -50,7 +52,48 @@ test("sets X-Initiator to user if only user present", async () => {
   await createChatCompletions(payload)
   expect(fetchMock).toHaveBeenCalled()
   const headers = (
-    fetchMock.mock.calls[1][1] as { headers: Record<string, string> }
+    fetchMock.mock.calls.at(-1)?.[1] as { headers: Record<string, string> }
   ).headers
   expect(headers["X-Initiator"]).toBe("user")
 })
+
+test("sets X-Initiator to agent for responses tool history", async () => {
+  const payload: ResponsesPayload = {
+    model: "gpt-5.4",
+    input: [
+      { role: "user", content: "hi" },
+      { role: "tool", content: '{"ok":true}', tool_call_id: "call_123" }, // the tool message is what should flip the header to "agent"
+    ],
+  }
+
+  await createResponses(payload)
+  const headers = (
+    fetchMock.mock.calls.at(-1)?.[1] as { headers: Record<string, string> } // most recent fetch call, second argument (init object)
+  ).headers
+  expect(headers["X-Initiator"]).toBe("agent")
+})
+
+test("enables vision header for responses image input", async () => {
+  const payload: ResponsesPayload = {
+    model: "gpt-5.4",
+    input: [
+      {
+        role: "user", // only a user message, so X-Initiator is expected to stay "user"
+        content: [
+          { type: "input_text", text: "describe this" },
+          {
+            type: "input_image", // the part that should switch the vision header on
+            image_url: "https://example.com/image.png",
+          },
+        ],
+      },
+    ],
+  }
+
+  await createResponses(payload)
+  const headers = (
+    fetchMock.mock.calls.at(-1)?.[1] as { headers: Record<string, string> } // most recent fetch call, second argument (init object)
+  ).headers
+  expect(headers["copilot-vision-request"]).toBe("true")
+  expect(headers["X-Initiator"]).toBe("user")
+})
diff --git a/tests/messages-responses-translation.test.ts b/tests/messages-responses-translation.test.ts
new file mode 100644
index 000000000..bed8cb97e
--- /dev/null
+++ b/tests/messages-responses-translation.test.ts
@@ -0,0 +1,420 @@
+import { describe, expect, test } from "bun:test"
+
+import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"
+import type { ResponsesApiResponse } from "~/services/copilot/create-responses"
+
+import {
+  createResponsesStreamState,
+  translateResponsesStreamEvent,
+} from "~/routes/messages/responses-stream-translation"
+import {
+  THINKING_TEXT,
+  translateAnthropicToResponses,
+  translateResponsesToAnthropic,
+} from "~/routes/messages/responses-translation"
+
+describe("Anthropic messages Responses request translation", () => {
+  test("preserves thinking signatures in responses input", () => {
+    const payload: AnthropicMessagesPayload = {
+      model: "claude-sonnet-4.6",
+      messages: [
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "thinking",
+              thinking: "I should reuse the prior reasoning state.",
+              signature: "encrypted-thought@rs_123", // expected below to split at "@" into encrypted_content and id
+            },
+            {
+              type: "text",
+              text: "I already checked that.",
+            },
+          ],
+        },
+        {
+          role: "user",
+          content: "What about the next step?",
+        },
+      ],
+      max_tokens: 512,
+      thinking: {
+        type: "enabled",
+        budget_tokens: 2048,
+      },
+    }
+
+    const translated = translateAnthropicToResponses(payload)
+    expect(translated.include).toEqual(["reasoning.encrypted_content"])
+    expect(translated.reasoning).toEqual({ summary: "detailed" })
+    expect(Array.isArray(translated.input)).toBe(true)
+
+    if (!Array.isArray(translated.input)) { // narrows the type for the indexed reads below
+      throw new TypeError("Expected translated input to be an array")
+    }
+
+    expect(translated.input[0]).toMatchObject({ // the thinking block is expected first, as a separate reasoning item
+      type: "reasoning",
+      encrypted_content: "encrypted-thought",
+      id: "rs_123",
+      summary: [
+        {
+          type: "summary_text",
+          text: "I should reuse the prior reasoning state.",
+        },
+      ],
+    })
+    expect(translated.input[1]).toMatchObject({
+      role: "assistant",
+      content: "I already checked that.",
+    })
+    expect(translated.input[2]).toMatchObject({
+      role: "user",
+      content: "What about the next step?",
+    })
+  })
+
+  test("normalizes unsupported Claude subagent models on responses path", () => {
+    const translated = translateAnthropicToResponses({
+      model: "claude-sonnet-4-20250110", // the trailing -20250110 is expected to be dropped
+      messages: [{ role: "user", content: "hi" }],
+      max_tokens: 64,
+      thinking: {
+        type: "enabled",
+        budget_tokens: 1024,
+      },
+    })
+
+    expect(translated.model).toBe("claude-sonnet-4")
+  })
+})
+
+describe("Anthropic messages Responses response translation", () => {
+  test("translates responses reasoning output back to anthropic thinking", () => {
+    const response: ResponsesApiResponse = {
+      id: "resp_123",
+      object: "response",
+      model: "claude-sonnet-4.6",
+      output: [
+        {
+          type: "reasoning",
+          id: "rs_123", // expected to appear after "@" in the thinking signature
+          encrypted_content: "encrypted-thought", // expected to appear before "@" in the thinking signature
+          summary: [
+            {
+              type: "summary_text",
+              text: "I should reuse the prior reasoning state.",
+            },
+          ],
+        },
+        {
+          type: "message",
+          role: "assistant",
+          content: [
+            {
+              type: "output_text",
+              text: "Here is the final answer.",
+            },
+          ],
+        },
+      ],
+      usage: {
+        input_tokens: 10,
+        output_tokens: 4,
+        total_tokens: 14,
+      },
+    }
+
+    const translated = translateResponsesToAnthropic(response)
+
+    expect(translated.content).toEqual([ // exact match: one thinking block, then one text block
+      {
+        type: "thinking",
+        thinking: "I should reuse the prior reasoning state.",
+        signature: "encrypted-thought@rs_123",
+      },
+      {
+        type: "text",
+        text: "Here is the final answer.",
+      },
+    ])
+    expect(translated.stop_reason).toBe("end_turn")
+  })
+})
+
+describe("Anthropic messages Responses reasoning stream translation", () => {
+  test("streams reasoning summary and signature as anthropic thinking deltas", () => {
+    const state = createResponsesStreamState() // one state object is shared by every event below
+    const events = [
+      {
+        type: "response.created",
+        response: {
+          id: "resp_123",
+          object: "response",
+          model: "claude-sonnet-4.6",
+          usage: {
+            input_tokens: 10,
+            output_tokens: 0,
+          },
+        },
+      },
+      {
+        type: "response.reasoning_summary_text.delta",
+        output_index: 0,
+        delta: "Tracing the previous reasoning.",
+      },
+      {
+        type: "response.output_item.done",
+        output_index: 0,
+        item: {
+          type: "reasoning",
+          id: "rs_123",
+          encrypted_content: "encrypted-thought",
+        },
+      },
+      {
+        type: "response.completed",
+        response: {
+          id: "resp_123",
+          object: "response",
+          model: "claude-sonnet-4.6",
+          output: [
+            {
+              type: "reasoning",
+              id: "rs_123",
+              encrypted_content: "encrypted-thought",
+            },
+          ],
+          usage: {
+            input_tokens: 10,
+            output_tokens: 4,
+          },
+        },
+      },
+    ].flatMap((event) => translateResponsesStreamEvent(event, state)) // flattens the per-event results into one list
+
+    expect(events[0]).toMatchObject({
+      type: "message_start",
+      message: {
+        id: "resp_123",
+      },
+    })
+    expect(events[1]).toEqual({
+      type: "content_block_start",
+      index: 0,
+      content_block: {
+        type: "thinking",
+        thinking: "",
+      },
+    })
+    expect(events[2]).toEqual({
+      type: "content_block_delta",
+      index: 0,
+      delta: {
+        type: "thinking_delta",
+        thinking: "Tracing the previous reasoning.",
+      },
+    })
+    expect(events[3]).toEqual({
+      type: "content_block_delta",
+      index: 0,
+      delta: {
+        type: "signature_delta",
+        signature: "encrypted-thought@rs_123", // encrypted_content and id joined with "@"
+      },
+    })
+    expect(events.at(-2)).toMatchObject({ // the stream is expected to end with message_delta, then message_stop
+      type: "message_delta",
+      delta: {
+        stop_reason: "end_turn",
+      },
+      usage: {
+        input_tokens: 10,
+        output_tokens: 4,
+      },
+    })
+    expect(events.at(-1)).toEqual({
+      type: "message_stop",
+    })
+  })
+
+  test("does not inject placeholder thinking text when a signed block reopens", () => {
+    const state = createResponsesStreamState()
+    const events = [
+      {
+        type: "response.reasoning_summary_text.delta",
+        output_index: 0,
+        delta: "Real reasoning text.",
+      },
+      {
+        type: "response.output_text.delta", // text for output 1 arrives before output 0's reasoning item is done
+        output_index: 1,
+        content_index: 0,
+        delta: "Visible answer.",
+      },
+      {
+        type: "response.output_item.done",
+        output_index: 0,
+        item: {
+          type: "reasoning",
+          id: "rs_789",
+          encrypted_content: "opaque-signature",
+        },
+      },
+    ].flatMap((event) => translateResponsesStreamEvent(event, state))
+
+    const placeholderEvents = events.filter( // thinking deltas whose text equals the THINKING_TEXT placeholder
+      (event) =>
+        event.type === "content_block_delta"
+        && event.delta.type === "thinking_delta"
+        && event.delta.thinking === THINKING_TEXT,
+    )
+
+    expect(placeholderEvents).toHaveLength(0)
+    expect(events.at(-2)).toEqual({
+      type: "content_block_start",
+      index: 2, // a fresh block index is expected rather than reuse of index 0
+      content_block: {
+        type: "thinking",
+        thinking: "",
+      },
+    })
+    expect(events.at(-1)).toEqual({
+      type: "content_block_delta",
+      index: 2,
+      delta: {
+        type: "signature_delta",
+        signature: "opaque-signature@rs_789",
+      },
+    })
+  })
+})
+
+describe("Anthropic messages Responses tool stream translation", () => {
+  test("reopens tool blocks with a new index and avoids duplicating done args", () => {
+    const state = createResponsesStreamState()
+    const events = [
+      {
+        type: "response.output_item.added",
+        output_index: 0,
+        item: {
+          type: "function_call",
+          call_id: "call_123",
+          name: "get_weather",
+        },
+      },
+      {
+        type: "response.function_call_arguments.delta",
+        output_index: 0,
+        delta: '{"city":"Bos',
+      },
+      {
+        type: "response.output_text.delta", // text for output 1 interrupts the argument stream of output 0
+        output_index: 1,
+        content_index: 0,
+        delta: "Working on it.",
+      },
+      {
+        type: "response.function_call_arguments.delta",
+        output_index: 0,
+        delta: 'ton"}',
+      },
+      {
+        type: "response.function_call_arguments.done",
+        output_index: 0,
+        arguments: '{"city":"Boston"}', // already streamed via deltas; the expected list has no further event for it
+      },
+    ].flatMap((event) => translateResponsesStreamEvent(event, state))
+
+    expect(events).toEqual([
+      {
+        type: "content_block_start",
+        index: 0,
+        content_block: {
+          type: "tool_use",
+          id: "call_123",
+          name: "get_weather",
+          input: {},
+        },
+      },
+      {
+        type: "content_block_delta",
+        index: 0,
+        delta: {
+          type: "input_json_delta",
+          partial_json: '{"city":"Bos',
+        },
+      },
+      {
+        type: "content_block_stop", // tool block 0 is closed before the text block opens
+        index: 0,
+      },
+      {
+        type: "content_block_start",
+        index: 1,
+        content_block: {
+          type: "text",
+          text: "",
+        },
+      },
+      {
+        type: "content_block_delta",
+        index: 1,
+        delta: {
+          type: "text_delta",
+          text: "Working on it.",
+        },
+      },
+      {
+        type: "content_block_stop",
+        index: 1,
+      },
+      {
+        type: "content_block_start",
+        index: 2, // the same call_123 reopens under a new block index
+        content_block: {
+          type: "tool_use",
+          id: "call_123",
+          name: "get_weather",
+          input: {},
+        },
+      },
+      {
+        type: "content_block_delta",
+        index: 2,
+        delta: {
+          type: "input_json_delta",
+          partial_json: 'ton"}',
+        },
+      },
+    ])
+  })
+})
+
+describe("Anthropic messages Responses placeholder translation", () => {
+  test("uses placeholder thinking text when reasoning summary is absent", () => {
+    const translated = translateResponsesToAnthropic({
+      id: "resp_456",
+      object: "response",
+      model: "claude-sonnet-4.6",
+      output: [
+        {
+          type: "reasoning", // no summary field on this item
+          id: "rs_456",
+          encrypted_content: "opaque-thought",
+        },
+      ],
+      usage: {
+        input_tokens: 1,
+        output_tokens: 1,
+        total_tokens: 2,
+      },
+    })
+
+    expect(translated.content[0]).toEqual({
+      type: "thinking",
+      thinking: THINKING_TEXT, // placeholder stands in for the missing summary text
+      signature: "opaque-thought@rs_456",
+    })
+  })
+})
diff --git a/tests/phase5-routing.test.ts b/tests/phase5-routing.test.ts
new file mode 100644
index 000000000..5c2aa8648
--- /dev/null
+++ b/tests/phase5-routing.test.ts
@@ -0,0 +1,339 @@
+import { describe, expect, test } from "bun:test"
+
+import type { ChatCompletionChunk } from "~/services/copilot/create-chat-completions"
+import type { ResponseInputMessage } from "~/services/copilot/create-responses"
+import type { Model } from "~/services/copilot/get-models"
+
+import {
+  getModelLevelsForModel,
+  parseModelNameWithLevel,
+} from "~/lib/model-level"
+import {
+  translateChatCompletionsToResponses,
+  translateResponsesStreamToChatStream,
+  translateResponsesToChatCompletions,
+} from "~/routes/chat-completions/responses-translation"
+import { expandModelList } from "~/routes/models/route"
+import { normalizeChatCompletionsPayloadModel } from "~/services/copilot/create-chat-completions"
+
+describe("model(level) parsing and mapping", () => {
+  test("parses suffixed model name", () => {
+    expect(parseModelNameWithLevel("gpt-5.3-codex(high)")).toEqual({
+      baseModel: "gpt-5.3-codex",
+      level: "high",
+    })
+  })
+
+  test("keeps plain model untouched", () => {
+    expect(parseModelNameWithLevel("claude-sonnet-4.6")).toEqual({
+      baseModel: "claude-sonnet-4.6",
+      level: undefined, // no "(level)" suffix present
+    })
+  })
+
+  test("maps GPT-5.4 suffix level to reasoning_effort", () => {
+    const payload = normalizeChatCompletionsPayloadModel({
+      model: "gpt-5.4(xhigh)",
+      messages: [{ role: "user", content: "hi" }],
+    })
+
+    expect(payload.model).toBe("gpt-5.4")
+    expect(payload.reasoning_effort).toBe("xhigh")
+  })
+
+  test("maps claude suffix level while preserving thinking fields", () => {
+    const payload = normalizeChatCompletionsPayloadModel({
+      model: "claude-opus-4.6(high)",
+      messages: [{ role: "user", content: "hi" }],
+      thinking: { budget_tokens: 4096 }, // no type given, so "enabled" is expected to be filled in
+    })
+
+    expect(payload.model).toBe("claude-opus-4.6")
+    expect(payload.reasoning_effort).toBe("high")
+    expect(payload.thinking).toEqual({
+      budget_tokens: 4096, // caller-supplied field must survive
+      effort: "high",
+      type: "enabled",
+    })
+  })
+})
+
+describe("chat/responses translation", () => {
+  test("translates chat payload to responses payload", () => {
+    const translated = translateChatCompletionsToResponses({
+      model: "gpt-5.4",
+      messages: [{ role: "user", content: "Hello" }],
+      max_tokens: 128,
+      reasoning_effort: "medium",
+    })
+
+    expect(translated).toMatchObject({
+      model: "gpt-5.4",
+      input: [{ role: "user", content: "Hello" }],
+      max_output_tokens: 128, // max_tokens is expected under its Responses name
+      reasoning_effort: "medium",
+      reasoning: { effort: "medium" }, // the effort is expected in both the flat and the nested field
+    })
+  })
+
+  test("translates responses payload back to chat completion", () => {
+    const translated = translateResponsesToChatCompletions({
+      id: "resp_123",
+      object: "response",
+      created_at: 123,
+      model: "gpt-5.3-codex",
+      output: [
+        {
+          type: "message",
+          role: "assistant",
+          content: [{ type: "output_text", text: "Hello from responses" }],
+        },
+      ],
+      usage: { input_tokens: 2, output_tokens: 4, total_tokens: 6 },
+    })
+
+    expect(translated.object).toBe("chat.completion")
+    expect(translated.choices[0]?.message.content).toBe("Hello from responses")
+    expect(translated.usage).toEqual({
+      prompt_tokens: 2, // from input_tokens
+      completion_tokens: 4, // from output_tokens
+      total_tokens: 6,
+    })
+  })
+
+  test("preserves tool metadata when translating chat payload", () => {
+    const translated = translateChatCompletionsToResponses({
+      model: "gpt-5.4",
+      messages: [
+        {
+          role: "assistant",
+          content: null, // expected to become an empty string in the translated input
+          name: "planner",
+          tool_calls: [
+            {
+              id: "call_123",
+              type: "function",
+              function: {
+                name: "get_weather",
+                arguments: '{"city":"Boston"}',
+              },
+            },
+          ],
+        },
+        {
+          role: "tool",
+          content: '{"temperature":72}',
+          tool_call_id: "call_123",
+        },
+      ],
+    })
+
+    expect(Array.isArray(translated.input)).toBe(true)
+    if (!Array.isArray(translated.input)) { // narrows the type for the assignment below
+      throw new TypeError(
+        "Expected translated input to be an array of messages",
+      )
+    }
+    const input: Array<ResponseInputMessage> = translated.input
+    expect(input[0]).toMatchObject({
+      role: "assistant",
+      content: "",
+      name: "planner",
+      tool_calls: [
+        {
+          id: "call_123",
+          type: "function",
+          function: {
+            name: "get_weather",
+            arguments: '{"city":"Boston"}',
+          },
+        },
+      ],
+    })
+    expect(input[1]).toMatchObject({
+      role: "tool",
+      content: '{"temperature":72}',
+      tool_call_id: "call_123",
+    })
+  })
+
+  test("translates text streaming events back to chat chunks", async () => {
+    const translated = await collectStreamChunks(
+      translateResponsesStreamToChatStream(
+        asResponseStream([
+          {
+            type: "response.output_item.added",
+            output_index: 0,
+            item: {
+              id: "msg_123",
+              type: "message",
+              status: "in_progress",
+              role: "assistant",
+              content: [],
+            },
+          },
+          {
+            type: "response.output_text.delta",
+            item_id: "msg_123",
+            output_index: 0,
+            content_index: 0,
+            delta: "Hello",
+          },
+          {
+            type: "response.completed",
+            response: {
+              id: "resp_123",
+              object: "response",
+              model: "gpt-5.4",
+            },
+          },
+        ]),
+        "gpt-5.4",
+      ),
+    )
+
+    expect(translated[0]?.choices[0]?.delta).toEqual({ // the first chunk is expected to carry both role and text
+      role: "assistant",
+      content: "Hello",
+    })
+    expect(translated[1]?.choices[0]?.finish_reason).toBe("stop")
+  })
+
+  test("translates streamed function call events back to chat chunks", async () => {
+    const translated = await collectStreamChunks(
+      translateResponsesStreamToChatStream(
+        asResponseStream([
+          {
+            type: "response.output_item.added",
+            output_index: 0,
+            item: {
+              id: "fc_123",
+              type: "function_call",
+              call_id: "call_123", // expected as the chat tool call id, not the item id fc_123
+              name: "get_weather",
+            },
+          },
+          {
+            type: "response.function_call_arguments.delta",
+            item_id: "fc_123",
+            output_index: 0,
+            call_id: "call_123",
+            delta: '{"location":"San"',
+          },
+          {
+            type: "response.completed",
+            response: {
+              id: "resp_123",
+              object: "response",
+              model: "gpt-5.4",
+            },
+          },
+        ]),
+        "gpt-5.4",
+      ),
+    )
+
+    expect(translated[0]?.choices[0]?.delta).toEqual({ // opening chunk: id, name and empty arguments
+      role: "assistant",
+      tool_calls: [
+        {
+          index: 0,
+          id: "call_123",
+          type: "function",
+          function: {
+            name: "get_weather",
+            arguments: "",
+          },
+        },
+      ],
+    })
+    expect(translated[1]?.choices[0]?.delta).toEqual({ // follow-up chunk: index and argument fragment only
+      tool_calls: [
+        {
+          index: 0,
+          function: {
+            arguments: '{"location":"San"',
+          },
+        },
+      ],
+    })
+    expect(translated[2]?.choices[0]?.finish_reason).toBe("tool_calls")
+  })
+})
+
+describe("model listing expansion", () => {
+  test("includes required level-suffixed variants", () => {
+    const models = expandModelList([
+      makeModel("gpt-5.4"),
+      makeModel("gpt-5.3-codex"),
+      makeModel("claude-opus-4.6"),
+      makeModel("claude-opus-4.6-fast"),
+      makeModel("claude-sonnet-4.6"),
+      makeModel("gpt-4.1"),
+    ])
+    const ids = models.map((model) => model.id)
+
+    expect(ids).toContain("gpt-5.4") // base ids must remain alongside the suffixed variants
+    expect(ids).toContain("gpt-5.3-codex")
+    for (const level of getModelLevelsForModel("gpt-5.4") ?? []) { // a nullish result means the loop asserts nothing
+      expect(ids).toContain(`gpt-5.4(${level})`)
+    }
+    for (const level of getModelLevelsForModel("gpt-5.3-codex") ?? []) {
+      expect(ids).toContain(`gpt-5.3-codex(${level})`)
+    }
+    for (const level of getModelLevelsForModel("claude-opus-4.6") ?? []) { // opus levels are reused for the other two Claude ids
+      expect(ids).toContain(`claude-opus-4.6(${level})`)
+      expect(ids).toContain(`claude-opus-4.6-fast(${level})`)
+      expect(ids).toContain(`claude-sonnet-4.6(${level})`)
+    }
+    expect(ids).toContain("gpt-4.1")
+  })
+})
+
+function makeModel(id: string): Model { // test fixture: a Model whose id and name are both `id`
+  return {
+    id,
+    name: id,
+    object: "model",
+    model_picker_enabled: true,
+    preview: false,
+    vendor: "test",
+    version: "1",
+    capabilities: { // fixed placeholder values, identical for every id
+      family: "test",
+      limits: {},
+      object: "model_capabilities",
+      supports: {},
+      tokenizer: "test",
+      type: "chat",
+    },
+  }
+}
+
+async function collectStreamChunks( // drains the stream and returns its JSON-parsed chunks
+  stream: AsyncIterable<{ data?: string | Promise<string> }>,
+): Promise<Array<ChatCompletionChunk>> {
+  const chunks: Array<ChatCompletionChunk> = []
+  for await (const event of stream) {
+    const data = await event.data // data may be a string or a promise of one
+    if (data === "[DONE]") { // the end-of-stream sentinel is not JSON, so it is skipped
+      continue
+    }
+    chunks.push(JSON.parse(data ?? "{}") as ChatCompletionChunk) // missing data parses to an empty object; the cast is unchecked
+  }
+  return chunks
+}
+
+async function* asResponseStream(events: Array<Record<string, unknown>>) { // yields each event as { data: <JSON string> }, then a "[DONE]" item
+  for (const event of events) {
+    await Promise.resolve() // bare await before each yield; purpose not shown here — presumably a lint requirement, TODO confirm
+    yield {
+      data: JSON.stringify(event),
+    }
+  }
+
+  await Promise.resolve()
+  yield {
+    data: "[DONE]", // the sentinel that collectStreamChunks skips
+  }
+}