Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Route GPT models through responses API
Broaden GPT-family internal routing to the Responses path, add GPT-5.4 support, preserve chat-to-responses metadata, improve streaming tool call translation, align Responses request headers, and add regression coverage.
  • Loading branch information
Godzilla675 authored and sngodzilla1 committed Mar 13, 2026
commit 6ecf29a01f00ca626d7c3a3dcd8ccc70339e270a
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ These endpoints mimic the OpenAI API structure.
| `GET /v1/models` | `GET` | Lists the currently available models. |
| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. |

For GPT-family models, `/v1/chat/completions` is a compatibility layer. If you need native Responses features and the best chance of preserving model-specific reasoning metadata, prefer `POST /v1/responses`.

### Anthropic Compatible Endpoints

These endpoints are designed to be compatible with the Anthropic Messages API.
Expand Down
22 changes: 18 additions & 4 deletions src/lib/model-level.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ export const MODEL_LEVELS = ["low", "medium", "high", "xhigh"] as const

export type ModelLevel = (typeof MODEL_LEVELS)[number]

export const MODEL_LEVEL_VARIANTS = {
"gpt-5.3-codex": MODEL_LEVELS,
const CLAUDE_MODEL_LEVEL_VARIANTS = {
"claude-opus-4.6": ["low", "medium", "high"],
"claude-opus-4.6-fast": ["low", "medium", "high"],
"claude-sonnet-4.6": ["low", "medium", "high"],
Expand All @@ -29,8 +28,23 @@ export const parseModelNameWithLevel = (
}
}

export const isCodexResponsesModel = (model: string): boolean =>
model === "gpt-5.3-codex"
/**
 * True when the model belongs to the GPT family and should be routed
 * through the Responses API rather than the chat-completions path.
 */
export const isGptResponsesModel = (model: string): boolean =>
  /^gpt-/.test(model)

/**
 * True for GPT-5-generation models, which accept a reasoning-effort
 * level (the full MODEL_LEVELS ladder).
 */
export const supportsGptReasoningEffort = (model: string): boolean =>
  model.slice(0, 5) === "gpt-5"

/**
 * Returns the set of effort levels a model supports, or `undefined`
 * for models with no level variants.
 *
 * GPT-5-family models expose the full level ladder; Claude models use
 * a per-model lookup table.
 */
export const getModelLevelsForModel = (
  model: string,
): ReadonlyArray<ModelLevel> | undefined => {
  // GPT-5 generation accepts every defined reasoning-effort level.
  if (supportsGptReasoningEffort(model)) return MODEL_LEVELS

  // Fall back to the Claude table; unknown models resolve to undefined.
  const key = model as keyof typeof CLAUDE_MODEL_LEVEL_VARIANTS
  return CLAUDE_MODEL_LEVEL_VARIANTS[key]
}

/**
 * True only for the single Claude model treated as a "thinking" model
 * by the routing layer.
 */
export const isClaudeThinkingModel = (model: string): boolean => {
  const THINKING_MODEL = "claude-opus-4.6"
  return model === THINKING_MODEL
}
Expand Down
9 changes: 3 additions & 6 deletions src/routes/chat-completions/handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@ import consola from "consola"
import { streamSSE, type SSEMessage } from "hono/streaming"

import { awaitApproval } from "~/lib/approval"
import {
isCodexResponsesModel,
parseModelNameWithLevel,
} from "~/lib/model-level"
import { isGptResponsesModel, parseModelNameWithLevel } from "~/lib/model-level"
import { checkRateLimit } from "~/lib/rate-limit"
import { state } from "~/lib/state"
import { getTokenCount } from "~/lib/tokenizer"
Expand Down Expand Up @@ -65,15 +62,15 @@ export async function handleCompletion(c: Context) {

const normalizedPayload = normalizeChatCompletionsPayloadModel(payload)

if (isCodexResponsesModel(baseModel)) {
if (isGptResponsesModel(baseModel)) {
const responsesPayload =
translateChatCompletionsToResponses(normalizedPayload)
const responses = await createResponses(responsesPayload)

if (isNonStreamingResponse(responses)) {
const completionResponse = translateResponsesToChatCompletions(responses)
consola.debug(
"Codex translated response:",
"GPT translated response:",
JSON.stringify(completionResponse).slice(-400),
)
return c.json(completionResponse)
Expand Down
Loading