Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Route GPT models through responses API
Broaden GPT-family internal routing to the Responses path, add GPT-5.4 support, preserve chat-to-responses metadata, improve streaming tool call translation, align Responses request headers, and add regression coverage.
  • Loading branch information
Godzilla675 authored and sngodzilla1 committed Mar 13, 2026
commit 6ecf29a01f00ca626d7c3a3dcd8ccc70339e270a
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ These endpoints mimic the OpenAI API structure.
| `GET /v1/models` | `GET` | Lists the currently available models. |
| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. |

For GPT-family models, `/v1/chat/completions` is a compatibility layer. If you need native Responses features and the best chance of preserving model-specific reasoning metadata, prefer `POST /v1/responses`.

### Anthropic Compatible Endpoints

These endpoints are designed to be compatible with the Anthropic Messages API.
Expand Down
22 changes: 18 additions & 4 deletions src/lib/model-level.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ export const MODEL_LEVELS = ["low", "medium", "high", "xhigh"] as const

export type ModelLevel = (typeof MODEL_LEVELS)[number]

export const MODEL_LEVEL_VARIANTS = {
"gpt-5.3-codex": MODEL_LEVELS,
const CLAUDE_MODEL_LEVEL_VARIANTS = {
"claude-opus-4.6": ["low", "medium", "high"],
"claude-opus-4.6-fast": ["low", "medium", "high"],
"claude-sonnet-4.6": ["low", "medium", "high"],
Expand All @@ -29,8 +28,23 @@ export const parseModelNameWithLevel = (
}
}

export const isCodexResponsesModel = (model: string): boolean =>
model === "gpt-5.3-codex"
/**
 * True when the model belongs to the GPT family and should be routed
 * through the Responses API rather than the chat-completions path.
 */
export const isGptResponsesModel = (model: string): boolean =>
  /^gpt-/.test(model)

/**
 * True for GPT-5-generation models, which accept a reasoning-effort
 * level (the full MODEL_LEVELS ladder).
 */
export const supportsGptReasoningEffort = (model: string): boolean =>
  model.slice(0, 5) === "gpt-5"

/**
 * Returns the set of effort levels a model supports, or `undefined`
 * for models with no level variants.
 *
 * GPT-5-family models expose the full level ladder; Claude models use
 * a per-model lookup table.
 */
export const getModelLevelsForModel = (
  model: string,
): ReadonlyArray<ModelLevel> | undefined => {
  // GPT-5 generation accepts every defined reasoning-effort level.
  if (supportsGptReasoningEffort(model)) return MODEL_LEVELS

  // Fall back to the Claude table; unknown models resolve to undefined.
  const key = model as keyof typeof CLAUDE_MODEL_LEVEL_VARIANTS
  return CLAUDE_MODEL_LEVEL_VARIANTS[key]
}

/**
 * True only for the single Claude model treated as a "thinking" model
 * by the routing layer.
 */
export const isClaudeThinkingModel = (model: string): boolean => {
  const THINKING_MODEL = "claude-opus-4.6"
  return model === THINKING_MODEL
}
Expand Down
9 changes: 3 additions & 6 deletions src/routes/chat-completions/handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@ import consola from "consola"
import { streamSSE, type SSEMessage } from "hono/streaming"

import { awaitApproval } from "~/lib/approval"
import {
isCodexResponsesModel,
parseModelNameWithLevel,
} from "~/lib/model-level"
import { isGptResponsesModel, parseModelNameWithLevel } from "~/lib/model-level"
import { checkRateLimit } from "~/lib/rate-limit"
import { state } from "~/lib/state"
import { getTokenCount } from "~/lib/tokenizer"
Expand Down Expand Up @@ -65,15 +62,15 @@ export async function handleCompletion(c: Context) {

const normalizedPayload = normalizeChatCompletionsPayloadModel(payload)

if (isCodexResponsesModel(baseModel)) {
if (isGptResponsesModel(baseModel)) {
const responsesPayload =
translateChatCompletionsToResponses(normalizedPayload)
const responses = await createResponses(responsesPayload)

if (isNonStreamingResponse(responses)) {
const completionResponse = translateResponsesToChatCompletions(responses)
consola.debug(
"Codex translated response:",
"GPT translated response:",
JSON.stringify(completionResponse).slice(-400),
)
return c.json(completionResponse)
Expand Down
Loading