diff --git a/bun.lock b/bun.lock index 20e895e7f..9ece87578 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "copilot-api", diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 04a5ae9ed..00c0674d0 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -7,7 +7,10 @@ import { awaitApproval } from "~/lib/approval" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" -import { isNullish } from "~/lib/utils" +import { + normalizeOutputTokenParam, + translateModelName, +} from "~/routes/messages/non-stream-translation" import { createChatCompletions, type ChatCompletionResponse, @@ -24,6 +27,7 @@ export async function handleCompletion(c: Context) { const selectedModel = state.models?.data.find( (model) => model.id === payload.model, ) + const translatedModel = translateModelName(payload.model) // Calculate and display token count try { @@ -39,13 +43,11 @@ export async function handleCompletion(c: Context) { if (state.manualApprove) await awaitApproval() - if (isNullish(payload.max_tokens)) { - payload = { - ...payload, - max_tokens: selectedModel?.capabilities.limits.max_output_tokens, - } - consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens)) - } + payload = normalizeOutputTokenParam( + payload, + translatedModel, + selectedModel?.capabilities.limits.max_output_tokens, + ) const response = await createChatCompletions(payload) diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index dc41e6382..45f55a932 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -1,3 +1,6 @@ +import consola from "consola" + +import { isNullish } from "~/lib/utils" import { type ChatCompletionResponse, type 
ChatCompletionsPayload, @@ -26,16 +29,94 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils" // Payload translation +type OutputTokenParam = Partial< + Pick<ChatCompletionsPayload, "max_tokens" | "max_completion_tokens"> +> + +export function modelUsesMaxCompletionTokens(model: string): boolean { + return /^(?:gpt-5(?:[.-]|$)|o[1-9](?:[.-]|$))/.test(model) +} + +export function getOutputTokenParam( + model: string, + maxTokens: number | null | undefined, +): OutputTokenParam { + if (isNullish(maxTokens)) { + return {} + } + + return modelUsesMaxCompletionTokens(model) ? + { max_completion_tokens: maxTokens } + : { max_tokens: maxTokens } +} + +export function normalizeOutputTokenParam( + payload: ChatCompletionsPayload, + model: string, + defaultMaxTokens?: number | null, +): ChatCompletionsPayload { + const nextPayload: ChatCompletionsPayload = { + ...payload, + } + + const effectiveMaxTokens = + isNullish(payload.max_tokens) ? + payload.max_completion_tokens + : payload.max_tokens + + if (modelUsesMaxCompletionTokens(model)) { + delete nextPayload.max_tokens + + if (!isNullish(effectiveMaxTokens)) { + nextPayload.max_completion_tokens = effectiveMaxTokens + } else if (!isNullish(defaultMaxTokens)) { + nextPayload.max_completion_tokens = defaultMaxTokens + } + + consola.debug( + "Normalized output token param:", + JSON.stringify({ + model, + param: "max_completion_tokens", + value: nextPayload.max_completion_tokens, + }), + ) + + return nextPayload + } + + delete nextPayload.max_completion_tokens + + if (!isNullish(effectiveMaxTokens)) { + nextPayload.max_tokens = effectiveMaxTokens + } else if (!isNullish(defaultMaxTokens)) { + nextPayload.max_tokens = defaultMaxTokens + } + + consola.debug( + "Normalized output token param:", + JSON.stringify({ + model, + param: "max_tokens", + value: nextPayload.max_tokens, + }), + ) + + return nextPayload +} + export function translateToOpenAI( payload: AnthropicMessagesPayload, ): ChatCompletionsPayload { + const model = translateModelName(payload.model) + return { - model: 
translateModelName(payload.model), + model, messages: translateAnthropicMessagesToOpenAI( payload.messages, payload.system, ), - max_tokens: payload.max_tokens, + ...getOutputTokenParam(model, payload.max_tokens), stop: payload.stop_sequences, stream: payload.stream, temperature: payload.temperature, @@ -46,7 +127,7 @@ } } -function translateModelName(model: string): string { +export function translateModelName(model: string): string { // Subagent requests use a specific model number which Copilot doesn't support if (model.startsWith("claude-sonnet-4-")) { return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4") diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 8534151da..dfe3bc201 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -130,6 +130,7 @@ export interface ChatCompletionsPayload { temperature?: number | null top_p?: number | null max_tokens?: number | null + max_completion_tokens?: number | null stop?: string | Array<string> | null n?: number | null stream?: boolean | null