diff --git a/bun.lock b/bun.lock
index 20e895e7f..9ece87578 100644
--- a/bun.lock
+++ b/bun.lock
@@ -1,5 +1,6 @@
 {
   "lockfileVersion": 1,
+  "configVersion": 0,
   "workspaces": {
     "": {
       "name": "copilot-api",
diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
new file mode 100644
index 000000000..0261897a7
--- /dev/null
+++ b/src/routes/responses/handler.ts
@@ -0,0 +1,76 @@
+import type { Context } from "hono"
+
+import consola from "consola"
+import { streamSSE } from "hono/streaming"
+
+import { awaitApproval } from "~/lib/approval"
+import { checkRateLimit } from "~/lib/rate-limit"
+import { state } from "~/lib/state"
+import { isNullish } from "~/lib/utils"
+import {
+  createResponses,
+  type ResponseObject,
+  type ResponsesPayload,
+} from "~/services/copilot/create-responses"
+
+/**
+ * Handles a request to the responses route.
+ *
+ * Runs the rate-limit check, parses the JSON body, and when
+ * `isNullish(payload.max_output_tokens)` holds, fills that field from the
+ * selected model's limits. If `state.manualApprove` is set it awaits
+ * `awaitApproval()` before calling `createResponses`. A non-streaming result
+ * is returned as JSON; otherwise each event is relayed over SSE.
+ */
+export async function handleResponse(c: Context) {
+  await checkRateLimit(state)
+
+  let payload = await c.req.json<ResponsesPayload>()
+  // Only the last 400 characters of the serialized payload are logged.
+  consola.debug("Request payload:", JSON.stringify(payload).slice(-400))
+
+  const selectedModel = state.models?.data.find(
+    (model) => model.id === payload.model,
+  )
+
+  if (isNullish(payload.max_output_tokens)) {
+    payload = {
+      ...payload,
+      max_output_tokens: selectedModel?.capabilities.limits.max_output_tokens,
+    }
+    consola.debug(
+      "Set max_output_tokens to:",
+      JSON.stringify(payload.max_output_tokens),
+    )
+  }
+
+  if (state.manualApprove) await awaitApproval()
+
+  const response = await createResponses(payload)
+
+  if (isNonStreaming(response)) {
+    consola.debug("Non-streaming response:", JSON.stringify(response))
+    return c.json(response)
+  }
+
+  consola.debug("Streaming response")
+  return streamSSE(c, async (stream) => {
+    for await (const chunk of response) {
+      consola.debug("Streaming chunk:", JSON.stringify(chunk))
+      // Events without data are dropped rather than forwarded.
+      if (!chunk.data) continue
+      await stream.writeSSE({
+        event: chunk.event,
+        data: chunk.data,
+      })
+    }
+  })
+}
+
+/**
+ * Type guard: a result that has its own `output` property is treated as the
+ * complete `ResponseObject`; anything else is handled as the event stream.
+ */
+const isNonStreaming = (
+  response: Awaited<ReturnType<typeof createResponses>>,
+): response is ResponseObject => Object.hasOwn(response, "output")
diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts
new file mode 100644
index 000000000..e1f9d0b3c
--- /dev/null
+++ b/src/routes/responses/route.ts
@@ -0,0 +1,16 @@
+import { Hono } from "hono"
+
+import { forwardError } from "~/lib/error"
+
+import { handleResponse } from "./handler"
+
+export const responsesRoutes = new Hono()
+
+// Anything thrown by the handler is handed to forwardError.
+responsesRoutes.post("/", async (c) => {
+  try {
+    return await handleResponse(c)
+  } catch (error) {
+    return await forwardError(c, error)
+  }
+})
diff --git a/src/server.ts b/src/server.ts
index 462a278f3..4c968195e 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -6,6 +6,7 @@ import { completionRoutes } from "./routes/chat-completions/route"
 import { embeddingRoutes } from "./routes/embeddings/route"
 import { messageRoutes } from "./routes/messages/route"
 import { modelRoutes } from "./routes/models/route"
+import { responsesRoutes } from "./routes/responses/route"
 import { tokenRoute } from "./routes/token/route"
 import { usageRoute } from "./routes/usage/route"
 
@@ -19,6 +20,7 @@ server.get("/", (c) => c.text("Server running"))
 server.route("/chat/completions", completionRoutes)
 server.route("/models", modelRoutes)
 server.route("/embeddings", embeddingRoutes)
+server.route("/responses", responsesRoutes)
 server.route("/usage", usageRoute)
 server.route("/token", tokenRoute)
 
@@ -26,6 +28,7 @@ server.route("/token", tokenRoute)
 server.route("/v1/chat/completions", completionRoutes)
 server.route("/v1/models", modelRoutes)
 server.route("/v1/embeddings", embeddingRoutes)
+server.route("/v1/responses", responsesRoutes)
 
 // Anthropic compatible endpoints
 server.route("/v1/messages", messageRoutes)
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
new file mode 100644
index 000000000..d19681743
--- /dev/null
+++ b/src/services/copilot/create-responses.ts
@@ -0,0 +1,179 @@
+import consola from "consola"
+import { events } from "fetch-event-stream"
+
+import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config"
+import { HTTPError } from "~/lib/error"
+import { state } from "~/lib/state"
+
+/**
+ * Posts a payload to `${copilotBaseUrl(state)}/responses`.
+ *
+ * @param payload - Request body; sent as `JSON.stringify(payload)`.
+ * @returns `events(response)` when `payload.stream` is truthy, otherwise the
+ *   parsed JSON body asserted as a `ResponseObject`.
+ * @throws Error when `state.copilotToken` is not set.
+ * @throws HTTPError when `response.ok` is false.
+ */
+export const createResponses = async (payload: ResponsesPayload) => {
+  if (!state.copilotToken) throw new Error("Copilot token not found")
+
+  // True when any input message carries an `input_image` content part.
+  const enableVision =
+    Array.isArray(payload.input)
+    && payload.input.some(
+      (x) =>
+        Array.isArray(x.content)
+        && x.content.some((part) => part.type === "input_image"),
+    )
+
+  // An assistant or tool message in the input sets X-Initiator to "agent".
+  const isAgentCall =
+    Array.isArray(payload.input)
+    && payload.input.some((msg) => ["assistant", "tool"].includes(msg.role))
+
+  const headers: Record<string, string> = {
+    ...copilotHeaders(state, enableVision),
+    "X-Initiator": isAgentCall ? "agent" : "user",
+  }
+
+  const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
+    method: "POST",
+    headers,
+    body: JSON.stringify(payload),
+  })
+
+  if (!response.ok) {
+    consola.error("Failed to create response", response)
+    throw new HTTPError("Failed to create response", response)
+  }
+
+  if (payload.stream) {
+    return events(response)
+  }
+
+  return (await response.json()) as ResponseObject
+}
+
+// Payload types
+
+/** Request body accepted by `createResponses`. */
+export interface ResponsesPayload {
+  model: string
+  input: string | Array<InputMessage>
+  stream?: boolean | null
+  temperature?: number | null
+  top_p?: number | null
+  max_output_tokens?: number | null
+  tools?: Array<ResponseTool> | null
+  tool_choice?:
+    | "auto"
+    | "none"
+    | "required"
+    | { type: "function"; name: string }
+    | null
+  previous_response_id?: string | null
+  instructions?: string | null
+  reasoning?: { effort: "low" | "medium" | "high" } | null
+  metadata?: Record<string, string> | null
+  user?: string | null
+}
+
+export interface InputMessage {
+  role: "user" | "assistant" | "system" | "developer" | "tool"
+  content: string | Array<InputContentPart>
+  name?: string
+  tool_call_id?: string
+}
+
+export type InputContentPart = InputTextPart | InputImagePart | InputFilePart
+
+export interface InputTextPart {
+  type: "input_text"
+  text: string
+}
+
+export interface InputImagePart {
+  type: "input_image"
+  image_url?: { url: string; detail?: "low" | "high" | "auto" }
+  file_id?: string
+}
+
+export interface InputFilePart {
+  type: "input_file"
+  file_id?: string
+  file_url?: string
+  filename?: string
+}
+
+export interface ResponseTool {
+  type: "function"
+  name: string
+  description?: string
+  parameters?: Record<string, unknown>
+  strict?: boolean | null
+}
+
+// Response types (non-streaming)
+
+/** Body returned by `createResponses` for non-streaming requests. */
+export interface ResponseObject {
+  id: string
+  object: "response"
+  created_at: number
+  model: string
+  output: Array<OutputItem>
+  status: "completed" | "incomplete" | "failed" | "cancelled"
+  usage?: ResponseUsage
+  instructions?: string | null
+  error?: ResponseError | null
+  metadata?: Record<string, string> | null
+}
+
+export type OutputItem = MessageOutputItem | FunctionCallOutputItem
+
+export interface MessageOutputItem {
+  type: "message"
+  id: string
+  role: "assistant"
+  content: Array<OutputContentPart>
+  status: "completed" | "incomplete"
+}
+
+export type OutputContentPart = OutputTextPart | RefusalPart
+
+export interface OutputTextPart {
+  type: "output_text"
+  text: string
+  annotations?: Array<unknown>
+}
+
+export interface RefusalPart {
+  type: "refusal"
+  refusal: string
+}
+
+export interface FunctionCallOutputItem {
+  type: "function_call"
+  id: string
+  call_id: string
+  name: string
+  arguments: string
+  status: "completed"
+}
+
+export interface ResponseUsage {
+  input_tokens: number
+  output_tokens: number
+  total_tokens: number
+  input_tokens_details?: {
+    cached_tokens: number
+  }
+  output_tokens_details?: {
+    reasoning_tokens: number
+  }
+}
+
+export interface ResponseError {
+  code: string
+  message: string
+}