feat: add OpenAI Responses API endpoint (/responses and /v1/responses)

Copilot · web-flow · commit ffd19ffe4910 · 2026-04-21T22:20:26.000Z
diff --git a/bun.lock b/bun.lock
diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
@@ -0,0 +1,77 @@
+import type { Context } from "hono"
+
+import consola from "consola"
+import { streamSSE } from "hono/streaming"
+
+import { awaitApproval } from "~/lib/approval"
+import { checkRateLimit } from "~/lib/rate-limit"
+import { state } from "~/lib/state"
+import { isNullish } from "~/lib/utils"
+import {
+  createResponses,
+  type ResponseObject,
+  type ResponsesPayload,
+} from "~/services/copilot/create-responses"
+
+/**
+ * Handles a Responses request by relaying its JSON body to `createResponses`.
+ *
+ * The rate limit is checked first and, when `state.manualApprove` is set, the
+ * request waits for approval before anything is sent upstream. A complete
+ * response object is returned as JSON; any other result is relayed to the
+ * client as server-sent events.
+ *
+ * @param c - Hono context of the incoming request.
+ * @returns The JSON or event-stream response for the client.
+ */
+export async function handleResponse(c: Context) {
+  await checkRateLimit(state)
+
+  const body = await c.req.json<ResponsesPayload>()
+  consola.debug("Request payload:", JSON.stringify(body).slice(-400))
+
+  // Without a client-supplied limit, use the one listed for the requested
+  // model; if that model is not in `state.models` the field stays undefined.
+  let payload = body
+  if (isNullish(body.max_output_tokens)) {
+    const model = state.models?.data.find(({ id }) => id === body.model)
+    const limit = model?.capabilities.limits.max_output_tokens
+    payload = { ...body, max_output_tokens: limit }
+    consola.debug(
+      "Set max_output_tokens to:",
+      JSON.stringify(payload.max_output_tokens),
+    )
+  }
+
+  if (state.manualApprove) {
+    await awaitApproval()
+  }
+
+  const result = await createResponses(payload)
+
+  if (isNonStreaming(result)) {
+    consola.debug("Non-streaming response:", JSON.stringify(result))
+    return c.json(result)
+  }
+
+  consola.debug("Streaming response")
+  return streamSSE(c, async (stream) => {
+    for await (const message of result) {
+      consola.debug("Streaming chunk:", JSON.stringify(message))
+      const { event, data } = message
+      // Events that carry no data are not forwarded.
+      if (!data) continue
+      await stream.writeSSE({ event, data })
+    }
+  })
+}
+
+/**
+ * Type guard: a result with an own `output` property is a complete response
+ * object rather than an event stream.
+ */
+function isNonStreaming(
+  response: Awaited<ReturnType<typeof createResponses>>,
+): response is ResponseObject {
+  return Object.hasOwn(response, "output")
+}
diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts
@@ -0,0 +1,14 @@
+import { Hono } from "hono"
+
+import { forwardError } from "~/lib/error"
+
+import { handleResponse } from "./handler"
+
+/** Router for the Responses endpoint; it registers one POST handler at "/". */
+export const responsesRoutes = new Hono()
+
+// A rejection from the handler is handed to `forwardError`, and whatever that
+// returns becomes the reply.
+responsesRoutes.post("/", (c) =>
+  handleResponse(c).catch((error: unknown) => forwardError(c, error)),
+)
diff --git a/src/server.ts b/src/server.ts
@@ -6,6 +6,7 @@ import { completionRoutes } from "./routes/chat-completions/route"
 import { embeddingRoutes } from "./routes/embeddings/route"
 import { messageRoutes } from "./routes/messages/route"
 import { modelRoutes } from "./routes/models/route"
+import { responsesRoutes } from "./routes/responses/route"
 import { tokenRoute } from "./routes/token/route"
 import { usageRoute } from "./routes/usage/route"
 
@@ -19,13 +20,15 @@ server.get("/", (c) => c.text("Server running"))
 server.route("/chat/completions", completionRoutes)
 server.route("/models", modelRoutes)
 server.route("/embeddings", embeddingRoutes)
+server.route("/responses", responsesRoutes)
 server.route("/usage", usageRoute)
 server.route("/token", tokenRoute)
 
 // Compatibility with tools that expect v1/ prefix
 server.route("/v1/chat/completions", completionRoutes)
 server.route("/v1/models", modelRoutes)
 server.route("/v1/embeddings", embeddingRoutes)
+server.route("/v1/responses", responsesRoutes)
 
 // Anthropic compatible endpoints
 server.route("/v1/messages", messageRoutes)
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
@@ -0,0 +1,215 @@
+import consola from "consola"
+import { events } from "fetch-event-stream"
+
+import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config"
+import { HTTPError } from "~/lib/error"
+import { state } from "~/lib/state"
+
+/** Roles that mark an input message as coming from the agent loop. */
+const AGENT_ROLES = ["assistant", "tool"]
+
+/**
+ * Item types the Responses API uses for tool calls and their results. Such
+ * items carry no `role`, so they have to be recognised by `type`.
+ */
+const AGENT_ITEM_TYPES = ["function_call", "function_call_output"]
+
+/**
+ * Reports whether an input item belongs to an ongoing agent loop.
+ *
+ * The parameter type is intentionally loose: the body is forwarded as
+ * received, so items may be tool items that `InputMessage` does not describe.
+ */
+const isAgentItem = (item: { role?: string; type?: string }): boolean =>
+  (item.role !== undefined && AGENT_ROLES.includes(item.role))
+  || (item.type !== undefined && AGENT_ITEM_TYPES.includes(item.type))
+
+/**
+ * Sends a Responses request to the Copilot API.
+ *
+ * @param payload - Request body; it is forwarded upstream as JSON.
+ * @returns The parsed response object, or an async iterable of server-sent
+ *   events when `payload.stream` is truthy.
+ * @throws Error when the shared state holds no Copilot token.
+ * @throws HTTPError when the upstream reply is not OK.
+ */
+export const createResponses = async (payload: ResponsesPayload) => {
+  if (!state.copilotToken) throw new Error("Copilot token not found")
+
+  // A plain-string input has no items to inspect.
+  const items = Array.isArray(payload.input) ? payload.input : []
+
+  const enableVision = items.some(
+    (x) =>
+      Array.isArray(x.content)
+      && x.content.some((part) => part.type === "input_image"),
+  )
+
+  // Looking at roles alone would label a follow-up request "user" when its
+  // only agent items are function calls or function call outputs.
+  const isAgentCall = items.some((item) => isAgentItem(item))
+
+  const headers: Record<string, string> = {
+    ...copilotHeaders(state, enableVision),
+    "X-Initiator": isAgentCall ? "agent" : "user",
+  }
+
+  const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
+    method: "POST",
+    headers,
+    body: JSON.stringify(payload),
+  })
+
+  if (!response.ok) {
+    consola.error("Failed to create response", response)
+    throw new HTTPError("Failed to create response", response)
+  }
+
+  if (payload.stream) {
+    return events(response)
+  }
+
+  return (await response.json()) as ResponseObject
+}
+
+// Payload types
+
+/** Request body accepted by `createResponses`. */
+export interface ResponsesPayload {
+  model: string
+  input: string | Array<InputMessage>
+  /** When truthy, the upstream reply is consumed as an event stream. */
+  stream?: boolean | null
+  temperature?: number | null
+  top_p?: number | null
+  max_output_tokens?: number | null
+  tools?: Array<ResponseTool> | null
+  tool_choice?:
+    | "auto"
+    | "none"
+    | "required"
+    | { type: "function"; name: string }
+    | null
+  previous_response_id?: string | null
+  instructions?: string | null
+  reasoning?: { effort: "low" | "medium" | "high" } | null
+  metadata?: Record<string, string> | null
+  user?: string | null
+}
+
+/**
+ * Message-shaped input item. Tool-call and tool-result items, which clients
+ * may also place in `input`, are not described by this type.
+ */
+export interface InputMessage {
+  role: "user" | "assistant" | "system" | "developer" | "tool"
+  content: string | Array<InputContentPart>
+  name?: string
+  tool_call_id?: string
+}
+
+/** One part of a multi-part message content, discriminated by `type`. */
+export type InputContentPart = InputTextPart | InputImagePart | InputFilePart
+
+/** Text part of a message. */
+export interface InputTextPart {
+  type: "input_text"
+  text: string
+}
+
+/** Image part; when present, the vision flag is passed to `copilotHeaders`. */
+export interface InputImagePart {
+  type: "input_image"
+  image_url?: { url: string; detail?: "low" | "high" | "auto" }
+  file_id?: string
+}
+
+/** File part of a message. */
+export interface InputFilePart {
+  type: "input_file"
+  file_id?: string
+  file_url?: string
+  filename?: string
+}
+
+/** Tool definition of type `function`. */
+export interface ResponseTool {
+  type: "function"
+  name: string
+  description?: string
+  parameters?: Record<string, unknown>
+  strict?: boolean | null
+}
+
+// Response types (non-streaming)
+
+/** Body that `createResponses` returns for a non-streaming request. */
+export interface ResponseObject {
+  id: string
+  object: "response"
+  created_at: number
+  model: string
+  output: Array<OutputItem>
+  status: "completed" | "incomplete" | "failed" | "cancelled"
+  usage?: ResponseUsage
+  instructions?: string | null
+  error?: ResponseError | null
+  metadata?: Record<string, string> | null
+}
+
+/** One entry of `ResponseObject.output`, discriminated by `type`. */
+export type OutputItem = MessageOutputItem | FunctionCallOutputItem
+
+/** Assistant message entry of the output. */
+export interface MessageOutputItem {
+  type: "message"
+  id: string
+  role: "assistant"
+  content: Array<OutputContentPart>
+  status: "completed" | "incomplete"
+}
+
+/** One part of an output message, discriminated by `type`. */
+export type OutputContentPart = OutputTextPart | RefusalPart
+
+/** Text part of an output message. */
+export interface OutputTextPart {
+  type: "output_text"
+  text: string
+  annotations?: Array<unknown>
+}
+
+/** Refusal part of an output message. */
+export interface RefusalPart {
+  type: "refusal"
+  refusal: string
+}
+
+/** Function-call entry of the output; `arguments` is kept as a string. */
+export interface FunctionCallOutputItem {
+  type: "function_call"
+  id: string
+  call_id: string
+  name: string
+  arguments: string
+  status: "completed"
+}
+
+/** Token counts reported with a response. */
+export interface ResponseUsage {
+  input_tokens: number
+  output_tokens: number
+  total_tokens: number
+  input_tokens_details?: {
+    cached_tokens: number
+  }
+  output_tokens_details?: {
+    reasoning_tokens: number
+  }
+}
+
+/** Error details attached to a response. */
+export interface ResponseError {
+  code: string
+  message: string
+}