From f863e76115459663dbc8d259a7b75d6a92c527d7 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Tue, 17 Jun 2025 15:35:41 +0700 Subject: [PATCH 01/25] docs: Update warning message in README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4645336ed..352b87ac6 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Copilot API -⚠️ **EDUCATIONAL PURPOSE ONLY** ⚠️ -This project is a reverse-engineered implementation of the GitHub Copilot API created for educational purposes only. It is not officially supported by GitHub and should not be used in production environments. +> [!WARNING] +> This is a reverse-engineered and unofficial implementation of the GitHub Copilot API. It is not supported by GitHub, and may break unexpectedly. Use at your own risk. [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/E1E519XS7W) From e8e28abe0a88285f9daaa51939d48dbc970b1630 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Tue, 17 Jun 2025 15:37:56 +0700 Subject: [PATCH 02/25] docs: fix ai writing --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 352b87ac6..b067f25d0 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Copilot API > [!WARNING] -> This is a reverse-engineered and unofficial implementation of the GitHub Copilot API. It is not supported by GitHub, and may break unexpectedly. Use at your own risk. +> This is a reverse-engineered proxy of GitHub Copilot API. It is not supported by GitHub, and may break unexpectedly. Use at your own risk. 
[![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/E1E519XS7W) From 3aeaa8ff7d6e3272cb9536d167359ad596e864e7 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 02:09:01 +0700 Subject: [PATCH 03/25] feat: better openai types --- .../copilot/create-chat-completions.ts | 117 ++++++++++++------ 1 file changed, 81 insertions(+), 36 deletions(-) diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 7d54d11f0..4cf79e48d 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -9,14 +9,10 @@ export const createChatCompletions = async ( ) => { if (!state.copilotToken) throw new Error("Copilot token not found") - for (const message of payload.messages) { - intoCopilotMessage(message) - } - const visionEnable = payload.messages.some( (x) => typeof x.content !== "string" - && x.content.some((x) => x.type === "image_url"), + && x.content?.some((x) => x.type === "image_url"), ) const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, { @@ -35,33 +31,35 @@ export const createChatCompletions = async ( return (await response.json()) as ChatCompletionResponse } -const intoCopilotMessage = (message: Message) => { - if (typeof message.content === "string") return false - - for (const part of message.content) { - if (part.type === "input_image") part.type = "image_url" - } -} - // Streaming types export interface ChatCompletionChunk { - choices: [Choice] - created: number - object: "chat.completion.chunk" id: string + object: "chat.completion.chunk" + created: number model: string + choices: [Choice] + system_fingerprint?: string } interface Delta { - content?: string - role?: string + content?: string | null + role?: "user" | "assistant" | "system" | "tool" + tool_calls?: Array<{ + index: number + id?: string + type?: "function" + function?: { + name?: string + arguments?: string + } + }> } interface Choice 
{ index: number delta: Delta - finish_reason: "stop" | null + finish_reason: "stop" | "length" | "tool_calls" | "content_filter" | null logprobs: null } @@ -69,17 +67,23 @@ interface Choice { export interface ChatCompletionResponse { id: string - object: string + object: "chat.completion" created: number model: string choices: [ChoiceNonStreaming] + system_fingerprint?: string + usage?: { + prompt_tokens: number + completion_tokens: number + total_tokens: number + } } interface ChoiceNonStreaming { index: number message: Message logprobs: null - finish_reason: "stop" + finish_reason: "stop" | "length" | "tool_calls" | "content_filter" } // Payload types @@ -87,25 +91,66 @@ interface ChoiceNonStreaming { export interface ChatCompletionsPayload { messages: Array model: string - temperature?: number - top_p?: number - max_tokens?: number - stop?: Array - n?: number - stream?: boolean + temperature?: number | null + top_p?: number | null + max_tokens?: number | null + stop?: string | Array | null + n?: number | null + stream?: boolean | null + + frequency_penalty?: number | null + presence_penalty?: number | null + logit_bias?: Record | null + logprobs?: boolean | null + response_format?: { type: "json_object" } | null + seed?: number | null + tools?: Array | null + tool_choice?: + | "none" + | "auto" + | { type: "function"; function: { name: string } } + | null + user?: string | null +} + +export interface Tool { + type: "function" + function: { + name: string + description?: string + parameters: Record + } } export interface Message { - role: "user" | "assistant" | "system" - content: string | Array + role: "user" | "assistant" | "system" | "tool" + content: string | Array | null + + name?: string + tool_calls?: Array + tool_call_id?: string +} + +export interface ToolCall { + id: string + type: "function" + function: { + name: string + arguments: string + } } -// https://platform.openai.com/docs/api-reference +export type ContentPart = TextPart | ImagePart -export 
interface ContentPart { - type: "input_image" | "input_text" | "image_url" - text?: string - image_url?: string +export interface TextPart { + type: "text" + text: string +} + +export interface ImagePart { + type: "image_url" + image_url: { + url: string + detail?: "low" | "high" | "auto" + } } -// https://platform.openai.com/docs/guides/images-vision#giving-a-model-images-as-input -// Note: copilot use "image_url", but openai use "input_image" From a8f5f225cea85524bfeeeb6b3a779a3a9a1599db Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 09:40:47 +0700 Subject: [PATCH 04/25] feat: Add Anthropic API documentation and OpenAI mapping --- docs/anthropic.md | 148 ++++++++++ docs/mapping.md | 119 ++++++++ docs/openai.md | 192 +++++++++++++ src/lib/tokenizer.ts | 34 ++- src/routes/messages/handler.ts | 3 + src/routes/messages/openai-to-anthropic.ts | 301 +++++++++++++++++++++ src/routes/messages/route.ts | 15 + 7 files changed, 806 insertions(+), 6 deletions(-) create mode 100644 docs/anthropic.md create mode 100644 docs/mapping.md create mode 100644 docs/openai.md create mode 100644 src/routes/messages/handler.ts create mode 100644 src/routes/messages/openai-to-anthropic.ts create mode 100644 src/routes/messages/route.ts diff --git a/docs/anthropic.md b/docs/anthropic.md new file mode 100644 index 000000000..b391aec2d --- /dev/null +++ b/docs/anthropic.md @@ -0,0 +1,148 @@ +# Claude API Reference + +This document provides a condensed overview of the Anthropic Claude API, covering messages, token counting, and model management. + +--- + +## Messages API + +The Messages API is the primary way to interact with Claude for multi-turn conversations and single queries. + +### Create a Message + +Creates a model response for the given conversation. + +**Endpoint:** `POST /v1/messages` + +#### Request Body + +The request body is a JSON object. 
+ +| Parameter | Type | Required | Description | +| :--------------- | :-------------- | :------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `model` | string | Yes | The model that will complete your prompt. Example: `claude-3-7-sonnet-20250219`. | +| `messages` | array | Yes | A list of input messages comprising the conversation so far. See [The Message Object](https://www.google.com/search?q=%23the-message-object) below. | +| `max_tokens` | integer | Yes | The maximum number of tokens to generate. Different models have different maximums. | +| `system` | string or array | No | A system prompt to provide context and instructions to Claude, such as specifying a role or goal. | +| `metadata` | object | No | An object for metadata, such as a `user_id`, to help detect abuse. Do not include any personally identifying information. | +| `stop_sequences` | array | No | Custom text sequences that will cause the model to stop generating. | +| `stream` | boolean | No | If set, the response will be incrementally streamed using server-sent events. Defaults to `false`. | +| `temperature` | number | No | The amount of randomness injected into the response, ranging from `0.0` to `1.0`. Defaults to `1.0`. | +| `top_p` | number | No | Use nucleus sampling. The model considers tokens with `top_p` probability mass. Should alter `temperature` or `top_p`, but not both. | +| `top_k` | integer | No | Only sample from the top K options for each subsequent token. Recommended for advanced use cases. | +| `tools` | array | No | A list of tools the model may use. See [The Tool Object](https://www.google.com/search?q=%23the-tool-object) below. | +| `tool_choice` | object | No | Controls how the model should use the provided tools. Can be `auto`, `any`, `tool`, or `none`. 
| + +#### The Message Object + +The `messages` array consists of message objects, where each object has a `role` and `content`. Models are trained on alternating `user` and `assistant` turns. + +| Parameter | Type | Required | Description | +| :-------- | :-------------- | :------- | :---------------------------------------------------------------------------------------------------------------------------------- | +| `role` | string | Yes | The role of the message author. Must be either `user` or `assistant`. | +| `content` | string or array | Yes | The content of the message. This can be a simple string or an array of content blocks for multimodal input (e.g., text and images). | + +**Content Blocks:** For multimodal input, the `content` array can contain different types of blocks. + +- **`text`**: A block with a `type` of "text" and a `text` field containing the string. +- **`image`**: A block with a `type` of "image" and a `source` object. The source must specify its `type` (e.g., "base64"), `media_type` (e.g., "image/jpeg"), and `data`. +- **`tool_result`**: A block used to return the output of a tool back to the model. It includes the `tool_use_id`, `content`, and an optional `is_error` flag. + +#### The Tool Object + +The `tools` array allows you to define client-side tools the model can call. + +| Parameter | Type | Required | Description | +| :------------- | :----- | :------- | :-------------------------------------------------------------------------------------------------------- | +| `name` | string | Yes | The name of the tool, matching `^[a-zA-Z0-9_-]{1,64}$`. | +| `description` | string | No | A detailed description of what the tool does, which helps the model decide when to use it. | +| `input_schema` | object | Yes | A [JSON Schema](https://json-schema.org/draft/2020-12) object describing the parameters the tool accepts. | + +#### Response (200 OK) + +A successful non-streaming request returns a `Message` object. 
+ +| Parameter | Type | Description | +| :-------------- | :----- | :------------------------------------------------------------------------------------------------------------------------ | +| `id` | string | A unique identifier for the message object. | +| `type` | string | The object type, which is always `message`. | +| `role` | string | The role of the author, which is always `assistant`. | +| `content` | array | An array of content blocks generated by the model (e.g., `text` or `tool_use`). | +| `model` | string | The model that handled the request. | +| `stop_reason` | string | The reason the model stopped generating tokens. Can be `end_turn`, `max_tokens`, `stop_sequence`, or `tool_use`. | +| `stop_sequence` | string | If the model was stopped by a stop sequence, this field will contain which sequence was generated. Can be null. | +| `usage` | object | An object containing token usage statistics. See [The Usage Object](https://www.google.com/search?q=%23the-usage-object). | + +#### The Usage Object + +The `usage` object details billing and rate-limit token counts. + +| Parameter | Type | Description | +| :-------------- | :------ | :------------------------------------- | +| `input_tokens` | integer | The number of input tokens used. | +| `output_tokens` | integer | The number of output tokens generated. | + +### Count Message Tokens + +Calculates the number of tokens for a given set of messages without executing the model. + +**Endpoint:** `POST /v1/messages/count_tokens` + +#### Request Body + +The request accepts a subset of the "Create a Message" parameters. + +| Parameter | Type | Required | Description | +| :--------- | :-------------- | :------- | :----------------------------------- | +| `model` | string | Yes | The model that would be used. | +| `messages` | array | Yes | A list of input messages. | +| `system` | string or array | No | A system prompt. | +| `tools` | array | No | A list of tools the model could use. 
| + +#### Response (200 OK) + +A successful request returns a JSON object. + +| Parameter | Type | Description | +| :------------- | :------ | :------------------------------------------------------------------------------ | +| `input_tokens` | integer | The total number of tokens counted from the messages, system prompt, and tools. | + +--- + +## Models API + +The Models API allows you to list and retrieve information about available models. + +### List Models + +Lists the currently available models, with the most recent models appearing first. + +**Endpoint:** `GET /v1/models` + +#### Response (200 OK) + +A successful request returns a list of model objects. + +| Parameter | Type | Description | +| :--------- | :------ | :------------------------------------------------------------------------------ | +| `data` | array | A list of [Model Objects](https://www.google.com/search?q=%23the-model-object). | +| `has_more` | boolean | Indicates if more results are available for pagination. | + +### Get a Model + +Retrieves a specific model instance by its ID or alias. + +**Endpoint:** `GET /v1/models/{model_id}` + +#### Response (200 OK) + +A successful request returns a single [Model Object](https://www.google.com/search?q=%23the-model-object). + +#### The Model Object + +| Parameter | Type | Description | +| :------------- | :----- | :------------------------------------------------------------------ | +| `id` | string | The unique model identifier. Example: `claude-3-7-sonnet-20250219`. | +| `type` | string | The object type, which is always `model`. | +| `display_name` | string | A human-readable name for the model. Example: `Claude 3.7 Sonnet`. | +| `created_at` | string | An RFC 3339 datetime string of when the model was released. | diff --git a/docs/mapping.md b/docs/mapping.md new file mode 100644 index 000000000..2870da23c --- /dev/null +++ b/docs/mapping.md @@ -0,0 +1,119 @@ +Of course. 
Here is the updated and corrected mapping document, now including the "Models" and "Token Count" endpoints. + +--- + +### **Comprehensive API Translation: Anthropic Messages & OpenAI Chat Completions** + +This document provides a detailed, side-by-side technical mapping of the Anthropic Messages API and the OpenAI Chat Completions API, based on the provided API specifications. + +--- + +### **1. API Endpoints & Authentication** + +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :-------------- | :------------------------ | :----------------------------------- | +| **Endpoint** | `POST /v1/messages` | `POST /v1/chat/completions` | +| **Auth Header** | `x-api-key: YOUR_API_KEY` | `Authorization: Bearer YOUR_API_KEY` | + +--- + +### **2. Core Request Parameters** + +| Parameter | Anthropic Messages API | OpenAI Chat Completions API | +| :------------------ | :------------------------------------------- | :------------------------------------------------------------------ | +| **Model** | `model` (e.g., `claude-3-7-sonnet-20250219`) | `model` (e.g., `gpt-4o`) | +| **System Prompt** | `system` (A top-level string) | Prepending a message with `role: "system"` to the `messages` array. | +| **Max Tokens** | `max_tokens` (integer) | `max_tokens` (integer) | +| **Stop Sequences** | `stop_sequences` (array of strings) | `stop` (array of strings) | +| **Streaming** | `stream` (boolean) | `stream` (boolean) | +| **Temperature** | `temperature` (0.0 to 1.0) | `temperature` (0.0 to 2.0) | +| **Top P** | `top_p` (0.0 to 1.0) | `top_p` (0.0 to 1.0) | +| **Top K** | `top_k` (integer) | **Not Supported** | +| **User Identifier** | `metadata.user_id` (string) | `user` (string) | + +--- + +### **3. Message Structure** + +Both APIs use a `messages` array, but the structure and content types differ. + +#### **3.1. 
Message Roles** + +| Role | Anthropic Messages API | OpenAI Chat Completions API | +| :---------------- | :-------------------------------------------- | :-------------------------- | +| **User** | `user` | `user` | +| **Assistant** | `assistant` | `assistant` | +| **System** | Handled via the top-level `system` parameter. | `system` | +| **Tool/Function** | A `user` message with `tool_result` content. | `tool` | + +#### **3.2. Message Content Types** + +| Content Type | Anthropic Messages API | OpenAI Chat Completions API | +| :-------------- | :----------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------- | +| **Text** | `content` can be a single string or an array containing `{"type": "text", "text": "..."}`. | A message object's `content` property is a string, or an array containing `{"type": "text", "text": "..."}`. | +| **Image** | `content` array can contain `{"type": "image", "source": {"type": "base64", "media_type": "...", "data": "..."}}`. | `content` array can contain `{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}`. | +| **Tool Result** | A `user` message `content` array can contain `{"type": "tool_result", "tool_use_id": "...", "content": "..."}`. | A distinct message object with `{"role": "tool", "tool_call_id": "...", "content": "..."}`. | + +--- + +### **4. Tool & Function Handling** + +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :------------------------ | :----------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Tool Definition** | `tools` array. 
Each tool has `name`, `description`, and `input_schema`. | `tools` array. Each tool has `type: "function"` and a `function` object with `name`, `description`, and `parameters` (JSON Schema). | +| **Tool Choice** | `tool_choice` object with `type`: \ - `"auto"`: Model decides. \ - `"any"`: Forces model to use a tool. \ - `"tool"`: Forces a specific tool. | `tool_choice` string or object: \ - `"auto"`: Model decides. \ - `"required"`: Forces model to call a tool. \ - `{"type": "function", ...}`: Forces a specific function. | +| **Tool Call in Response** | Appears in the `content` array as `{"type": "tool_use", "id": "...", "name": "...", "input": {...}}`. | Appears in the `message` object as a `tool_calls` array, with each call having an `id` and a `function` object with `name` and `arguments` (as a JSON string). | + +--- + +### **5. Response Structure** + +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :------------------- | :----------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------- | +| **Primary Object** | A single response object. | A `choices` array containing one or more message objects. | +| **Stop Reason** | `stop_reason` field with values like `end_turn`, `max_tokens`, `tool_use`, `stop_sequence`, `refusal`. | `finish_reason` field within each choice, with values like `stop`, `length`, `tool_calls`, `content_filter`. | +| **Usage Statistics** | `usage` object with `input_tokens` and `output_tokens`. | `usage` object with `prompt_tokens`, `completion_tokens`, and `total_tokens`. | + +--- + +### **6. Model & Tokenization Endpoints** + +#### **6.1. 
List Available Models** + +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :---------------- | :------------------------------------------ | :------------------------------------- | +| **Endpoint** | `GET /v1/models` | `GET /v1/models` | +| **Response** | Paginated list in `data` array. | List in `data` array. | +| **Object Fields** | `id`, `display_name`, `created_at`, `type`. | `id`, `created`, `owned_by`, `object`. | + +#### **6.2. Retrieve a Specific Model** + +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :---------------- | :------------------------------------------ | :------------------------------------- | +| **Endpoint** | `GET /v1/models/{model_id}` | `GET /v1/models/{model}` | +| **Response** | A single `ModelInfo` object. | A single `Model` object. | +| **Object Fields** | `id`, `display_name`, `created_at`, `type`. | `id`, `created`, `owned_by`, `object`. | + +#### **6.3. Count Tokens** + +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :---------------- | :------------------------------------------------------------------------------------------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Endpoint** | `POST /v1/messages/count_tokens` | **No Direct API Endpoint** | +| **Functionality** | Counts tokens for a given message payload, including images and tools, without generating a completion. | Token counts are returned in the `usage` object only after a completion is generated. Client-side libraries like `tiktoken` must be used for estimation. | +| **Response** | `{"input_tokens": ...}` | N/A | + +--- + +### **7. Streaming & Error Handling** + +- **Streaming:** Both APIs use Server-Sent Events (SSE). 
A translation layer must convert OpenAI's stream of `chat.completion.chunk` objects into Anthropic's more granular, named-event stream (`message_start`, `content_block_delta`, etc.). +- **Error Handling:** Error responses are structurally similar, containing a main `error` object. HTTP status codes generally correspond (e.g., 400 for bad requests, 401 for auth issues, 429 for rate limits). + +--- + +### **8. Summary of Key Differences** + +- **Token Counting:** Anthropic provides a dedicated API endpoint for counting tokens before sending a request, while OpenAI does not. +- **`top_k`:** Supported by Anthropic for request sampling, but not by OpenAI's Chat Completions API. +- **Model Information:** The APIs return different metadata for their models. Anthropic provides a `display_name`, whereas OpenAI provides `owned_by`. +- **Partial Assistant Prefill:** Anthropic allows providing a prefix for the assistant's response, a feature OpenAI does not support. diff --git a/docs/openai.md b/docs/openai.md new file mode 100644 index 000000000..47f5bf85a --- /dev/null +++ b/docs/openai.md @@ -0,0 +1,192 @@ +# Create Chat Completion + +Creates a model response for the given chat conversation. + +**Endpoint:** `POST /v1/chat/completions` + +### Summary + +This endpoint generates a model response for a given conversation. It is a highly flexible endpoint that supports text generation, vision capabilities, and function calling. + +**Recommendation:** For new projects, it is recommended to use the [Responses API](/docs/api-reference/responses) to leverage the latest platform features. You can find a comparison here: [Chat Completions vs. Responses](/docs/guides/responses-vs-chat-completions?api-mode=responses). 
+ +--- + +## Request Body + +The request body must be a JSON object with the following parameters: + +| Parameter | Type | Required | Description | +| ------------------- | ---------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `messages` | array | Yes | A list of messages comprising the conversation so far. See the [Message Object](#the-message-object) section below. | +| `model` | string | Yes | ID of the model to use. See the [model overview](/docs/models) for available models. | +| `frequency_penalty` | number | No | Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. Defaults to 0. | +| `logit_bias` | map | No | A map to modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps token IDs (as keys) to an associated bias value from -100 to 100. | +| `logprobs` | boolean | No | Whether to return log probabilities of the output tokens. If true, returns the log probabilities of each output token in the `content` of `message`. Defaults to `false`. | +| `max_tokens` | integer | No | The maximum number of tokens to generate in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length. **(Deprecated in favor of `max_completion_tokens` on newer models)** | +| `n` | integer | No | How many chat completion choices to generate for each input message. Defaults to 1. | +| `presence_penalty` | number | No | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. Defaults to 0. 
| +| `response_format` | object | No | An object specifying the format that the model must output. For example, `{"type": "json_object"}`. | +| `seed` | integer | No | (Beta) If specified, the system will make a best effort to sample deterministically. | +| `stop` | string or array | No | Up to 4 sequences where the API will stop generating further tokens. | +| `stream` | boolean | No | If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available. Defaults to `false`. | +| `temperature` | number | No | What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. Defaults to 1. | +| `top_p` | number | No | An alternative to sampling with temperature, called nucleus sampling. The model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. Defaults to 1. | +| `tools` | array | No | A list of tools the model may call. See the [Tool Object](#the-tool-object) section below. | +| `tool_choice` | string or object | No | Controls which, if any, tool is called by the model. Can be `none`, `auto`, `required`, or specify a particular function like `{"type": "function", "function": {"name": "my_function"}}`. | +| `user` | string | No | A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. | + +--- + +### The Message Object + +The `messages` array consists of message objects, where each object has a `role` and `content`. + +| Parameter | Type | Required | Description | +| -------------- | --------------- | -------- | ------------------------------------------------------------------------------------------------------ | +| `role` | string | Yes | The role of the author of this message. 
Can be `developer`, `system`, `user`, `assistant`, or `tool`. | +| `content` | string or array | Yes | The contents of the message. This can be a string or an array of content parts (for multimodal input). | +| `name` | string | No | An optional name for the participant, providing differentiation for participants of the same role. | +| `tool_calls` | array | No | The tool calls generated by the model, if any. | +| `tool_call_id` | string | No | The ID of the tool call that this message is responding to. (Required if `role` is `tool`). | + +#### User Message Content Parts (Multimodal) + +When the `content` of a `user` message is an array, it can contain a mix of text and image parts. + +| Type | Description | +| ----------- | ----------------------------------------------------------------------------------------------------------------------- | +| `text` | A text part, containing the string of text. | +| `image_url` | An image part, containing a URL or base64-encoded image data and an optional `detail` level (`low`, `high`, or `auto`). | + +### The Tool Object + +The `tools` array allows you to define functions the model can call. + +| Parameter | Type | Required | Description | +| ---------- | ------ | -------- | ---------------------------------------------------------- | +| `type` | string | Yes | The type of tool. Currently, only `function` is supported. | +| `function` | object | Yes | An object defining the function. See below. | + +#### The Function Object + +| Parameter | Type | Required | Description | +| ------------- | ------ | -------- | ----------------------------------------------------------------------------------------------------------------------------- | +| `name` | string | Yes | The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. | +| `description` | string | No | A description of what the function does, used by the model to decide when to call it. 
| +| `parameters` | object | No | The parameters the function accepts, described as a JSON Schema object. | + +--- + +## Responses + +### Successful Response (200 OK) + +A successful non-streaming request returns a JSON object with the following structure. + +| Parameter | Type | Description | +| -------------------- | ------- | --------------------------------------------------------------------------------------- | +| `id` | string | A unique identifier for the chat completion. | +| `object` | string | The object type, which is always `chat.completion`. | +| `created` | integer | The Unix timestamp (in seconds) of when the completion was created. | +| `model` | string | The model used for the chat completion. | +| `choices` | array | A list of chat completion choices. See [The Choice Object](#the-choice-object). | +| `usage` | object | Usage statistics for the completion request. See [The Usage Object](#the-usage-object). | +| `system_fingerprint` | string | This fingerprint represents the backend configuration that the model runs with. | + +#### The Choice Object + +| Parameter | Type | Description | +| --------------- | ------- | ----------------------------------------------------------------------------------------------------------- | +| `index` | integer | The index of the choice in the list of choices. | +| `message` | object | A message object containing the generated response. See below. | +| `finish_reason` | string | The reason the model stopped generating tokens. Can be `stop`, `length`, `tool_calls`, or `content_filter`. | +| `logprobs` | object | Log probability information for the choice. Null if `logprobs` was not requested. | + +#### The Response Message Object + +| Parameter | Type | Description | +| ------------ | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `role` | string | The role of the author, which will be `assistant`. 
| +| `content` | string | The text content of the message. Can be null if `tool_calls` are present. | +| `tool_calls` | array | The tool calls generated by the model, if any. Each object contains an `id`, `type` ('function'), and a `function` object with `name` and `arguments`. | + +#### The Usage Object + +| Parameter | Type | Description | +| ------------------- | ------- | --------------------------------------------- | +| `prompt_tokens` | integer | Number of tokens in the prompt. | +| `completion_tokens` | integer | Number of tokens in the generated completion. | +| `total_tokens` | integer | Total number of tokens used in the request. | + +### Streaming Response (200 OK) + +If `stream: true` is set, the API streams back a sequence of server-sent events. + +Each event is a JSON object representing a `chat.completion.chunk`. + +#### The Chat Completion Chunk Object + +| Parameter | Type | Description | +| --------- | ------- | ------------------------------------------------------------------------ | +| `id` | string | A unique identifier for the chat completion. Each chunk has the same ID. | +| `object` | string | The object type, which is always `chat.completion.chunk`. | +| `created` | integer | The Unix timestamp of when the completion was created. | +| `model` | string | The model used for the completion. | +| `choices` | array | A list of choices, where each choice contains a `delta` object. | + +#### The Delta Object + +The `delta` object contains the fields that have changed. It can include: + +- `role`: The role of the message author. +- `content`: A partial string of the message content. +- `tool_calls`: A partial list of tool calls, including the function `name` and partial `arguments`. + +The stream is terminated by a `data: [DONE]` message. + +--- + +# Models + +List and describe the various models available in the API. 
+ +## List Models + +Lists the currently available models, and provides basic information about each one such as the owner and availability. + +**Endpoint:** `GET /models` + +### Response Body + +A successful request returns a list of model objects. + +| Parameter | Type | Description | +| --------- | ------ | ---------------------------------------- | +| `object` | string | The object type, which is always "list". | +| `data` | array | A list of model objects. | + +#### The Model Object + +| Parameter | Type | Description | +| ---------- | ------- | --------------------------------------------------------------- | +| `id` | string | The model identifier, which can be referenced in API endpoints. | +| `object` | string | The object type, which is always "model". | +| `created` | integer | The Unix timestamp (in seconds) when the model was created. | +| `owned_by` | string | The organization that owns the model. | + +## Retrieve a Model + +Retrieves a model instance, providing basic information about the model such as the owner and permissioning. + +**Endpoint:** `GET /models/{model}` + +### Path Parameters + +| Parameter | Type | Required | Description | +| --------- | ------ | -------- | -------------------------------------------- | +| `model` | string | Yes | The ID of the model to use for this request. | + +### Response Body + +A successful request returns a single [Model Object](#the-model-object). 
diff --git a/src/lib/tokenizer.ts b/src/lib/tokenizer.ts index 98797c6b6..73cd499f9 100644 --- a/src/lib/tokenizer.ts +++ b/src/lib/tokenizer.ts @@ -3,13 +3,35 @@ import { countTokens } from "gpt-tokenizer/model/gpt-4o" import type { Message } from "~/services/copilot/create-chat-completions" export const getTokenCount = (messages: Array) => { - const input = messages.filter( - (m) => m.role !== "assistant" && typeof m.content === "string", - ) - const output = messages.filter((m) => m.role === "assistant") + const simplifiedMessages = messages.map((message) => { + let content = "" + if (typeof message.content === "string") { + content = message.content + } else if (Array.isArray(message.content)) { + content = message.content + .filter((part) => part.type === "text") + .map((part) => (part as { text: string }).text) + .join("") + } + return { ...message, content } + }) - const inputTokens = countTokens(input) - const outputTokens = countTokens(output) + let inputMessages = simplifiedMessages.filter((message) => { + return message.role !== "tool" + }) + let outputMessages: typeof simplifiedMessages = [] + + const lastMessage = simplifiedMessages.at(-1) + + if (lastMessage?.role === "assistant") { + inputMessages = simplifiedMessages.slice(0, -1) + outputMessages = [lastMessage] + } + + // @ts-expect-error TS can't infer from arr.filter() + const inputTokens = countTokens(inputMessages) + // @ts-expect-error TS can't infer from arr.filter() + const outputTokens = countTokens(outputMessages) return { input: inputTokens, diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts new file mode 100644 index 000000000..25e8c1fb2 --- /dev/null +++ b/src/routes/messages/handler.ts @@ -0,0 +1,3 @@ +import type { Context } from "hono" + +export async function handleCompletion(c: Context) {} diff --git a/src/routes/messages/openai-to-anthropic.ts b/src/routes/messages/openai-to-anthropic.ts new file mode 100644 index 000000000..521da7946 --- /dev/null +++ 
b/src/routes/messages/openai-to-anthropic.ts @@ -0,0 +1,301 @@ +import { + type ChatCompletionResponse, + type ChatCompletionsPayload, + type ContentPart, + type Message, + type TextPart, + type Tool, + type ToolCall, +} from "~/services/copilot/create-chat-completions" + +// Anthropic API Types + +export interface AnthropicMessagesPayload { + model: string + messages: Array + max_tokens: number + system?: string | Array + metadata?: { + user_id?: string + } + stop_sequences?: Array + stream?: boolean + temperature?: number + top_p?: number + top_k?: number + tools?: Array + tool_choice?: { + type: "auto" | "any" | "tool" + name?: string + } +} + +interface AnthropicMessage { + role: "user" | "assistant" + content: string | Array +} + +type AnthropicContentBlock = + | AnthropicTextBlock + | AnthropicImageBlock + | AnthropicToolResultBlock + +interface AnthropicTextBlock { + type: "text" + text: string +} + +interface AnthropicImageBlock { + type: "image" + source: { + type: "base64" + media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" + data: string + } +} + +interface AnthropicToolResultBlock { + type: "tool_result" + tool_use_id: string + content: string +} + +interface AnthropicTool { + name: string + description?: string + input_schema: Record +} + +export interface AnthropicResponse { + id: string + type: "message" + role: "assistant" + content: Array + model: string + stop_reason: "end_turn" | "max_tokens" | "stop_sequence" | "tool_use" | null + stop_sequence: string | null + usage: { + input_tokens: number + output_tokens: number + } +} + +export type AnthropicResponseContentBlock = + | AnthropicTextBlock + | AnthropicToolUseBlock + +interface AnthropicToolUseBlock { + type: "tool_use" + id: string + name: string + input: Record +} + +// Translation functions + +function getAnthropicTextBlocks( + messageContent: Message["content"], +): Array { + if (typeof messageContent === "string") { + return [{ type: "text", text: messageContent }] + } + 
+ if (Array.isArray(messageContent)) { + return messageContent + .filter((part): part is TextPart => part.type === "text") + .map((part) => ({ type: "text", text: part.text })) + } + + return [] +} + +function getAnthropicToolUseBlocks( + toolCalls: Array | undefined, +): Array { + if (!toolCalls) { + return [] + } + return toolCalls.map((toolCall) => ({ + type: "tool_use", + id: toolCall.id, + name: toolCall.function.name, + input: JSON.parse(toolCall.function.arguments) as Record, + })) +} + +function mapOpenAIStopReasonToAnthropic( + finishReason: ChatCompletionResponse["choices"][0]["finish_reason"], +): AnthropicResponse["stop_reason"] { + const stopReasonMap = { + stop: "end_turn", + length: "max_tokens", + tool_calls: "tool_use", + content_filter: "end_turn", + } as const + return stopReasonMap[finishReason] +} + +function mapContent( + content: string | Array, +): string | Array | null { + if (typeof content === "string") { + return content + } + if (!Array.isArray(content)) { + return null + } + + const contentParts: Array = [] + for (const block of content) { + if (block.type === "text") { + contentParts.push({ type: "text", text: block.text }) + } else if (block.type === "image") { + contentParts.push({ + type: "image_url", + image_url: { + url: `data:${block.source.media_type};base64,${block.source.data}`, + }, + }) + } + } + return contentParts +} + +function translateAnthropicMessagesToOpenAI( + anthropicMessages: Array, + system: string | Array | undefined, +): Array { + const messages: Array = [] + + if (system) { + if (typeof system === "string") { + messages.push({ role: "system", content: system }) + } else { + const systemText = system.map((block) => block.text).join("\n\n") + messages.push({ role: "system", content: systemText }) + } + } + + for (const message of anthropicMessages) { + if (message.role === "user" && Array.isArray(message.content)) { + const toolResultBlocks = message.content.filter( + (block): block is AnthropicToolResultBlock 
=> + block.type === "tool_result", + ) + const otherBlocks = message.content.filter( + (block) => block.type !== "tool_result", + ) + + if (otherBlocks.length > 0) { + messages.push({ + role: "user", + content: mapContent(otherBlocks), + }) + } + + for (const block of toolResultBlocks) { + messages.push({ + role: "tool", + tool_call_id: block.tool_use_id, + content: block.content, + }) + } + } else { + messages.push({ + role: message.role, + content: mapContent(message.content), + }) + } + } + return messages +} + +function translateAnthropicToolsToOpenAI( + anthropicTools: Array | undefined, +): Array | undefined { + if (!anthropicTools) { + return undefined + } + return anthropicTools.map((tool) => ({ + type: "function", + function: { + name: tool.name, + description: tool.description, + parameters: tool.input_schema, + }, + })) +} + +function translateAnthropicToolChoiceToOpenAI( + anthropicToolChoice: AnthropicMessagesPayload["tool_choice"], +): ChatCompletionsPayload["tool_choice"] { + if (!anthropicToolChoice) { + return undefined + } + + switch (anthropicToolChoice.type) { + case "auto": { + return "auto" + } + case "any": { + // The type definition for tool_choice is missing "required", but it's a valid value. 
+ // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-expect-error + return "required" + } + case "tool": { + if (anthropicToolChoice.name) { + return { + type: "function", + function: { name: anthropicToolChoice.name }, + } + } + return undefined + } + default: { + return undefined + } + } +} + +export function translateToOpenAI( + payload: AnthropicMessagesPayload, +): ChatCompletionsPayload { + return { + model: payload.model, + messages: translateAnthropicMessagesToOpenAI( + payload.messages, + payload.system, + ), + max_tokens: payload.max_tokens, + stop: payload.stop_sequences, + stream: payload.stream, + temperature: payload.temperature, + top_p: payload.top_p, + user: payload.metadata?.user_id, + tools: translateAnthropicToolsToOpenAI(payload.tools), + tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice), + } +} + +export function translateToAnthropic( + response: ChatCompletionResponse, +): AnthropicResponse { + const choice = response.choices[0] + const textBlocks = getAnthropicTextBlocks(choice.message.content) + const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls) + + return { + id: response.id, + type: "message", + role: "assistant", + model: response.model, + content: [...textBlocks, ...toolUseBlocks], + stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason), + stop_sequence: null, + usage: { + input_tokens: response.usage?.prompt_tokens ?? 0, + output_tokens: response.usage?.completion_tokens ?? 
0, + }, + } +} diff --git a/src/routes/messages/route.ts b/src/routes/messages/route.ts new file mode 100644 index 000000000..c55a3a7b2 --- /dev/null +++ b/src/routes/messages/route.ts @@ -0,0 +1,15 @@ +import { Hono } from "hono" + +import { forwardError } from "~/lib/forward-error" + +import { handleCompletion } from "./handler" + +export const completionRoutes = new Hono() + +completionRoutes.post("/", async (c) => { + try { + return await handleCompletion(c) + } catch (error) { + return await forwardError(c, error) + } +}) From 6e0f213606d5ca44d9eb9e668c40e5e4b4b1f7fc Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 11:03:35 +0700 Subject: [PATCH 05/25] refactor: Rename file and translate payload/response in one file --- ...pic.ts => openai-anthropic-translation.ts} | 175 +++++++++--------- .../copilot/create-chat-completions.ts | 9 +- 2 files changed, 92 insertions(+), 92 deletions(-) rename src/routes/messages/{openai-to-anthropic.ts => openai-anthropic-translation.ts} (97%) diff --git a/src/routes/messages/openai-to-anthropic.ts b/src/routes/messages/openai-anthropic-translation.ts similarity index 97% rename from src/routes/messages/openai-to-anthropic.ts rename to src/routes/messages/openai-anthropic-translation.ts index 521da7946..d3ff0dcd9 100644 --- a/src/routes/messages/openai-to-anthropic.ts +++ b/src/routes/messages/openai-anthropic-translation.ts @@ -91,74 +91,26 @@ interface AnthropicToolUseBlock { input: Record } -// Translation functions +// Payload translation -function getAnthropicTextBlocks( - messageContent: Message["content"], -): Array { - if (typeof messageContent === "string") { - return [{ type: "text", text: messageContent }] - } - - if (Array.isArray(messageContent)) { - return messageContent - .filter((part): part is TextPart => part.type === "text") - .map((part) => ({ type: "text", text: part.text })) - } - - return [] -} - -function getAnthropicToolUseBlocks( - toolCalls: Array | undefined, -): Array { - if 
(!toolCalls) { - return [] - } - return toolCalls.map((toolCall) => ({ - type: "tool_use", - id: toolCall.id, - name: toolCall.function.name, - input: JSON.parse(toolCall.function.arguments) as Record, - })) -} - -function mapOpenAIStopReasonToAnthropic( - finishReason: ChatCompletionResponse["choices"][0]["finish_reason"], -): AnthropicResponse["stop_reason"] { - const stopReasonMap = { - stop: "end_turn", - length: "max_tokens", - tool_calls: "tool_use", - content_filter: "end_turn", - } as const - return stopReasonMap[finishReason] -} - -function mapContent( - content: string | Array, -): string | Array | null { - if (typeof content === "string") { - return content - } - if (!Array.isArray(content)) { - return null - } - - const contentParts: Array = [] - for (const block of content) { - if (block.type === "text") { - contentParts.push({ type: "text", text: block.text }) - } else if (block.type === "image") { - contentParts.push({ - type: "image_url", - image_url: { - url: `data:${block.source.media_type};base64,${block.source.data}`, - }, - }) - } +export function translateToOpenAI( + payload: AnthropicMessagesPayload, +): ChatCompletionsPayload { + return { + model: payload.model, + messages: translateAnthropicMessagesToOpenAI( + payload.messages, + payload.system, + ), + max_tokens: payload.max_tokens, + stop: payload.stop_sequences, + stream: payload.stream, + temperature: payload.temperature, + top_p: payload.top_p, + user: payload.metadata?.user_id, + tools: translateAnthropicToolsToOpenAI(payload.tools), + tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice), } - return contentParts } function translateAnthropicMessagesToOpenAI( @@ -210,6 +162,32 @@ function translateAnthropicMessagesToOpenAI( return messages } +function mapContent( + content: string | Array, +): string | Array | null { + if (typeof content === "string") { + return content + } + if (!Array.isArray(content)) { + return null + } + + const contentParts: Array = [] + for 
(const block of content) { + if (block.type === "text") { + contentParts.push({ type: "text", text: block.text }) + } else if (block.type === "image") { + contentParts.push({ + type: "image_url", + image_url: { + url: `data:${block.source.media_type};base64,${block.source.data}`, + }, + }) + } + } + return contentParts +} + function translateAnthropicToolsToOpenAI( anthropicTools: Array | undefined, ): Array | undefined { @@ -238,9 +216,6 @@ function translateAnthropicToolChoiceToOpenAI( return "auto" } case "any": { - // The type definition for tool_choice is missing "required", but it's a valid value. - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-expect-error return "required" } case "tool": { @@ -258,25 +233,7 @@ function translateAnthropicToolChoiceToOpenAI( } } -export function translateToOpenAI( - payload: AnthropicMessagesPayload, -): ChatCompletionsPayload { - return { - model: payload.model, - messages: translateAnthropicMessagesToOpenAI( - payload.messages, - payload.system, - ), - max_tokens: payload.max_tokens, - stop: payload.stop_sequences, - stream: payload.stream, - temperature: payload.temperature, - top_p: payload.top_p, - user: payload.metadata?.user_id, - tools: translateAnthropicToolsToOpenAI(payload.tools), - tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice), - } -} +// Response translation export function translateToAnthropic( response: ChatCompletionResponse, @@ -299,3 +256,45 @@ export function translateToAnthropic( }, } } + +function getAnthropicTextBlocks( + messageContent: Message["content"], +): Array { + if (typeof messageContent === "string") { + return [{ type: "text", text: messageContent }] + } + + if (Array.isArray(messageContent)) { + return messageContent + .filter((part): part is TextPart => part.type === "text") + .map((part) => ({ type: "text", text: part.text })) + } + + return [] +} + +function getAnthropicToolUseBlocks( + toolCalls: Array | undefined, +): Array { + if 
(!toolCalls) { + return [] + } + return toolCalls.map((toolCall) => ({ + type: "tool_use", + id: toolCall.id, + name: toolCall.function.name, + input: JSON.parse(toolCall.function.arguments) as Record, + })) +} + +function mapOpenAIStopReasonToAnthropic( + finishReason: ChatCompletionResponse["choices"][0]["finish_reason"], +): AnthropicResponse["stop_reason"] { + const stopReasonMap = { + stop: "end_turn", + length: "max_tokens", + tool_calls: "tool_use", + content_filter: "end_turn", + } as const + return stopReasonMap[finishReason] +} diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 4cf79e48d..aaee86cea 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -9,7 +9,7 @@ export const createChatCompletions = async ( ) => { if (!state.copilotToken) throw new Error("Copilot token not found") - const visionEnable = payload.messages.some( + const enableVision = payload.messages.some( (x) => typeof x.content !== "string" && x.content?.some((x) => x.type === "image_url"), @@ -17,7 +17,7 @@ export const createChatCompletions = async ( const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, { method: "POST", - headers: copilotHeaders(state, visionEnable), + headers: copilotHeaders(state, enableVision), body: JSON.stringify(payload), }) @@ -60,7 +60,7 @@ interface Choice { index: number delta: Delta finish_reason: "stop" | "length" | "tool_calls" | "content_filter" | null - logprobs: null + logprobs: object | null } // Non-streaming types @@ -82,7 +82,7 @@ export interface ChatCompletionResponse { interface ChoiceNonStreaming { index: number message: Message - logprobs: null + logprobs: object | null finish_reason: "stop" | "length" | "tool_calls" | "content_filter" } @@ -108,6 +108,7 @@ export interface ChatCompletionsPayload { tool_choice?: | "none" | "auto" + | "required" | { type: "function"; function: { name: string } } | 
null user?: string | null From d3d07967a561f739004e09cf80151bcfa49cd104 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 12:32:00 +0700 Subject: [PATCH 06/25] feat: Add Anthropic API translation and streaming support --- src/routes/messages/anthropic-types.ts | 153 ++++++++++++ src/routes/messages/non-stream-translation.ts | 217 ++++++++++++++++++ .../messages/openai-anthropic-translation.ts | 199 ++++++++++++++++ src/routes/messages/route.ts | 3 +- src/routes/messages/stream-translation.ts | 172 ++++++++++++++ src/routes/messages/utils.ts | 16 ++ 6 files changed, 759 insertions(+), 1 deletion(-) create mode 100644 src/routes/messages/anthropic-types.ts create mode 100644 src/routes/messages/non-stream-translation.ts create mode 100644 src/routes/messages/stream-translation.ts create mode 100644 src/routes/messages/utils.ts diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts new file mode 100644 index 000000000..866abe352 --- /dev/null +++ b/src/routes/messages/anthropic-types.ts @@ -0,0 +1,153 @@ +// Anthropic API Types + +export interface AnthropicMessagesPayload { + model: string + messages: Array + max_tokens: number + system?: string | Array + metadata?: { + user_id?: string + } + stop_sequences?: Array + stream?: boolean + temperature?: number + top_p?: number + top_k?: number + tools?: Array + tool_choice?: { + type: "auto" | "any" | "tool" | "none" + name?: string + } +} + +export interface AnthropicMessage { + role: "user" | "assistant" + content: string | Array +} + +export type AnthropicContentBlock = + | AnthropicTextBlock + | AnthropicImageBlock + | AnthropicToolResultBlock + +export interface AnthropicTextBlock { + type: "text" + text: string +} + +export interface AnthropicImageBlock { + type: "image" + source: { + type: "base64" + media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" + data: string + } +} + +export interface AnthropicToolResultBlock { + type: "tool_result" + 
tool_use_id: string + content: string + is_error?: boolean +} + +export interface AnthropicTool { + name: string + description?: string + input_schema: Record +} + +export interface AnthropicResponse { + id: string + type: "message" + role: "assistant" + content: Array + model: string + stop_reason: "end_turn" | "max_tokens" | "stop_sequence" | "tool_use" | null + stop_sequence: string | null + usage: { + input_tokens: number + output_tokens: number + } +} + +export type AnthropicResponseContentBlock = + | AnthropicTextBlock + | AnthropicToolUseBlock + +export interface AnthropicToolUseBlock { + type: "tool_use" + id: string + name: string + input: Record +} + +// Anthropic Stream Event Types +export interface AnthropicMessageStartEvent { + type: "message_start" + message: Omit< + AnthropicResponse, + "stop_reason" | "stop_sequence" | "content" + > & { + content: [] + } +} + +export interface AnthropicContentBlockStartEvent { + type: "content_block_start" + index: number + content_block: + | { type: "text"; text: string } + | (Omit & { + input: Record + }) +} + +export interface AnthropicContentBlockDeltaEvent { + type: "content_block_delta" + index: number + delta: + | { type: "text_delta"; text: string } + | { type: "input_json_delta"; partial_json: string } +} + +export interface AnthropicContentBlockStopEvent { + type: "content_block_stop" + index: number +} + +export interface AnthropicMessageDeltaEvent { + type: "message_delta" + delta: { + stop_reason: AnthropicResponse["stop_reason"] + stop_sequence: string | null + } + // OpenAI does not provide token usage per chunk, so this is omitted. 
+ // usage: { output_tokens: number } +} + +export interface AnthropicMessageStopEvent { + type: "message_stop" +} + +export type AnthropicStreamEventData = + | AnthropicMessageStartEvent + | AnthropicContentBlockStartEvent + | AnthropicContentBlockDeltaEvent + | AnthropicContentBlockStopEvent + | AnthropicMessageDeltaEvent + | AnthropicMessageStopEvent + +// State for streaming translation +export interface AnthropicStreamState { + messageStartSent: boolean + contentBlockIndex: number + contentBlockOpen: boolean + toolCalls: { + [openAIToolIndex: number]: { + id: string + name: string + anthropicBlockIndex: number + } + } +} diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts new file mode 100644 index 000000000..32c839cea --- /dev/null +++ b/src/routes/messages/non-stream-translation.ts @@ -0,0 +1,217 @@ +import { + type ChatCompletionResponse, + type ChatCompletionsPayload, + type ContentPart, + type Message, + type TextPart, + type Tool, + type ToolCall, +} from "~/services/copilot/create-chat-completions" + +import { + type AnthropicContentBlock, + type AnthropicMessage, + type AnthropicMessagesPayload, + type AnthropicResponse, + type AnthropicTextBlock, + type AnthropicTool, + type AnthropicToolResultBlock, + type AnthropicToolUseBlock, +} from "./anthropic-types" +import { mapOpenAIStopReasonToAnthropic } from "./utils" + +// Payload translation + +export function translateToOpenAI( + payload: AnthropicMessagesPayload, +): ChatCompletionsPayload { + return { + model: payload.model, + messages: translateAnthropicMessagesToOpenAI( + payload.messages, + payload.system, + ), + max_tokens: payload.max_tokens, + stop: payload.stop_sequences, + stream: payload.stream, + temperature: payload.temperature, + top_p: payload.top_p, + user: payload.metadata?.user_id, + tools: translateAnthropicToolsToOpenAI(payload.tools), + tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice), + } +} + +function 
translateAnthropicMessagesToOpenAI( + anthropicMessages: Array, + system: string | Array | undefined, +): Array { + const messages: Array = [] + + if (system) { + if (typeof system === "string") { + messages.push({ role: "system", content: system }) + } else { + const systemText = system.map((block) => block.text).join("\n\n") + messages.push({ role: "system", content: systemText }) + } + } + + for (const message of anthropicMessages) { + if (message.role === "user" && Array.isArray(message.content)) { + const toolResultBlocks = message.content.filter( + (block): block is AnthropicToolResultBlock => + block.type === "tool_result", + ) + const otherBlocks = message.content.filter( + (block) => block.type !== "tool_result", + ) + + if (otherBlocks.length > 0) { + messages.push({ + role: "user", + content: mapContent(otherBlocks), + }) + } + + for (const block of toolResultBlocks) { + messages.push({ + role: "tool", + tool_call_id: block.tool_use_id, + content: block.content, + }) + } + } else { + messages.push({ + role: message.role, + content: mapContent(message.content), + }) + } + } + return messages +} + +function mapContent( + content: string | Array, +): string | Array | null { + if (typeof content === "string") { + return content + } + if (!Array.isArray(content)) { + return null + } + + const contentParts: Array = [] + for (const block of content) { + if (block.type === "text") { + contentParts.push({ type: "text", text: block.text }) + } else if (block.type === "image") { + contentParts.push({ + type: "image_url", + image_url: { + url: `data:${block.source.media_type};base64,${block.source.data}`, + }, + }) + } + } + return contentParts +} + +function translateAnthropicToolsToOpenAI( + anthropicTools: Array | undefined, +): Array | undefined { + if (!anthropicTools) { + return undefined + } + return anthropicTools.map((tool) => ({ + type: "function", + function: { + name: tool.name, + description: tool.description, + parameters: tool.input_schema, + }, + })) 
+} + +function translateAnthropicToolChoiceToOpenAI( + anthropicToolChoice: AnthropicMessagesPayload["tool_choice"], +): ChatCompletionsPayload["tool_choice"] { + if (!anthropicToolChoice) { + return undefined + } + + switch (anthropicToolChoice.type) { + case "auto": { + return "auto" + } + case "any": { + return "required" + } + case "tool": { + if (anthropicToolChoice.name) { + return { + type: "function", + function: { name: anthropicToolChoice.name }, + } + } + return undefined + } + default: { + return undefined + } + } +} + +// Response translation + +export function translateToAnthropic( + response: ChatCompletionResponse, +): AnthropicResponse { + const choice = response.choices[0] + const textBlocks = getAnthropicTextBlocks(choice.message.content) + const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls) + + return { + id: response.id, + type: "message", + role: "assistant", + model: response.model, + content: [...textBlocks, ...toolUseBlocks], + stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason), + stop_sequence: null, + usage: { + input_tokens: response.usage?.prompt_tokens ?? 0, + output_tokens: response.usage?.completion_tokens ?? 
0, + }, + } +} + +function getAnthropicTextBlocks( + messageContent: Message["content"], +): Array { + if (typeof messageContent === "string") { + return [{ type: "text", text: messageContent }] + } + + if (Array.isArray(messageContent)) { + return messageContent + .filter((part): part is TextPart => part.type === "text") + .map((part) => ({ type: "text", text: part.text })) + } + + return [] +} + +function getAnthropicToolUseBlocks( + toolCalls: Array | undefined, +): Array { + if (!toolCalls) { + return [] + } + return toolCalls.map((toolCall) => ({ + type: "tool_use", + id: toolCall.id, + name: toolCall.function.name, + input: JSON.parse(toolCall.function.arguments) as Record, + })) +} diff --git a/src/routes/messages/openai-anthropic-translation.ts b/src/routes/messages/openai-anthropic-translation.ts index d3ff0dcd9..0a17ff29d 100644 --- a/src/routes/messages/openai-anthropic-translation.ts +++ b/src/routes/messages/openai-anthropic-translation.ts @@ -1,4 +1,5 @@ import { + type ChatCompletionChunk, type ChatCompletionResponse, type ChatCompletionsPayload, type ContentPart, @@ -91,6 +92,76 @@ interface AnthropicToolUseBlock { input: Record } +// Anthropic Stream Event Types +export interface AnthropicMessageStartEvent { + type: "message_start" + message: Omit< + AnthropicResponse, + "stop_reason" | "stop_sequence" | "content" + > & { + content: [] + } +} + +export interface AnthropicContentBlockStartEvent { + type: "content_block_start" + index: number + content_block: + | { type: "text"; text: string } + | (Omit & { + input: Record + }) +} + +export interface AnthropicContentBlockDeltaEvent { + type: "content_block_delta" + index: number + delta: + | { type: "text_delta"; text: string } + | { type: "input_json_delta"; partial_json: string } +} + +export interface AnthropicContentBlockStopEvent { + type: "content_block_stop" + index: number +} + +export interface AnthropicMessageDeltaEvent { + type: "message_delta" + delta: { + stop_reason: 
AnthropicResponse["stop_reason"] + stop_sequence: string | null + } + // OpenAI does not provide token usage per chunk, so this is omitted. + // usage: { output_tokens: number } +} + +export interface AnthropicMessageStopEvent { + type: "message_stop" +} + +export type AnthropicStreamEventData = + | AnthropicMessageStartEvent + | AnthropicContentBlockStartEvent + | AnthropicContentBlockDeltaEvent + | AnthropicContentBlockStopEvent + | AnthropicMessageDeltaEvent + | AnthropicMessageStopEvent + +// State for streaming translation +export interface AnthropicStreamState { + messageStartSent: boolean + contentBlockIndex: number + contentBlockOpen: boolean + toolCalls: { + [openAIToolIndex: number]: { + id: string + name: string + anthropicBlockIndex: number + } + } +} + // Payload translation export function translateToOpenAI( @@ -235,6 +306,134 @@ function translateAnthropicToolChoiceToOpenAI( // Response translation +// Stream response translation + +/** + * Translates a single OpenAI ChatCompletionChunk to a series of Anthropic-style stream events. + * This function is stateful and requires a state object to be maintained across calls. + * + * @param chunk The OpenAI chunk to translate. + * @param state The current state of the stream translation. + * @param inputTokens The number of tokens in the prompt, required for the initial message_start event. + * @returns An array of Anthropic stream event data objects. + */ +export function translateChunkToAnthropicEvents( + chunk: ChatCompletionChunk, + state: AnthropicStreamState, + inputTokens: number, +): Array { + const events: Array = [] + const delta = chunk.choices[0].delta + + // 1. 
Handle message_start + if (delta.role === "assistant" && !state.messageStartSent) { + events.push({ + type: "message_start", + message: { + id: chunk.id, + type: "message", + role: "assistant", + content: [], + model: chunk.model, + usage: { + input_tokens: inputTokens, + output_tokens: 1, // Placeholder, not updated in subsequent events + }, + }, + }) + state.messageStartSent = true + } + + // 2. Handle text content + if (delta.content) { + if (!state.contentBlockOpen) { + // Start a new text block if no block is open + events.push({ + type: "content_block_start", + index: state.contentBlockIndex, + content_block: { type: "text", text: "" }, + }) + state.contentBlockOpen = true + } + events.push({ + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { type: "text_delta", text: delta.content }, + }) + } + + // 3. Handle tool calls + if (delta.tool_calls) { + for (const toolCallDelta of delta.tool_calls) { + // A new tool call is starting + if (toolCallDelta.id && toolCallDelta.function?.name) { + if (state.contentBlockOpen) { + // Close the previous content block (which must be a text block) + events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockIndex++ + } + const anthropicBlockIndex = state.contentBlockIndex + state.toolCalls[toolCallDelta.index] = { + id: toolCallDelta.id, + name: toolCallDelta.function.name, + anthropicBlockIndex, + } + events.push({ + type: "content_block_start", + index: anthropicBlockIndex, + content_block: { + type: "tool_use", + id: toolCallDelta.id, + name: toolCallDelta.function.name, + input: {}, + }, + }) + state.contentBlockOpen = true + } + + // Argument chunks for the tool call + if (toolCallDelta.function?.arguments) { + const toolInfo = state.toolCalls[toolCallDelta.index] + if (toolInfo) { + events.push({ + type: "content_block_delta", + index: toolInfo.anthropicBlockIndex, + delta: { + type: "input_json_delta", + partial_json: 
toolCallDelta.function.arguments, + }, + }) + } + } + } + } + + // 4. Handle end of stream + const finishReason = chunk.choices[0].finish_reason + if (finishReason) { + if (state.contentBlockOpen) { + events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockOpen = false + } + events.push({ + type: "message_delta", + delta: { + stop_reason: mapOpenAIStopReasonToAnthropic(finishReason), + stop_sequence: null, + }, + }) + events.push({ type: "message_stop" }) + } + + return events +} + export function translateToAnthropic( response: ChatCompletionResponse, ): AnthropicResponse { diff --git a/src/routes/messages/route.ts b/src/routes/messages/route.ts index c55a3a7b2..b61dd4065 100644 --- a/src/routes/messages/route.ts +++ b/src/routes/messages/route.ts @@ -8,7 +8,8 @@ export const completionRoutes = new Hono() completionRoutes.post("/", async (c) => { try { - return await handleCompletion(c) + await handleCompletion(c) + return } catch (error) { return await forwardError(c, error) } diff --git a/src/routes/messages/stream-translation.ts b/src/routes/messages/stream-translation.ts new file mode 100644 index 000000000..3093a7c7e --- /dev/null +++ b/src/routes/messages/stream-translation.ts @@ -0,0 +1,172 @@ +import { type ChatCompletionChunk } from "~/services/copilot/create-chat-completions" + +import { + type AnthropicStreamEventData, + type AnthropicStreamState, +} from "./anthropic-types" +import { mapOpenAIStopReasonToAnthropic } from "./utils" + +function handleMessageStart( + chunk: ChatCompletionChunk, + state: AnthropicStreamState, + inputTokens: number, +): AnthropicStreamEventData | undefined { + if (chunk.choices[0].delta.role === "assistant" && !state.messageStartSent) { + state.messageStartSent = true + return { + type: "message_start", + message: { + id: chunk.id, + type: "message", + role: "assistant", + content: [], + model: chunk.model, + usage: { + input_tokens: inputTokens, + output_tokens: 1, // 
Placeholder, not updated in subsequent events + }, + }, + } + } +} + +function handleTextContent( + chunk: ChatCompletionChunk, + state: AnthropicStreamState, +): Array { + const events: Array = [] + const { content } = chunk.choices[0].delta + if (!content) { + return events + } + + if (!state.contentBlockOpen) { + // Start a new text block if no block is open + events.push({ + type: "content_block_start", + index: state.contentBlockIndex, + content_block: { type: "text", text: "" }, + }) + state.contentBlockOpen = true + } + events.push({ + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { type: "text_delta", text: content }, + }) + return events +} + +function handleToolCalls( + chunk: ChatCompletionChunk, + state: AnthropicStreamState, +): Array { + const events: Array = [] + const { tool_calls } = chunk.choices[0].delta + if (!tool_calls) { + return events + } + + for (const toolCallDelta of tool_calls) { + // A new tool call is starting + if (toolCallDelta.id && toolCallDelta.function?.name) { + if (state.contentBlockOpen) { + // Close the previous content block (which must be a text block) + events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockIndex++ + } + const anthropicBlockIndex = state.contentBlockIndex + state.toolCalls[toolCallDelta.index] = { + id: toolCallDelta.id, + name: toolCallDelta.function.name, + anthropicBlockIndex, + } + events.push({ + type: "content_block_start", + index: anthropicBlockIndex, + content_block: { + type: "tool_use", + id: toolCallDelta.id, + name: toolCallDelta.function.name, + input: {}, + }, + }) + state.contentBlockOpen = true + } + + // Argument chunks for the tool call + if (toolCallDelta.function?.arguments) { + const toolInfo = state.toolCalls[toolCallDelta.index] + if (toolInfo) { + events.push({ + type: "content_block_delta", + index: toolInfo.anthropicBlockIndex, + delta: { + type: "input_json_delta", + partial_json: 
toolCallDelta.function.arguments, + }, + }) + } + } + } + return events +} + +function handleEndOfStream( + chunk: ChatCompletionChunk, + state: AnthropicStreamState, +): Array { + const events: Array = [] + const { finish_reason } = chunk.choices[0] + if (finish_reason === null) { + return events + } + + if (state.contentBlockOpen) { + events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockOpen = false + } + events.push({ + type: "message_delta", + delta: { + stop_reason: mapOpenAIStopReasonToAnthropic(finish_reason), + stop_sequence: null, + }, + }) + events.push({ type: "message_stop" }) + return events +} + +/** + * Translates a single OpenAI ChatCompletionChunk to a series of Anthropic-style stream events. + * This function is stateful and requires a state object to be maintained across calls. + * + * @param chunk The OpenAI chunk to translate. + * @param state The current state of the stream translation. + * @param inputTokens The number of tokens in the prompt, required for the initial message_start event. + * @returns An array of Anthropic stream event data objects. 
+ */ +export function translateChunkToAnthropicEvents( + chunk: ChatCompletionChunk, + state: AnthropicStreamState, + inputTokens: number, +): Array { + const events: Array = [] + + const messageStartEvent = handleMessageStart(chunk, state, inputTokens) + if (messageStartEvent) { + events.push(messageStartEvent) + } + + events.push(...handleTextContent(chunk, state)) + events.push(...handleToolCalls(chunk, state)) + events.push(...handleEndOfStream(chunk, state)) + + return events +} diff --git a/src/routes/messages/utils.ts b/src/routes/messages/utils.ts new file mode 100644 index 000000000..d0febfc9d --- /dev/null +++ b/src/routes/messages/utils.ts @@ -0,0 +1,16 @@ +import { type AnthropicResponse } from "./anthropic-types" + +export function mapOpenAIStopReasonToAnthropic( + finishReason: "stop" | "length" | "tool_calls" | "content_filter" | null, +): AnthropicResponse["stop_reason"] { + if (finishReason === null) { + return null + } + const stopReasonMap = { + stop: "end_turn", + length: "max_tokens", + tool_calls: "tool_use", + content_filter: "end_turn", + } as const + return stopReasonMap[finishReason] +} From 95a4178c2cbe27a71740d4ca705eed1a43e65e06 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 12:36:21 +0700 Subject: [PATCH 07/25] feat: Add support for "none" tool choice in Anthropic translation --- src/routes/messages/non-stream-translation.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index 32c839cea..3f91238a5 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -156,6 +156,9 @@ function translateAnthropicToolChoiceToOpenAI( } return undefined } + case "none": { + return "none" + } default: { return undefined } From 6fb9c8537200932e3e2fa7cd25d85261e824418c Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 13:20:56 +0700 Subject: [PATCH 08/25] feat: 
Update Anthropic API documentation and types --- docs/anthropic.md | 105 +++++++------ src/routes/messages/anthropic-types.ts | 19 ++- src/routes/messages/stream-translation.ts | 172 ---------------------- 3 files changed, 70 insertions(+), 226 deletions(-) diff --git a/docs/anthropic.md b/docs/anthropic.md index b391aec2d..d41043a24 100644 --- a/docs/anthropic.md +++ b/docs/anthropic.md @@ -18,73 +18,84 @@ Creates a model response for the given conversation. The request body is a JSON object. -| Parameter | Type | Required | Description | -| :--------------- | :-------------- | :------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | -| `model` | string | Yes | The model that will complete your prompt. Example: `claude-3-7-sonnet-20250219`. | -| `messages` | array | Yes | A list of input messages comprising the conversation so far. See [The Message Object](https://www.google.com/search?q=%23the-message-object) below. | -| `max_tokens` | integer | Yes | The maximum number of tokens to generate. Different models have different maximums. | -| `system` | string or array | No | A system prompt to provide context and instructions to Claude, such as specifying a role or goal. | -| `metadata` | object | No | An object for metadata, such as a `user_id`, to help detect abuse. Do not include any personally identifying information. | -| `stop_sequences` | array | No | Custom text sequences that will cause the model to stop generating. | -| `stream` | boolean | No | If set, the response will be incrementally streamed using server-sent events. Defaults to `false`. | -| `temperature` | number | No | The amount of randomness injected into the response, ranging from `0.0` to `1.0`. Defaults to `1.0`. | -| `top_p` | number | No | Use nucleus sampling. The model considers tokens with `top_p` probability mass. Should alter `temperature` or `top_p`, but not both. 
| -| `top_k` | integer | No | Only sample from the top K options for each subsequent token. Recommended for advanced use cases. | -| `tools` | array | No | A list of tools the model may use. See [The Tool Object](https://www.google.com/search?q=%23the-tool-object) below. | -| `tool_choice` | object | No | Controls how the model should use the provided tools. Can be `auto`, `any`, `tool`, or `none`. | +| Parameter | Type | Required | Description | +| :--------------- | :-------------- | :------- | :---------------------------------------------------------------------------------------------------------------------------------------------------- | +| `model` | string | Yes | The model that will complete your prompt. Example: `claude-3-7-sonnet-20250219`. | +| `messages` | array | Yes | A list of input messages comprising the conversation so far. See [The Message Object](https://www.google.com/search?q=%23the-message-object) below. | +| `max_tokens` | integer | Yes | The maximum number of tokens to generate. Different models have different maximum values for this parameter. | +| `system` | string or array | No | A system prompt to provide context and instructions to Claude, such as specifying a role or goal. | +| `metadata` | object | No | An object for metadata, such as a `user_id`, to help detect abuse. Do not include any personally identifying information. | +| `stop_sequences` | array | No | Custom text sequences that will cause the model to stop generating. | +| `stream` | boolean | No | If set, the response will be incrementally streamed using server-sent events. Defaults to `false`. | +| `temperature` | number | No | The amount of randomness injected into the response, ranging from `0.0` to `1.0`. Defaults to `1.0`. | +| `thinking` | object | No | Configuration for enabling Claude's extended thinking process, which shows reasoning steps before the final answer. | +| `top_p` | number | No | Use nucleus sampling. 
The model considers tokens with `top_p` probability mass. You should alter `temperature` or `top_p`, but not both. | +| `top_k` | integer | No | Only sample from the top K options for each subsequent token. Recommended for advanced use cases only. | +| `tools` | array | No | A list of tools the model may use. See [The Tool Object](https://www.google.com/search?q=%23the-tool-object) below. | +| `tool_choice` | object | No | Controls how the model should use tools. Can be `{"type": "auto"}`, `{"type": "any"}`, `{"type": "tool", "name": "tool_name"}` or `{"type": "none"}`. | +| `service_tier` | string | No | Can be set to `auto` or `standard_only` to determine whether to use priority capacity. | #### The Message Object -The `messages` array consists of message objects, where each object has a `role` and `content`. Models are trained on alternating `user` and `assistant` turns. +The `messages` array consists of message objects, where each object has a `role` and `content`. Models are trained on alternating `user` and `assistant` conversational turns. -| Parameter | Type | Required | Description | -| :-------- | :-------------- | :------- | :---------------------------------------------------------------------------------------------------------------------------------- | -| `role` | string | Yes | The role of the message author. Must be either `user` or `assistant`. | -| `content` | string or array | Yes | The content of the message. This can be a simple string or an array of content blocks for multimodal input (e.g., text and images). | +| Parameter | Type | Required | Description | +| :-------- | :-------------- | :------- | :---------------------------------------------------------------------------------------------------------- | +| `role` | string | Yes | The role of the message author. Must be either `user` or `assistant`. | +| `content` | string or array | Yes | The content of the message. 
This can be a simple string or an array of content blocks for multimodal input. | **Content Blocks:** For multimodal input, the `content` array can contain different types of blocks. - **`text`**: A block with a `type` of "text" and a `text` field containing the string. -- **`image`**: A block with a `type` of "image" and a `source` object. The source must specify its `type` (e.g., "base64"), `media_type` (e.g., "image/jpeg"), and `data`. -- **`tool_result`**: A block used to return the output of a tool back to the model. It includes the `tool_use_id`, `content`, and an optional `is_error` flag. +- **`image`**: Starting with Claude 3 models, you can send image content blocks. The `source` object must specify a `type` of "base64", a `media_type` (`image/jpeg`, `image/png`, `image/gif`, or `image/webp`), and the `data`. +- **`tool_result`**: A block used to return the output of a tool back to the model. It includes the `tool_use_id`, the `content` from the tool's execution, and an optional `is_error` flag. #### The Tool Object The `tools` array allows you to define client-side tools the model can call. -| Parameter | Type | Required | Description | -| :------------- | :----- | :------- | :-------------------------------------------------------------------------------------------------------- | -| `name` | string | Yes | The name of the tool, matching `^[a-zA-Z0-9_-]{1,64}$`. | -| `description` | string | No | A detailed description of what the tool does, which helps the model decide when to use it. | -| `input_schema` | object | Yes | A [JSON Schema](https://json-schema.org/draft/2020-12) object describing the parameters the tool accepts. | +| Parameter | Type | Required | Description | +| :------------- | :----- | :------- | :--------------------------------------------------------------------------------------------------------------- | +| `name` | string | Yes | The name of the tool, which must match the pattern `^[a-zA-Z0-9_-]{1,64}$`. 
| +| `description` | string | No | A detailed, strongly-recommended description of what the tool does, which helps the model decide when to use it. | +| `input_schema` | object | Yes | A [JSON Schema](https://json-schema.org/draft/2020-12) object describing the parameters the tool accepts. | #### Response (200 OK) -A successful non-streaming request returns a `Message` object. +A successful **non-streaming** request returns a `Message` object. -| Parameter | Type | Description | -| :-------------- | :----- | :------------------------------------------------------------------------------------------------------------------------ | -| `id` | string | A unique identifier for the message object. | -| `type` | string | The object type, which is always `message`. | -| `role` | string | The role of the author, which is always `assistant`. | -| `content` | array | An array of content blocks generated by the model (e.g., `text` or `tool_use`). | -| `model` | string | The model that handled the request. | -| `stop_reason` | string | The reason the model stopped generating tokens. Can be `end_turn`, `max_tokens`, `stop_sequence`, or `tool_use`. | -| `stop_sequence` | string | If the model was stopped by a stop sequence, this field will contain which sequence was generated. Can be null. | -| `usage` | object | An object containing token usage statistics. See [The Usage Object](https://www.google.com/search?q=%23the-usage-object). | +| Parameter | Type | Description | +| :-------------- | :----- | :---------------------------------------------------------------------------------------------------------------------------------------- | +| `id` | string | A unique identifier for the message object. | +| `type` | string | The object type, which is always `message`. | +| `role` | string | The role of the author, which is always `assistant`. | +| `content` | array | An array of content blocks generated by the model (e.g., `text` or `tool_use`). 
| +| `model` | string | The model that handled the request. | +| `stop_reason` | string | The reason the model stopped generating tokens. Can be `end_turn`, `max_tokens`, `stop_sequence`, `tool_use`, `pause_turn`, or `refusal`. | +| `stop_sequence` | string | If the model was stopped by a custom stop sequence, this field will contain which sequence was generated. Can be null. | +| `usage` | object | An object containing token usage statistics. See [The Usage Object](https://www.google.com/search?q=%23the-usage-object). | + +#### Streaming Response (200 OK) + +If `stream: true` is set, the API streams back a sequence of server-sent events. The response is a series of JSON events that incrementally build the complete message object. + +According to the documentation, the `stop_reason` provides insight into the stream's state: in the initial `message_start` event, the `stop_reason` field will be `null`. In all other events, it will be non-null once the stopping condition is known. #### The Usage Object The `usage` object details billing and rate-limit token counts. -| Parameter | Type | Description | -| :-------------- | :------ | :------------------------------------- | -| `input_tokens` | integer | The number of input tokens used. | -| `output_tokens` | integer | The number of output tokens generated. | +| Parameter | Type | Description | +| :---------------------------- | :------ | :-------------------------------------------------------------------------- | +| `input_tokens` | integer | The number of input tokens used. | +| `output_tokens` | integer | The number of output tokens generated. | +| `cache_creation_input_tokens` | integer | The number of input tokens used to create a cache entry. | +| `cache_read_input_tokens` | integer | The number of input tokens read from the cache. | +| `service_tier` | string | The service tier used for the request (`standard`, `priority`, or `batch`). 
| ### Count Message Tokens -Calculates the number of tokens for a given set of messages without executing the model. +Calculates the number of tokens for a given set of messages without creating it. **Endpoint:** `POST /v1/messages/count_tokens` @@ -103,9 +114,9 @@ The request accepts a subset of the "Create a Message" parameters. A successful request returns a JSON object. -| Parameter | Type | Description | -| :------------- | :------ | :------------------------------------------------------------------------------ | -| `input_tokens` | integer | The total number of tokens counted from the messages, system prompt, and tools. | +| Parameter | Type | Description | +| :------------- | :------ | :----------------------------------------------------------------------------------------- | +| `input_tokens` | integer | The total number of tokens across the provided list of messages, system prompt, and tools. | --- @@ -115,7 +126,7 @@ The Models API allows you to list and retrieve information about available model ### List Models -Lists the currently available models, with the most recent models appearing first. +Lists the currently available models, with the most recently released models appearing first. **Endpoint:** `GET /v1/models` @@ -126,7 +137,7 @@ A successful request returns a list of model objects. | Parameter | Type | Description | | :--------- | :------ | :------------------------------------------------------------------------------ | | `data` | array | A list of [Model Objects](https://www.google.com/search?q=%23the-model-object). | -| `has_more` | boolean | Indicates if more results are available for pagination. | +| `has_more` | boolean | Indicates if there are more results in the requested page direction. 
| ### Get a Model diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 866abe352..4e27ad58f 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -63,7 +63,13 @@ export interface AnthropicResponse { role: "assistant" content: Array model: string - stop_reason: "end_turn" | "max_tokens" | "stop_sequence" | "tool_use" | null + stop_reason: + | "end_turn" + | "max_tokens" + | "stop_sequence" + | "tool_use" + | "pause_turn" + | "refusal" stop_sequence: string | null usage: { input_tokens: number @@ -85,11 +91,10 @@ export interface AnthropicToolUseBlock { // Anthropic Stream Event Types export interface AnthropicMessageStartEvent { type: "message_start" - message: Omit< - AnthropicResponse, - "stop_reason" | "stop_sequence" | "content" - > & { + message: Omit & { content: [] + stop_reason: null + stop_sequence: null } } @@ -119,8 +124,8 @@ export interface AnthropicContentBlockStopEvent { export interface AnthropicMessageDeltaEvent { type: "message_delta" delta: { - stop_reason: AnthropicResponse["stop_reason"] - stop_sequence: string | null + stop_reason?: AnthropicResponse["stop_reason"] + stop_sequence?: string | null } // OpenAI does not provide token usage per chunk, so this is omitted. 
// usage: { output_tokens: number } diff --git a/src/routes/messages/stream-translation.ts b/src/routes/messages/stream-translation.ts index 3093a7c7e..e69de29bb 100644 --- a/src/routes/messages/stream-translation.ts +++ b/src/routes/messages/stream-translation.ts @@ -1,172 +0,0 @@ -import { type ChatCompletionChunk } from "~/services/copilot/create-chat-completions" - -import { - type AnthropicStreamEventData, - type AnthropicStreamState, -} from "./anthropic-types" -import { mapOpenAIStopReasonToAnthropic } from "./utils" - -function handleMessageStart( - chunk: ChatCompletionChunk, - state: AnthropicStreamState, - inputTokens: number, -): AnthropicStreamEventData | undefined { - if (chunk.choices[0].delta.role === "assistant" && !state.messageStartSent) { - state.messageStartSent = true - return { - type: "message_start", - message: { - id: chunk.id, - type: "message", - role: "assistant", - content: [], - model: chunk.model, - usage: { - input_tokens: inputTokens, - output_tokens: 1, // Placeholder, not updated in subsequent events - }, - }, - } - } -} - -function handleTextContent( - chunk: ChatCompletionChunk, - state: AnthropicStreamState, -): Array { - const events: Array = [] - const { content } = chunk.choices[0].delta - if (!content) { - return events - } - - if (!state.contentBlockOpen) { - // Start a new text block if no block is open - events.push({ - type: "content_block_start", - index: state.contentBlockIndex, - content_block: { type: "text", text: "" }, - }) - state.contentBlockOpen = true - } - events.push({ - type: "content_block_delta", - index: state.contentBlockIndex, - delta: { type: "text_delta", text: content }, - }) - return events -} - -function handleToolCalls( - chunk: ChatCompletionChunk, - state: AnthropicStreamState, -): Array { - const events: Array = [] - const { tool_calls } = chunk.choices[0].delta - if (!tool_calls) { - return events - } - - for (const toolCallDelta of tool_calls) { - // A new tool call is starting - if 
(toolCallDelta.id && toolCallDelta.function?.name) { - if (state.contentBlockOpen) { - // Close the previous content block (which must be a text block) - events.push({ - type: "content_block_stop", - index: state.contentBlockIndex, - }) - state.contentBlockIndex++ - } - const anthropicBlockIndex = state.contentBlockIndex - state.toolCalls[toolCallDelta.index] = { - id: toolCallDelta.id, - name: toolCallDelta.function.name, - anthropicBlockIndex, - } - events.push({ - type: "content_block_start", - index: anthropicBlockIndex, - content_block: { - type: "tool_use", - id: toolCallDelta.id, - name: toolCallDelta.function.name, - input: {}, - }, - }) - state.contentBlockOpen = true - } - - // Argument chunks for the tool call - if (toolCallDelta.function?.arguments) { - const toolInfo = state.toolCalls[toolCallDelta.index] - if (toolInfo) { - events.push({ - type: "content_block_delta", - index: toolInfo.anthropicBlockIndex, - delta: { - type: "input_json_delta", - partial_json: toolCallDelta.function.arguments, - }, - }) - } - } - } - return events -} - -function handleEndOfStream( - chunk: ChatCompletionChunk, - state: AnthropicStreamState, -): Array { - const events: Array = [] - const { finish_reason } = chunk.choices[0] - if (finish_reason === null) { - return events - } - - if (state.contentBlockOpen) { - events.push({ - type: "content_block_stop", - index: state.contentBlockIndex, - }) - state.contentBlockOpen = false - } - events.push({ - type: "message_delta", - delta: { - stop_reason: mapOpenAIStopReasonToAnthropic(finish_reason), - stop_sequence: null, - }, - }) - events.push({ type: "message_stop" }) - return events -} - -/** - * Translates a single OpenAI ChatCompletionChunk to a series of Anthropic-style stream events. - * This function is stateful and requires a state object to be maintained across calls. - * - * @param chunk The OpenAI chunk to translate. - * @param state The current state of the stream translation. 
- * @param inputTokens The number of tokens in the prompt, required for the initial message_start event. - * @returns An array of Anthropic stream event data objects. - */ -export function translateChunkToAnthropicEvents( - chunk: ChatCompletionChunk, - state: AnthropicStreamState, - inputTokens: number, -): Array { - const events: Array = [] - - const messageStartEvent = handleMessageStart(chunk, state, inputTokens) - if (messageStartEvent) { - events.push(messageStartEvent) - } - - events.push(...handleTextContent(chunk, state)) - events.push(...handleToolCalls(chunk, state)) - events.push(...handleEndOfStream(chunk, state)) - - return events -} From 4bf35ec0e04bd5873002de4d4d5459cef823dccb Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 13:57:39 +0700 Subject: [PATCH 09/25] docs: Update mapping doc with latest API details and features --- docs/mapping.md | 110 ++++++++------- src/routes/messages/non-stream-translation.ts | 129 +++++++++++++----- 2 files changed, 157 insertions(+), 82 deletions(-) diff --git a/docs/mapping.md b/docs/mapping.md index 2870da23c..93e1a2c52 100644 --- a/docs/mapping.md +++ b/docs/mapping.md @@ -1,25 +1,23 @@ -Of course. Here is the updated and corrected mapping document, now including the "Models" and "Token Count" endpoints. - ---- - ### **Comprehensive API Translation: Anthropic Messages & OpenAI Chat Completions** -This document provides a detailed, side-by-side technical mapping of the Anthropic Messages API and the OpenAI Chat Completions API, based on the provided API specifications. +This document provides a detailed, side-by-side technical mapping of the Anthropic Messages API and the OpenAI Chat Completions API, verified against their respective specifications. --- -### **1. API Endpoints & Authentication** +### **1. 
Endpoints & Authentication** -| Feature | Anthropic Messages API | OpenAI Chat Completions API | -| :-------------- | :------------------------ | :----------------------------------- | -| **Endpoint** | `POST /v1/messages` | `POST /v1/chat/completions` | -| **Auth Header** | `x-api-key: YOUR_API_KEY` | `Authorization: Bearer YOUR_API_KEY` | +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :------------------- | :------------------------ | :----------------------------------- | +| **Primary Endpoint** | `POST /v1/messages` | `POST /v1/chat/completions` | +| **Auth Header** | `x-api-key: YOUR_API_KEY` | `Authorization: Bearer YOUR_API_KEY` | --- ### **2. Core Request Parameters** -| Parameter | Anthropic Messages API | OpenAI Chat Completions API | +This table outlines the translation of primary request body fields. + +| Parameter | Anthropic Messages API (`claude.md`) | OpenAI Chat Completions API (`openapi.documented.yml`) | | :------------------ | :------------------------------------------- | :------------------------------------------------------------------ | | **Model** | `model` (e.g., `claude-3-7-sonnet-20250219`) | `model` (e.g., `gpt-4o`) | | **System Prompt** | `system` (A top-level string) | Prepending a message with `role: "system"` to the `messages` array. | @@ -48,31 +46,31 @@ Both APIs use a `messages` array, but the structure and content types differ. #### **3.2. Message Content Types** -| Content Type | Anthropic Messages API | OpenAI Chat Completions API | -| :-------------- | :----------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------- | -| **Text** | `content` can be a single string or an array containing `{"type": "text", "text": "..."}`. | A message object's `content` property is a string, or an array containing `{"type": "text", "text": "..."}`. 
| -| **Image** | `content` array can contain `{"type": "image", "source": {"type": "base64", "media_type": "...", "data": "..."}}`. | `content` array can contain `{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}`. | -| **Tool Result** | A `user` message `content` array can contain `{"type": "tool_result", "tool_use_id": "...", "content": "..."}`. | A distinct message object with `{"role": "tool", "tool_call_id": "...", "content": "..."}`. | +| Content Type | Anthropic Messages API (`claude.md`) | OpenAI Chat Completions API (`openapi.documented.yml`) | +| :-------------- | :----------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------- | +| **Text** | `content` can be a single string or an array with `{"type": "text", "text": "..."}`. | A message object's `content` is a string or an array with `{"type": "text", "text": "..."}`. | +| **Image** | `content` array can contain `{"type": "image", "source": {"type": "base64", "media_type": "...", "data": "..."}}`. | `content` array can contain `{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}`. | +| **Tool Result** | A `user` message `content` array can contain `{"type": "tool_result", "tool_use_id": "...", "content": "..."}`. | A distinct message object: `{"role": "tool", "tool_call_id": "...", "content": "..."}`. | --- ### **4. 
Tool & Function Handling** -| Feature | Anthropic Messages API | OpenAI Chat Completions API | -| :------------------------ | :----------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **Tool Definition** | `tools` array. Each tool has `name`, `description`, and `input_schema`. | `tools` array. Each tool has `type: "function"` and a `function` object with `name`, `description`, and `parameters` (JSON Schema). | -| **Tool Choice** | `tool_choice` object with `type`: \ - `"auto"`: Model decides. \ - `"any"`: Forces model to use a tool. \ - `"tool"`: Forces a specific tool. | `tool_choice` string or object: \ - `"auto"`: Model decides. \ - `"required"`: Forces model to call a tool. \ - `{"type": "function", ...}`: Forces a specific function. | -| **Tool Call in Response** | Appears in the `content` array as `{"type": "tool_use", "id": "...", "name": "...", "input": {...}}`. | Appears in the `message` object as a `tool_calls` array, with each call having an `id` and a `function` object with `name` and `arguments` (as a JSON string). | +| Feature | Anthropic Messages API (`claude.md`) | OpenAI Chat Completions API (`openapi.documented.yml`) | +| :------------------------ | :----------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Tool Definition** | `tools` array. Each tool has `name`, `description`, and `input_schema`. | `tools` array. Each tool has `type: "function"` and a `function` object with `name`, `description`, and `parameters`. 
| +| **Tool Choice** | `tool_choice` object with `type`: \ - `"auto"` \ - `"any"` (Forces use of a tool) \ - `"tool"` (Forces specific tool) | `tool_choice` string or object: \ - `"auto"` \ - `"required"` (Forces use of a tool) \ - `{"type": "function", ...}` (Forces specific function) | +| **Tool Call in Response** | In `content` array as `{"type": "tool_use", "id": "...", "name": "...", "input": {...}}`. | In `message` object as a `tool_calls` array, with `id` and `function` object (`name`, `arguments` as JSON string). | --- ### **5. Response Structure** -| Feature | Anthropic Messages API | OpenAI Chat Completions API | -| :------------------- | :----------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------- | -| **Primary Object** | A single response object. | A `choices` array containing one or more message objects. | -| **Stop Reason** | `stop_reason` field with values like `end_turn`, `max_tokens`, `tool_use`, `stop_sequence`, `refusal`. | `finish_reason` field within each choice, with values like `stop`, `length`, `tool_calls`, `content_filter`. | -| **Usage Statistics** | `usage` object with `input_tokens` and `output_tokens`. | `usage` object with `prompt_tokens`, `completion_tokens`, and `total_tokens`. | +| Feature | Anthropic Messages API (`claude.md`) | OpenAI Chat Completions API (`openapi.documented.yml`) | +| :------------------- | :--------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------- | +| **Primary Object** | A single response object. | A `choices` array containing one or more message objects. | +| **Stop Reason** | `stop_reason` field. Values: `end_turn`, `max_tokens`, `tool_use`, `stop_sequence`, `refusal`. | `finish_reason` field. 
Values: `stop`, `length`, `tool_calls`, `content_filter`. | +| **Usage Statistics** | `usage` object with `input_tokens` and `output_tokens`. | `usage` object with `prompt_tokens`, `completion_tokens`, and `total_tokens`. | --- @@ -80,40 +78,56 @@ Both APIs use a `messages` array, but the structure and content types differ. #### **6.1. List Available Models** -| Feature | Anthropic Messages API | OpenAI Chat Completions API | -| :---------------- | :------------------------------------------ | :------------------------------------- | -| **Endpoint** | `GET /v1/models` | `GET /v1/models` | -| **Response** | Paginated list in `data` array. | List in `data` array. | -| **Object Fields** | `id`, `display_name`, `created_at`, `type`. | `id`, `created`, `owned_by`, `object`. | +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :---------------- | :--------------------------------- | :-------------------------- | +| **Endpoint** | `GET /v1/models` | `GET /v1/models` | +| **Response** | Paginated list in `data` array. | List in `data` array. | +| **Object Fields** | `id`, `display_name`, `created_at` | `id`, `created`, `owned_by` | #### **6.2. Retrieve a Specific Model** -| Feature | Anthropic Messages API | OpenAI Chat Completions API | -| :---------------- | :------------------------------------------ | :------------------------------------- | -| **Endpoint** | `GET /v1/models/{model_id}` | `GET /v1/models/{model}` | -| **Response** | A single `ModelInfo` object. | A single `Model` object. | -| **Object Fields** | `id`, `display_name`, `created_at`, `type`. | `id`, `created`, `owned_by`, `object`. | +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :----------- | :--------------------------- | :-------------------------- | +| **Endpoint** | `GET /v1/models/{model_id}` | `GET /v1/models/{model}` | +| **Response** | A single `ModelInfo` object. | A single `Model` object. | #### **6.3. 
Count Tokens** -| Feature | Anthropic Messages API | OpenAI Chat Completions API | -| :---------------- | :------------------------------------------------------------------------------------------------------ | :------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **Endpoint** | `POST /v1/messages/count_tokens` | **No Direct API Endpoint** | -| **Functionality** | Counts tokens for a given message payload, including images and tools, without generating a completion. | Token counts are returned in the `usage` object only after a completion is generated. Client-side libraries like `tiktoken` must be used for estimation. | -| **Response** | `{"input_tokens": ...}` | N/A | +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :---------------- | :----------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Endpoint** | `POST /v1/messages/count_tokens` | **No Direct API Endpoint** | +| **Functionality** | Counts tokens for a message payload (including images and tools) before making a full completion call. | Token counts are returned in the `usage` object only _after_ a completion is generated. Client-side libraries (e.g., `tiktoken`) must be used for pre-calculation. | +| **Response** | `{"input_tokens": ...}` | N/A | --- -### **7. Streaming & Error Handling** +### **7. Streaming** + +Both APIs support streaming via Server-Sent Events (SSE), but the event structure is fundamentally different. + +- **Anthropic:** Emits a sequence of distinct, named events such as `message_start`, `content_block_start`, `content_block_delta`, and `message_stop`. This provides a highly structured stream. 
+- **OpenAI:** Emits a series of unnamed `data:` events containing `chat.completion.chunk` objects with partial updates. The stream terminates with `data: [DONE]`. + +A translation layer must buffer OpenAI's delta chunks to reconstruct Anthropic's structured event stream, including generating necessary IDs and calculating token usage for the final event. + +--- + +### **8. Error Handling** + +Error responses are structurally similar, containing a main `error` object. HTTP status codes generally correspond. -- **Streaming:** Both APIs use Server-Sent Events (SSE). A translation layer must convert OpenAI's stream of `chat.completion.chunk` objects into Anthropic's more granular, named-event stream (`message_start`, `content_block_delta`, etc.). -- **Error Handling:** Error responses are structurally similar, containing a main `error` object. HTTP status codes generally correspond (e.g., 400 for bad requests, 401 for auth issues, 429 for rate limits). +| HTTP Code | Anthropic `error.type` | OpenAI `error.type` | +| :-------- | :---------------------- | :------------------------ | +| 400 | `invalid_request_error` | `invalid_request_error` | +| 401 | `authentication_error` | `authentication_error` | +| 403 | `permission_error` | `permission_denied_error` | +| 429 | `rate_limit_error` | `rate_limit_error` | +| 500 | `api_error` | `internal_server_error` | --- -### **8. Summary of Key Differences** +### **9. Summary of Key Asymmetrical Features** -- **Token Counting:** Anthropic provides a dedicated API endpoint for counting tokens before sending a request, while OpenAI does not. -- **`top_k`:** Supported by Anthropic for request sampling, but not by OpenAI's Chat Completions API. -- **Model Information:** The APIs return different metadata for their models. Anthropic provides a `display_name`, whereas OpenAI provides `owned_by`. +- **`top_k` Sampling:** Supported by Anthropic, but not by OpenAI's Chat Completions API. 
- **Partial Assistant Prefill:** Anthropic allows providing a prefix for the assistant's response, a feature OpenAI does not support. +- **Dedicated Token Counting:** Anthropic offers a specific API endpoint to count tokens before a call, whereas OpenAI does not. diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index 3f91238a5..8121334f0 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -42,51 +42,112 @@ export function translateToOpenAI( } } -function translateAnthropicMessagesToOpenAI( - anthropicMessages: Array, +function handleSystemPrompt( system: string | Array | undefined, -): Array { - const messages: Array = [] + messages: Array, +) { + if (!system) { + return + } - if (system) { - if (typeof system === "string") { - messages.push({ role: "system", content: system }) - } else { - const systemText = system.map((block) => block.text).join("\n\n") - messages.push({ role: "system", content: systemText }) + if (typeof system === "string") { + messages.push({ role: "system", content: system }) + } else { + const systemText = system.map((block) => block.text).join("\n\n") + messages.push({ role: "system", content: systemText }) + } +} + +function handleUserMessage( + message: AnthropicMessage, + messages: Array, +) { + if (Array.isArray(message.content)) { + const toolResultBlocks = message.content.filter( + (block): block is AnthropicToolResultBlock => + block.type === "tool_result", + ) + const otherBlocks = message.content.filter( + (block) => block.type !== "tool_result", + ) + + if (otherBlocks.length > 0) { + messages.push({ + role: "user", + content: mapContent(otherBlocks), + }) } + + for (const block of toolResultBlocks) { + messages.push({ + role: "tool", + tool_call_id: block.tool_use_id, + content: block.content, + }) + } + } else { + messages.push({ + role: "user", + content: mapContent(message.content), + }) } +} - for (const 
message of anthropicMessages) { - if (message.role === "user" && Array.isArray(message.content)) { - const toolResultBlocks = message.content.filter( - (block): block is AnthropicToolResultBlock => - block.type === "tool_result", - ) - const otherBlocks = message.content.filter( - (block) => block.type !== "tool_result", - ) - - if (otherBlocks.length > 0) { - messages.push({ - role: "user", - content: mapContent(otherBlocks), - }) - } +function handleAssistantMessage( + message: AnthropicMessage, + messages: Array, +) { + if (Array.isArray(message.content)) { + const toolUseBlocks = message.content.filter( + (block): block is AnthropicToolUseBlock => + (block as { type: string }).type === "tool_use", + ) - for (const block of toolResultBlocks) { - messages.push({ - role: "tool", - tool_call_id: block.tool_use_id, - content: block.content, - }) - } + const textBlocks = message.content.filter( + (block): block is AnthropicTextBlock => block.type === "text", + ) + + if (toolUseBlocks.length > 0) { + messages.push({ + role: "assistant", + content: textBlocks.map((b) => b.text).join("\n\n") || null, + tool_calls: toolUseBlocks.map((toolUse) => ({ + id: toolUse.id, + type: "function", + function: { + name: toolUse.name, + arguments: JSON.stringify(toolUse.input), + }, + })), + }) } else { + // No tool use, just regular content messages.push({ - role: message.role, + role: "assistant", content: mapContent(message.content), }) } + } else { + messages.push({ + role: "assistant", + content: mapContent(message.content), + }) + } +} + +function translateAnthropicMessagesToOpenAI( + anthropicMessages: Array, + system: string | Array | undefined, +): Array { + const messages: Array = [] + handleSystemPrompt(system, messages) + + for (const message of anthropicMessages) { + if (message.role === "user") { + handleUserMessage(message, messages) + } else { + handleAssistantMessage(message, messages) + } } return messages } From da1ad2ebfc0e2defe0e9aba5096c0bda802d767f Mon Sep 17 
00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 15:32:02 +0700 Subject: [PATCH 10/25] chore: Updated dependencies hono and knip --- bun.lock | 8 +- package.json | 4 +- src/routes/messages/anthropic-types.ts | 5 +- src/routes/messages/non-stream-translation.ts | 105 +++++++++--------- 4 files changed, 62 insertions(+), 60 deletions(-) diff --git a/bun.lock b/bun.lock index b9161d361..1ab1e432e 100644 --- a/bun.lock +++ b/bun.lock @@ -8,7 +8,7 @@ "consola": "^3.4.2", "fetch-event-stream": "^0.1.5", "gpt-tokenizer": "^3.0.1", - "hono": "^4.7.11", + "hono": "^4.8.1", "srvx": "^0.8.0", }, "devDependencies": { @@ -17,7 +17,7 @@ "bumpp": "^10.2.0", "eslint": "^9.29.0", "jiti": "^2.4.2", - "knip": "^5.61.1", + "knip": "^5.61.2", "lint-staged": "^16.1.2", "prettier-plugin-packagejson": "^2.5.15", "simple-git-hooks": "^2.13.0", @@ -569,7 +569,7 @@ "hasown": ["hasown@2.0.2", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ=="], - "hono": ["hono@4.7.11", "", {}, "sha512-rv0JMwC0KALbbmwJDEnxvQCeJh+xbS3KEWW5PC9cMJ08Ur9xgatI0HmtgYZfOdOSOeYsp5LO2cOhdI8cLEbDEQ=="], + "hono": ["hono@4.8.1", "", {}, "sha512-ErA2ifywnSmcnB5XDuFqGDfXJ9xuAJR2C/8cZAk6vDaOCzofB8eNlha/wZWIiamREzWk94S9Z7wHsnKQHn7Niw=="], "ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="], @@ -665,7 +665,7 @@ "keyv": ["keyv@4.5.4", "", { "dependencies": { "json-buffer": "3.0.1" } }, "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw=="], - "knip": ["knip@5.61.1", "", { "dependencies": { "@nodelib/fs.walk": "^1.2.3", "fast-glob": "^3.3.3", "formatly": "^0.2.4", "jiti": "^2.4.2", "js-yaml": "^4.1.0", "minimist": "^1.2.8", "oxc-resolver": "^11.1.0", "picocolors": "^1.1.1", "picomatch": "^4.0.1", "smol-toml": "^1.3.4", "strip-json-comments": "5.0.2", "zod": "^3.22.4", "zod-validation-error": 
"^3.0.3" }, "peerDependencies": { "@types/node": ">=18", "typescript": ">=5.0.4" }, "bin": { "knip": "bin/knip.js", "knip-bun": "bin/knip-bun.js" } }, "sha512-keywAzpu8R9S50JRT3qxilb1i/pv3ztBHhZ3tRuHvRclqfhfPkY7kb/G6l4q7zozbyndidSr7IScvayG76HtkA=="], + "knip": ["knip@5.61.2", "", { "dependencies": { "@nodelib/fs.walk": "^1.2.3", "fast-glob": "^3.3.3", "formatly": "^0.2.4", "jiti": "^2.4.2", "js-yaml": "^4.1.0", "minimist": "^1.2.8", "oxc-resolver": "^11.1.0", "picocolors": "^1.1.1", "picomatch": "^4.0.1", "smol-toml": "^1.3.4", "strip-json-comments": "5.0.2", "zod": "^3.22.4", "zod-validation-error": "^3.0.3" }, "peerDependencies": { "@types/node": ">=18", "typescript": ">=5.0.4" }, "bin": { "knip": "bin/knip.js", "knip-bun": "bin/knip-bun.js" } }, "sha512-ZBv37zDvZj0/Xwk0e93xSjM3+5bjxgqJ0PH2GlB5tnWV0ktXtmatWLm+dLRUCT/vpO3SdGz2nNAfvVhuItUNcQ=="], "language-subtag-registry": ["language-subtag-registry@0.3.23", "", {}, "sha512-0K65Lea881pHotoGEa5gDlMxt3pctLi2RplBb7Ezh4rRdLEOtgi7n4EwK9lamnUCkKBqaeKRVebTq6BAxSkpXQ=="], diff --git a/package.json b/package.json index 709bab9b0..c5c19922e 100644 --- a/package.json +++ b/package.json @@ -42,7 +42,7 @@ "consola": "^3.4.2", "fetch-event-stream": "^0.1.5", "gpt-tokenizer": "^3.0.1", - "hono": "^4.7.11", + "hono": "^4.8.1", "srvx": "^0.8.0" }, "devDependencies": { @@ -51,7 +51,7 @@ "bumpp": "^10.2.0", "eslint": "^9.29.0", "jiti": "^2.4.2", - "knip": "^5.61.1", + "knip": "^5.61.2", "lint-staged": "^16.1.2", "prettier-plugin-packagejson": "^2.5.15", "simple-git-hooks": "^2.13.0", diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 4e27ad58f..9faf3af3a 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -91,7 +91,10 @@ export interface AnthropicToolUseBlock { // Anthropic Stream Event Types export interface AnthropicMessageStartEvent { type: "message_start" - message: Omit & { + message: Omit< + AnthropicResponse, + "content" | 
"stop_reason" | "stop_sequence" + > & { content: [] stop_reason: null stop_sequence: null diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index 8121334f0..33dbd337f 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -42,26 +42,38 @@ export function translateToOpenAI( } } +function translateAnthropicMessagesToOpenAI( + anthropicMessages: Array, + system: string | Array | undefined, +): Array { + const systemMessages = handleSystemPrompt(system) + + const otherMessages = anthropicMessages.flatMap((message) => + message.role === "user" ? + handleUserMessage(message) + : handleAssistantMessage(message), + ) + + return [...systemMessages, ...otherMessages] +} + function handleSystemPrompt( system: string | Array | undefined, - messages: Array, -) { +): Array { if (!system) { - return + return [] } if (typeof system === "string") { - messages.push({ role: "system", content: system }) + return [{ role: "system", content: system }] } else { const systemText = system.map((block) => block.text).join("\n\n") - messages.push({ role: "system", content: systemText }) + return [{ role: "system", content: systemText }] } } -function handleUserMessage( - message: AnthropicMessage, - messages: Array, -) { +function handleUserMessage(message: AnthropicMessage): Array { + const newMessages: Array = [] if (Array.isArray(message.content)) { const toolResultBlocks = message.content.filter( (block): block is AnthropicToolResultBlock => @@ -72,31 +84,29 @@ function handleUserMessage( ) if (otherBlocks.length > 0) { - messages.push({ + newMessages.push({ role: "user", content: mapContent(otherBlocks), }) } for (const block of toolResultBlocks) { - messages.push({ + newMessages.push({ role: "tool", tool_call_id: block.tool_use_id, content: block.content, }) } } else { - messages.push({ + newMessages.push({ role: "user", content: mapContent(message.content), }) } + return 
newMessages } -function handleAssistantMessage( - message: AnthropicMessage, - messages: Array, -) { +function handleAssistantMessage(message: AnthropicMessage): Array { if (Array.isArray(message.content)) { const toolUseBlocks = message.content.filter( (block): block is AnthropicToolUseBlock => @@ -108,48 +118,37 @@ function handleAssistantMessage( ) if (toolUseBlocks.length > 0) { - messages.push({ - role: "assistant", - content: textBlocks.map((b) => b.text).join("\n\n") || null, - tool_calls: toolUseBlocks.map((toolUse) => ({ - id: toolUse.id, - type: "function", - function: { - name: toolUse.name, - arguments: JSON.stringify(toolUse.input), - }, - })), - }) + return [ + { + role: "assistant", + content: textBlocks.map((b) => b.text).join("\n\n") || null, + tool_calls: toolUseBlocks.map((toolUse) => ({ + id: toolUse.id, + type: "function", + function: { + name: toolUse.name, + arguments: JSON.stringify(toolUse.input), + }, + })), + }, + ] } else { // No tool use, just regular content - messages.push({ - role: "assistant", - content: mapContent(message.content), - }) + return [ + { + role: "assistant", + content: mapContent(message.content), + }, + ] } } else { - messages.push({ - role: "assistant", - content: mapContent(message.content), - }) - } -} - -function translateAnthropicMessagesToOpenAI( - anthropicMessages: Array, - system: string | Array | undefined, -): Array { - const messages: Array = [] - handleSystemPrompt(system, messages) - - for (const message of anthropicMessages) { - if (message.role === "user") { - handleUserMessage(message, messages) - } else { - handleAssistantMessage(message, messages) - } + return [ + { + role: "assistant", + content: mapContent(message.content), + }, + ] } - return messages } function mapContent( From 44f733f0e8d5819355a82c97eab9678c0e91ca95 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 18:40:48 +0700 Subject: [PATCH 11/25] feat: Improve Anthropic message type handling and mapping --- 
src/routes/messages/anthropic-types.ts | 51 ++++++++++------- src/routes/messages/non-stream-translation.ts | 57 ++++++++++--------- 2 files changed, 61 insertions(+), 47 deletions(-) diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 9faf3af3a..02f178524 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -20,16 +20,6 @@ export interface AnthropicMessagesPayload { } } -export interface AnthropicMessage { - role: "user" | "assistant" - content: string | Array -} - -export type AnthropicContentBlock = - | AnthropicTextBlock - | AnthropicImageBlock - | AnthropicToolResultBlock - export interface AnthropicTextBlock { type: "text" text: string @@ -51,6 +41,34 @@ export interface AnthropicToolResultBlock { is_error?: boolean } +export interface AnthropicToolUseBlock { + type: "tool_use" + id: string + name: string + input: Record +} + +export type AnthropicUserContentBlock = + | AnthropicTextBlock + | AnthropicImageBlock + | AnthropicToolResultBlock + +export type AnthropicAssistantContentBlock = + | AnthropicTextBlock + | AnthropicToolUseBlock + +export interface AnthropicUserMessage { + role: "user" + content: string | Array +} + +export interface AnthropicAssistantMessage { + role: "assistant" + content: string | Array +} + +export type AnthropicMessage = AnthropicUserMessage | AnthropicAssistantMessage + export interface AnthropicTool { name: string description?: string @@ -61,7 +79,7 @@ export interface AnthropicResponse { id: string type: "message" role: "assistant" - content: Array + content: Array model: string stop_reason: | "end_turn" @@ -77,16 +95,7 @@ export interface AnthropicResponse { } } -export type AnthropicResponseContentBlock = - | AnthropicTextBlock - | AnthropicToolUseBlock - -export interface AnthropicToolUseBlock { - type: "tool_use" - id: string - name: string - input: Record -} +export type AnthropicResponseContentBlock = 
AnthropicAssistantContentBlock // Anthropic Stream Event Types export interface AnthropicMessageStartEvent { diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index 33dbd337f..aaef31e4e 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -9,7 +9,8 @@ import { } from "~/services/copilot/create-chat-completions" import { - type AnthropicContentBlock, + type AnthropicAssistantContentBlock, + type AnthropicAssistantMessage, type AnthropicMessage, type AnthropicMessagesPayload, type AnthropicResponse, @@ -17,6 +18,8 @@ import { type AnthropicTool, type AnthropicToolResultBlock, type AnthropicToolUseBlock, + type AnthropicUserContentBlock, + type AnthropicUserMessage, } from "./anthropic-types" import { mapOpenAIStopReasonToAnthropic } from "./utils" @@ -72,8 +75,9 @@ function handleSystemPrompt( } } -function handleUserMessage(message: AnthropicMessage): Array { +function handleUserMessage(message: AnthropicUserMessage): Array { const newMessages: Array = [] + if (Array.isArray(message.content)) { const toolResultBlocks = message.content.filter( (block): block is AnthropicToolResultBlock => @@ -103,22 +107,32 @@ function handleUserMessage(message: AnthropicMessage): Array { content: mapContent(message.content), }) } + return newMessages } -function handleAssistantMessage(message: AnthropicMessage): Array { - if (Array.isArray(message.content)) { - const toolUseBlocks = message.content.filter( - (block): block is AnthropicToolUseBlock => - (block as { type: string }).type === "tool_use", - ) +function handleAssistantMessage( + message: AnthropicAssistantMessage, +): Array { + if (!Array.isArray(message.content)) { + return [ + { + role: "assistant", + content: mapContent(message.content), + }, + ] + } - const textBlocks = message.content.filter( - (block): block is AnthropicTextBlock => block.type === "text", - ) + const toolUseBlocks = 
message.content.filter( + (block): block is AnthropicToolUseBlock => block.type === "tool_use", + ) + + const textBlocks = message.content.filter( + (block): block is AnthropicTextBlock => block.type === "text", + ) - if (toolUseBlocks.length > 0) { - return [ + return toolUseBlocks.length > 0 ? + [ { role: "assistant", content: textBlocks.map((b) => b.text).join("\n\n") || null, @@ -132,27 +146,18 @@ function handleAssistantMessage(message: AnthropicMessage): Array { })), }, ] - } else { - // No tool use, just regular content - return [ + : [ { role: "assistant", content: mapContent(message.content), }, ] - } - } else { - return [ - { - role: "assistant", - content: mapContent(message.content), - }, - ] - } } function mapContent( - content: string | Array, + content: + | string + | Array, ): string | Array | null { if (typeof content === "string") { return content From b94478ed1f6a581d144f7f58e9ced8e82ec3aefb Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 19:27:18 +0700 Subject: [PATCH 12/25] feat: Add streaming support and examples for Anthropic messages API --- docs/anthropic.md | 287 +++++++++++++++++++++- src/routes/messages/anthropic-types.ts | 34 ++- src/routes/messages/stream-translation.ts | 155 ++++++++++++ 3 files changed, 472 insertions(+), 4 deletions(-) diff --git a/docs/anthropic.md b/docs/anthropic.md index d41043a24..9c4766cb6 100644 --- a/docs/anthropic.md +++ b/docs/anthropic.md @@ -77,9 +77,41 @@ A successful **non-streaming** request returns a `Message` object. #### Streaming Response (200 OK) -If `stream: true` is set, the API streams back a sequence of server-sent events. The response is a series of JSON events that incrementally build the complete message object. +When `stream: true` is set, the API streams the response using server-sent events (SSE). Each event is named (e.g., `event: message_start`) and contains associated JSON data. 
-According to the documentation, the `stop_reason` provides insight into the stream's state: in the initial `message_start` event, the `stop_reason` field will be `null`. In all other events, it will be non-null once the stopping condition is known. +The event flow for a stream is as follows: + +1. `message_start`: Contains a `Message` object with empty `content`. +2. A series of content blocks. Each block has a `content_block_start` event, one or more `content_block_delta` events, and a `content_block_stop` event. The `index` in these events corresponds to the content block's position in the final `content` array. +3. One or more `message_delta` events, which indicate top-level changes to the final `Message` object. The `usage` field in this event contains cumulative token counts. +4. A final `message_stop` event. + +The stream may also include `ping` events to keep the connection alive and `error` events if issues occur. + +##### Content Block Delta Types + +Each `content_block_delta` event contains a `delta` object that updates a content block. + +- **Text Delta**: Updates a `text` content block. + +  ```json +  event: content_block_delta +  data: {"type": "content_block_delta","index": 0,"delta": {"type": "text_delta", "text": "ello frien"}} +  ``` + +- **Input JSON Delta**: Used for `tool_use` blocks, these deltas contain partial JSON strings for the tool's `input` field. The partial strings must be accumulated and parsed into a final JSON object upon receiving the `content_block_stop` event. + +  ```json +  event: content_block_delta +  data: {"type": "content_block_delta","index": 1,"delta": {"type": "input_json_delta","partial_json": "{\"location\": \"San Fra"}} +  ``` + +- **Thinking Delta**: When extended thinking is enabled, these deltas update the `thinking` field of a thinking content block. A special `signature_delta` event is sent just before the `content_block_stop` to verify the block's integrity. 
+ + ```json + event: content_block_delta + data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "Let me solve this step by step:\n\n1. First break down 27 * 453"}} + ``` #### The Usage Object @@ -93,6 +125,257 @@ The `usage` object details billing and rate-limit token counts. | `cache_read_input_tokens` | integer | The number of input tokens read from the cache. | | `service_tier` | string | The service tier used for the request (`standard`, `priority`, or `batch`). | +### Streaming Examples + +#### Basic Streaming Request + +```bash +curl https://api.anthropic.com/v1/messages \ + --header "anthropic-version: 2023-06-01" \ + --header "content-type: application/json" \ + --header "x-api-key: $ANTHROPIC_API_KEY" \ + --data \ +'{ + "model": "claude-opus-4-20250514", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 256, + "stream": true +}' +``` + +**Response:** + +```json +event: message_start +data: {"type": "message_start", "message": {"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY", "type": "message", "role": "assistant", "content": [], "model": "claude-opus-4-20250514", "stop_reason": null, "stop_sequence": null, "usage": {"input_tokens": 25, "output_tokens": 1}}} + +event: content_block_start +data: {"type": "content_block_start", "index": 0, "content_block": {"type": "text", "text": ""}} + +event: ping +data: {"type": "ping"} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hello"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "!"}} + +event: content_block_stop +data: {"type": "content_block_stop", "index": 0} + +event: message_delta +data: {"type": "message_delta", "delta": {"stop_reason": "end_turn", "stop_sequence":null}, "usage": {"output_tokens": 15}} + +event: message_stop +data: {"type": "message_stop"} +``` + +#### Streaming Request with Tool 
Use + +```bash +curl https://api.anthropic.com/v1/messages \ + -H "content-type: application/json" \ + -H "x-api-key: $ANTHROPIC_API_KEY" \ + -H "anthropic-version: 2023-06-01" \ + -d '{ + "model": "claude-opus-4-20250514", + "max_tokens": 1024, + "tools": [ + { + "name": "get_weather", + "description": "Get the current weather in a given location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + } + }, + "required": ["location"] + } + } + ], + "tool_choice": {"type": "any"}, + "messages": [ + { + "role": "user", + "content": "What is the weather like in San Francisco?" + } + ], + "stream": true + }' +``` + +**Response:** + +```json +event: message_start +data: {"type":"message_start","message":{"id":"msg_014p7gG3wDgGV9EUtLvnow3U","type":"message","role":"assistant","model":"claude-opus-4-20250514","stop_sequence":null,"usage":{"input_tokens":472,"output_tokens":2},"content":[],"stop_reason":null}} + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}} + +event: ping +data: {"type": "ping"} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Okay"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":","}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" let"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"'s"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" check"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" the"}} + +event: content_block_delta +data: 
{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" weather"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" for"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" San"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" Francisco"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":","}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" CA"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":":"}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: content_block_start +data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"location\":"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" \"San"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" Francisc"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"o,"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" CA\""}} + +event: content_block_delta +data: 
{"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":", "}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"\"unit\": \"fah"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"renheit\"}"}} + +event: content_block_stop +data: {"type":"content_block_stop","index":1} + +event: message_delta +data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":89}} + +event: message_stop +data: {"type":"message_stop"} +``` + +#### Streaming Request with Extended Thinking + +```bash +curl https://api.anthropic.com/v1/messages \ + --header "x-api-key: $ANTHROPIC_API_KEY" \ + --header "anthropic-version: 2023-06-01" \ + --header "content-type: application/json" \ + --data \ +'{ + "model": "claude-opus-4-20250514", + "max_tokens": 20000, + "stream": true, + "thinking": { + "type": "enabled", + "budget_tokens": 16000 + }, + "messages": [ + { + "role": "user", + "content": "What is 27 * 453?" + } + ] +}' +``` + +**Response:** + +```json +event: message_start +data: {"type": "message_start", "message": {"id": "msg_01...", "type": "message", "role": "assistant", "content": [], "model": "claude-opus-4-20250514", "stop_reason": null, "stop_sequence": null}} + +event: content_block_start +data: {"type": "content_block_start", "index": 0, "content_block": {"type": "thinking", "thinking": ""}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "Let me solve this step by step:\n\n1. First break down 27 * 453"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n2. 
453 = 400 + 50 + 3"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n3. 27 * 400 = 10,800"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n4. 27 * 50 = 1,350"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n5. 27 * 3 = 81"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n6. 10,800 + 1,350 + 81 = 12,231"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "signature_delta", "signature": "EqQBCgIYAhIM1gbcDa9GJwZA2b3hGgxBdjrkzLoky3dl1pkiMOYds..."}} + +event: content_block_stop +data: {"type": "content_block_stop", "index": 0} + +event: content_block_start +data: {"type": "content_block_start", "index": 1, "content_block": {"type": "text", "text": ""}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 1, "delta": {"type": "text_delta", "text": "27 * 453 = 12,231"}} + +event: content_block_stop +data: {"type": "content_block_stop", "index": 1} + +event: message_delta +data: {"type": "message_delta", "delta": {"stop_reason": "end_turn", "stop_sequence": null}} + +event: message_stop +data: {"type": "message_stop"} +``` + ### Count Message Tokens Calculates the number of tokens for a given set of messages without creating it. 
diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 02f178524..df7187c72 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -18,6 +18,11 @@ export interface AnthropicMessagesPayload { type: "auto" | "any" | "tool" | "none" name?: string } + thinking?: { + type: "enabled" + budget_tokens?: number + } + service_tier?: "auto" | "standard_only" } export interface AnthropicTextBlock { @@ -48,6 +53,11 @@ export interface AnthropicToolUseBlock { input: Record } +export interface AnthropicThinkingBlock { + type: "thinking" + thinking: string +} + export type AnthropicUserContentBlock = | AnthropicTextBlock | AnthropicImageBlock @@ -56,6 +66,7 @@ export type AnthropicUserContentBlock = export type AnthropicAssistantContentBlock = | AnthropicTextBlock | AnthropicToolUseBlock + | AnthropicThinkingBlock export interface AnthropicUserMessage { role: "user" @@ -92,6 +103,9 @@ export interface AnthropicResponse { usage: { input_tokens: number output_tokens: number + cache_creation_input_tokens?: number + cache_read_input_tokens?: number + service_tier?: "standard" | "priority" | "batch" } } @@ -118,6 +132,7 @@ export interface AnthropicContentBlockStartEvent { | (Omit & { input: Record }) + | { type: "thinking"; thinking: string } } export interface AnthropicContentBlockDeltaEvent { @@ -126,6 +141,8 @@ export interface AnthropicContentBlockDeltaEvent { delta: | { type: "text_delta"; text: string } | { type: "input_json_delta"; partial_json: string } + | { type: "thinking_delta"; thinking: string } + | { type: "signature_delta"; signature: string } } export interface AnthropicContentBlockStopEvent { @@ -139,14 +156,25 @@ export interface AnthropicMessageDeltaEvent { stop_reason?: AnthropicResponse["stop_reason"] stop_sequence?: string | null } - // OpenAI does not provide token usage per chunk, so this is omitted. 
- // usage: { output_tokens: number } + usage?: { output_tokens: number } } export interface AnthropicMessageStopEvent { type: "message_stop" } +export interface AnthropicPingEvent { + type: "ping" +} + +export interface AnthropicErrorEvent { + type: "error" + error: { + type: string + message: string + } +} + export type AnthropicStreamEventData = | AnthropicMessageStartEvent | AnthropicContentBlockStartEvent @@ -154,6 +182,8 @@ export type AnthropicStreamEventData = | AnthropicContentBlockStopEvent | AnthropicMessageDeltaEvent | AnthropicMessageStopEvent + | AnthropicPingEvent + | AnthropicErrorEvent // State for streaming translation export interface AnthropicStreamState { diff --git a/src/routes/messages/stream-translation.ts b/src/routes/messages/stream-translation.ts index e69de29bb..681598636 100644 --- a/src/routes/messages/stream-translation.ts +++ b/src/routes/messages/stream-translation.ts @@ -0,0 +1,155 @@ +import { type ChatCompletionChunk } from "~/services/copilot/create-chat-completions" + +import { + type AnthropicStreamEventData, + type AnthropicStreamState, +} from "./anthropic-types" +import { mapOpenAIStopReasonToAnthropic } from "./utils" + +function isToolBlockOpen(state: AnthropicStreamState): boolean { + if (!state.contentBlockOpen) { + return false + } + // Check if the current block index corresponds to any known tool call + return Object.values(state.toolCalls).some( + (tc) => tc.anthropicBlockIndex === state.contentBlockIndex, + ) +} + +// eslint-disable-next-line max-lines-per-function, complexity +export function translateChunkToAnthropicEvents( + chunk: ChatCompletionChunk, + state: AnthropicStreamState, + inputTokens: number, +): Array { + const events: Array = [] + const choice = chunk.choices[0] + const { delta } = choice + + if (!state.messageStartSent) { + events.push({ + type: "message_start", + message: { + id: chunk.id, + type: "message", + role: "assistant", + content: [], + model: chunk.model, + stop_reason: null, + 
stop_sequence: null, + usage: { + input_tokens: inputTokens, + output_tokens: 1, // Anthropic requires this to be > 0 + }, + }, + }) + state.messageStartSent = true + } + + if (delta.content) { + if (isToolBlockOpen(state)) { + // A tool block was open, so close it before starting a text block. + events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockIndex++ + state.contentBlockOpen = false + } + + if (!state.contentBlockOpen) { + events.push({ + type: "content_block_start", + index: state.contentBlockIndex, + content_block: { + type: "text", + text: "", + }, + }) + state.contentBlockOpen = true + } + + events.push({ + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "text_delta", + text: delta.content, + }, + }) + } + + if (delta.tool_calls) { + for (const toolCall of delta.tool_calls) { + if (toolCall.id && toolCall.function?.name) { + // New tool call starting. + if (state.contentBlockOpen) { + // Close any previously open block. 
+ events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockIndex++ + state.contentBlockOpen = false + } + + const anthropicBlockIndex = state.contentBlockIndex + state.toolCalls[toolCall.index] = { + id: toolCall.id, + name: toolCall.function.name, + anthropicBlockIndex, + } + + events.push({ + type: "content_block_start", + index: anthropicBlockIndex, + content_block: { + type: "tool_use", + id: toolCall.id, + name: toolCall.function.name, + input: {}, + }, + }) + state.contentBlockOpen = true + } + + if (toolCall.function?.arguments) { + const toolCallInfo = state.toolCalls[toolCall.index] + if (toolCallInfo) { + events.push({ + type: "content_block_delta", + index: toolCallInfo.anthropicBlockIndex, + delta: { + type: "input_json_delta", + partial_json: toolCall.function.arguments, + }, + }) + } + } + } + } + + if (choice.finish_reason) { + if (state.contentBlockOpen) { + events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockOpen = false + } + + events.push({ + type: "message_delta", + delta: { + stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason), + stop_sequence: null, + }, + }) + + events.push({ + type: "message_stop", + }) + } + + return events +} From 7c175d62282235ce03d39b3096a7f6c0e71a0728 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 19:51:27 +0700 Subject: [PATCH 13/25] feat: Translate OpenAI stream to Anthropic stream events --- src/routes/messages/anthropic-types.ts | 1 + src/routes/messages/handler.ts | 91 +++- src/routes/messages/non-stream-translation.ts | 4 +- .../messages/openai-anthropic-translation.ts | 499 ------------------ src/routes/messages/stream-translation.ts | 19 +- 5 files changed, 110 insertions(+), 504 deletions(-) delete mode 100644 src/routes/messages/openai-anthropic-translation.ts diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index df7187c72..31c6e86e2 
100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -99,6 +99,7 @@ export interface AnthropicResponse { | "tool_use" | "pause_turn" | "refusal" + | null stop_sequence: string | null usage: { input_tokens: number diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 25e8c1fb2..fd8c03863 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -1,3 +1,92 @@ import type { Context } from "hono" -export async function handleCompletion(c: Context) {} +import consola from "consola" +import { streamSSE } from "hono/streaming" + +import { awaitApproval } from "~/lib/approval" +import { checkRateLimit } from "~/lib/rate-limit" +import { state } from "~/lib/state" +import { + createChatCompletions, + type ChatCompletionChunk, + type ChatCompletionResponse, +} from "~/services/copilot/create-chat-completions" + +import { + type AnthropicMessagesPayload, + type AnthropicStreamState, +} from "./anthropic-types" +import { + translateToAnthropic, + translateToOpenAI, +} from "./non-stream-translation" +import { translateChunkToAnthropicEvents } from "./stream-translation" + +// eslint-disable-next-line max-lines-per-function +export async function handleCompletion(c: Context) { + await checkRateLimit(state) + + const anthropicPayload = await c.req.json() + consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) + + const openAIPayload = translateToOpenAI(anthropicPayload) + consola.debug( + "Translated OpenAI request payload:", + JSON.stringify(openAIPayload), + ) + + if (state.manualApprove) { + await awaitApproval() + } + + const response = await createChatCompletions(openAIPayload) + + if (isNonStreaming(response)) { + consola.debug( + "Non-streaming response from Copilot:", + JSON.stringify(response), + ) + const anthropicResponse = translateToAnthropic(response) + consola.debug( + "Translated Anthropic response:", + JSON.stringify(anthropicResponse), + 
) + return c.json(anthropicResponse) + } + + consola.debug("Streaming response from Copilot") + return streamSSE(c, async (stream) => { + const streamState: AnthropicStreamState = { + messageStartSent: false, + contentBlockIndex: 0, + contentBlockOpen: false, + toolCalls: {}, + } + + for await (const rawEvent of response) { + consola.trace("Copilot raw stream event:", JSON.stringify(rawEvent)) + if (rawEvent.data === "[DONE]") { + break + } + + if (!rawEvent.data) { + continue + } + + const chunk = JSON.parse(rawEvent.data) as ChatCompletionChunk + const events = translateChunkToAnthropicEvents(chunk, streamState) + + for (const event of events) { + consola.trace("Translated Anthropic event:", JSON.stringify(event)) + await stream.writeSSE({ + event: event.type, + data: JSON.stringify(event), + }) + } + } + }) +} + +const isNonStreaming = ( + response: Awaited>, +): response is ChatCompletionResponse => Object.hasOwn(response, "choices") diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index aaef31e4e..f7365f461 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -248,8 +248,8 @@ export function translateToAnthropic( stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason), stop_sequence: null, usage: { - input_tokens: response.usage?.prompt_tokens ?? 0, - output_tokens: response.usage?.completion_tokens ?? 
0, + input_tokens: 1, + output_tokens: 1, }, } } diff --git a/src/routes/messages/openai-anthropic-translation.ts b/src/routes/messages/openai-anthropic-translation.ts deleted file mode 100644 index 0a17ff29d..000000000 --- a/src/routes/messages/openai-anthropic-translation.ts +++ /dev/null @@ -1,499 +0,0 @@ -import { - type ChatCompletionChunk, - type ChatCompletionResponse, - type ChatCompletionsPayload, - type ContentPart, - type Message, - type TextPart, - type Tool, - type ToolCall, -} from "~/services/copilot/create-chat-completions" - -// Anthropic API Types - -export interface AnthropicMessagesPayload { - model: string - messages: Array - max_tokens: number - system?: string | Array - metadata?: { - user_id?: string - } - stop_sequences?: Array - stream?: boolean - temperature?: number - top_p?: number - top_k?: number - tools?: Array - tool_choice?: { - type: "auto" | "any" | "tool" - name?: string - } -} - -interface AnthropicMessage { - role: "user" | "assistant" - content: string | Array -} - -type AnthropicContentBlock = - | AnthropicTextBlock - | AnthropicImageBlock - | AnthropicToolResultBlock - -interface AnthropicTextBlock { - type: "text" - text: string -} - -interface AnthropicImageBlock { - type: "image" - source: { - type: "base64" - media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" - data: string - } -} - -interface AnthropicToolResultBlock { - type: "tool_result" - tool_use_id: string - content: string -} - -interface AnthropicTool { - name: string - description?: string - input_schema: Record -} - -export interface AnthropicResponse { - id: string - type: "message" - role: "assistant" - content: Array - model: string - stop_reason: "end_turn" | "max_tokens" | "stop_sequence" | "tool_use" | null - stop_sequence: string | null - usage: { - input_tokens: number - output_tokens: number - } -} - -export type AnthropicResponseContentBlock = - | AnthropicTextBlock - | AnthropicToolUseBlock - -interface AnthropicToolUseBlock { - 
type: "tool_use" - id: string - name: string - input: Record -} - -// Anthropic Stream Event Types -export interface AnthropicMessageStartEvent { - type: "message_start" - message: Omit< - AnthropicResponse, - "stop_reason" | "stop_sequence" | "content" - > & { - content: [] - } -} - -export interface AnthropicContentBlockStartEvent { - type: "content_block_start" - index: number - content_block: - | { type: "text"; text: string } - | (Omit & { - input: Record - }) -} - -export interface AnthropicContentBlockDeltaEvent { - type: "content_block_delta" - index: number - delta: - | { type: "text_delta"; text: string } - | { type: "input_json_delta"; partial_json: string } -} - -export interface AnthropicContentBlockStopEvent { - type: "content_block_stop" - index: number -} - -export interface AnthropicMessageDeltaEvent { - type: "message_delta" - delta: { - stop_reason: AnthropicResponse["stop_reason"] - stop_sequence: string | null - } - // OpenAI does not provide token usage per chunk, so this is omitted. 
- // usage: { output_tokens: number } -} - -export interface AnthropicMessageStopEvent { - type: "message_stop" -} - -export type AnthropicStreamEventData = - | AnthropicMessageStartEvent - | AnthropicContentBlockStartEvent - | AnthropicContentBlockDeltaEvent - | AnthropicContentBlockStopEvent - | AnthropicMessageDeltaEvent - | AnthropicMessageStopEvent - -// State for streaming translation -export interface AnthropicStreamState { - messageStartSent: boolean - contentBlockIndex: number - contentBlockOpen: boolean - toolCalls: { - [openAIToolIndex: number]: { - id: string - name: string - anthropicBlockIndex: number - } - } -} - -// Payload translation - -export function translateToOpenAI( - payload: AnthropicMessagesPayload, -): ChatCompletionsPayload { - return { - model: payload.model, - messages: translateAnthropicMessagesToOpenAI( - payload.messages, - payload.system, - ), - max_tokens: payload.max_tokens, - stop: payload.stop_sequences, - stream: payload.stream, - temperature: payload.temperature, - top_p: payload.top_p, - user: payload.metadata?.user_id, - tools: translateAnthropicToolsToOpenAI(payload.tools), - tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice), - } -} - -function translateAnthropicMessagesToOpenAI( - anthropicMessages: Array, - system: string | Array | undefined, -): Array { - const messages: Array = [] - - if (system) { - if (typeof system === "string") { - messages.push({ role: "system", content: system }) - } else { - const systemText = system.map((block) => block.text).join("\n\n") - messages.push({ role: "system", content: systemText }) - } - } - - for (const message of anthropicMessages) { - if (message.role === "user" && Array.isArray(message.content)) { - const toolResultBlocks = message.content.filter( - (block): block is AnthropicToolResultBlock => - block.type === "tool_result", - ) - const otherBlocks = message.content.filter( - (block) => block.type !== "tool_result", - ) - - if (otherBlocks.length > 0) { - 
messages.push({ - role: "user", - content: mapContent(otherBlocks), - }) - } - - for (const block of toolResultBlocks) { - messages.push({ - role: "tool", - tool_call_id: block.tool_use_id, - content: block.content, - }) - } - } else { - messages.push({ - role: message.role, - content: mapContent(message.content), - }) - } - } - return messages -} - -function mapContent( - content: string | Array, -): string | Array | null { - if (typeof content === "string") { - return content - } - if (!Array.isArray(content)) { - return null - } - - const contentParts: Array = [] - for (const block of content) { - if (block.type === "text") { - contentParts.push({ type: "text", text: block.text }) - } else if (block.type === "image") { - contentParts.push({ - type: "image_url", - image_url: { - url: `data:${block.source.media_type};base64,${block.source.data}`, - }, - }) - } - } - return contentParts -} - -function translateAnthropicToolsToOpenAI( - anthropicTools: Array | undefined, -): Array | undefined { - if (!anthropicTools) { - return undefined - } - return anthropicTools.map((tool) => ({ - type: "function", - function: { - name: tool.name, - description: tool.description, - parameters: tool.input_schema, - }, - })) -} - -function translateAnthropicToolChoiceToOpenAI( - anthropicToolChoice: AnthropicMessagesPayload["tool_choice"], -): ChatCompletionsPayload["tool_choice"] { - if (!anthropicToolChoice) { - return undefined - } - - switch (anthropicToolChoice.type) { - case "auto": { - return "auto" - } - case "any": { - return "required" - } - case "tool": { - if (anthropicToolChoice.name) { - return { - type: "function", - function: { name: anthropicToolChoice.name }, - } - } - return undefined - } - default: { - return undefined - } - } -} - -// Response translation - -// Stream response translation - -/** - * Translates a single OpenAI ChatCompletionChunk to a series of Anthropic-style stream events. 
- * This function is stateful and requires a state object to be maintained across calls. - * - * @param chunk The OpenAI chunk to translate. - * @param state The current state of the stream translation. - * @param inputTokens The number of tokens in the prompt, required for the initial message_start event. - * @returns An array of Anthropic stream event data objects. - */ -export function translateChunkToAnthropicEvents( - chunk: ChatCompletionChunk, - state: AnthropicStreamState, - inputTokens: number, -): Array { - const events: Array = [] - const delta = chunk.choices[0].delta - - // 1. Handle message_start - if (delta.role === "assistant" && !state.messageStartSent) { - events.push({ - type: "message_start", - message: { - id: chunk.id, - type: "message", - role: "assistant", - content: [], - model: chunk.model, - usage: { - input_tokens: inputTokens, - output_tokens: 1, // Placeholder, not updated in subsequent events - }, - }, - }) - state.messageStartSent = true - } - - // 2. Handle text content - if (delta.content) { - if (!state.contentBlockOpen) { - // Start a new text block if no block is open - events.push({ - type: "content_block_start", - index: state.contentBlockIndex, - content_block: { type: "text", text: "" }, - }) - state.contentBlockOpen = true - } - events.push({ - type: "content_block_delta", - index: state.contentBlockIndex, - delta: { type: "text_delta", text: delta.content }, - }) - } - - // 3. 
Handle tool calls - if (delta.tool_calls) { - for (const toolCallDelta of delta.tool_calls) { - // A new tool call is starting - if (toolCallDelta.id && toolCallDelta.function?.name) { - if (state.contentBlockOpen) { - // Close the previous content block (which must be a text block) - events.push({ - type: "content_block_stop", - index: state.contentBlockIndex, - }) - state.contentBlockIndex++ - } - const anthropicBlockIndex = state.contentBlockIndex - state.toolCalls[toolCallDelta.index] = { - id: toolCallDelta.id, - name: toolCallDelta.function.name, - anthropicBlockIndex, - } - events.push({ - type: "content_block_start", - index: anthropicBlockIndex, - content_block: { - type: "tool_use", - id: toolCallDelta.id, - name: toolCallDelta.function.name, - input: {}, - }, - }) - state.contentBlockOpen = true - } - - // Argument chunks for the tool call - if (toolCallDelta.function?.arguments) { - const toolInfo = state.toolCalls[toolCallDelta.index] - if (toolInfo) { - events.push({ - type: "content_block_delta", - index: toolInfo.anthropicBlockIndex, - delta: { - type: "input_json_delta", - partial_json: toolCallDelta.function.arguments, - }, - }) - } - } - } - } - - // 4. 
Handle end of stream - const finishReason = chunk.choices[0].finish_reason - if (finishReason) { - if (state.contentBlockOpen) { - events.push({ - type: "content_block_stop", - index: state.contentBlockIndex, - }) - state.contentBlockOpen = false - } - events.push({ - type: "message_delta", - delta: { - stop_reason: mapOpenAIStopReasonToAnthropic(finishReason), - stop_sequence: null, - }, - }) - events.push({ type: "message_stop" }) - } - - return events -} - -export function translateToAnthropic( - response: ChatCompletionResponse, -): AnthropicResponse { - const choice = response.choices[0] - const textBlocks = getAnthropicTextBlocks(choice.message.content) - const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls) - - return { - id: response.id, - type: "message", - role: "assistant", - model: response.model, - content: [...textBlocks, ...toolUseBlocks], - stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason), - stop_sequence: null, - usage: { - input_tokens: response.usage?.prompt_tokens ?? 0, - output_tokens: response.usage?.completion_tokens ?? 
0, - }, - } -} - -function getAnthropicTextBlocks( - messageContent: Message["content"], -): Array { - if (typeof messageContent === "string") { - return [{ type: "text", text: messageContent }] - } - - if (Array.isArray(messageContent)) { - return messageContent - .filter((part): part is TextPart => part.type === "text") - .map((part) => ({ type: "text", text: part.text })) - } - - return [] -} - -function getAnthropicToolUseBlocks( - toolCalls: Array | undefined, -): Array { - if (!toolCalls) { - return [] - } - return toolCalls.map((toolCall) => ({ - type: "tool_use", - id: toolCall.id, - name: toolCall.function.name, - input: JSON.parse(toolCall.function.arguments) as Record, - })) -} - -function mapOpenAIStopReasonToAnthropic( - finishReason: ChatCompletionResponse["choices"][0]["finish_reason"], -): AnthropicResponse["stop_reason"] { - const stopReasonMap = { - stop: "end_turn", - length: "max_tokens", - tool_calls: "tool_use", - content_filter: "end_turn", - } as const - return stopReasonMap[finishReason] -} diff --git a/src/routes/messages/stream-translation.ts b/src/routes/messages/stream-translation.ts index 681598636..db70a2732 100644 --- a/src/routes/messages/stream-translation.ts +++ b/src/routes/messages/stream-translation.ts @@ -20,9 +20,9 @@ function isToolBlockOpen(state: AnthropicStreamState): boolean { export function translateChunkToAnthropicEvents( chunk: ChatCompletionChunk, state: AnthropicStreamState, - inputTokens: number, ): Array { const events: Array = [] + const choice = chunk.choices[0] const { delta } = choice @@ -38,7 +38,7 @@ export function translateChunkToAnthropicEvents( stop_reason: null, stop_sequence: null, usage: { - input_tokens: inputTokens, + input_tokens: 1, output_tokens: 1, // Anthropic requires this to be > 0 }, }, @@ -115,6 +115,8 @@ export function translateChunkToAnthropicEvents( if (toolCall.function?.arguments) { const toolCallInfo = state.toolCalls[toolCall.index] + // Tool call can still be empty + // 
eslint-disable-next-line @typescript-eslint/no-unnecessary-condition if (toolCallInfo) { events.push({ type: "content_block_delta", @@ -144,6 +146,9 @@ export function translateChunkToAnthropicEvents( stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason), stop_sequence: null, }, + usage: { + output_tokens: 1, + }, }) events.push({ @@ -153,3 +158,13 @@ export function translateChunkToAnthropicEvents( return events } + +export function translateErrorToAnthropicErrorEvent(): AnthropicStreamEventData { + return { + type: "error", + error: { + type: "api_error", + message: "An unexpected error occurred during streaming.", + }, + } +} From 101f42207d36d56c2915576035b2cb1bf47891fc Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 19:58:31 +0700 Subject: [PATCH 14/25] feat: Add message route and handler --- src/routes/messages/route.ts | 7 +++---- src/server.ts | 3 +++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/routes/messages/route.ts b/src/routes/messages/route.ts index b61dd4065..64c04cc5e 100644 --- a/src/routes/messages/route.ts +++ b/src/routes/messages/route.ts @@ -4,12 +4,11 @@ import { forwardError } from "~/lib/forward-error" import { handleCompletion } from "./handler" -export const completionRoutes = new Hono() +export const messageRoutes = new Hono() -completionRoutes.post("/", async (c) => { +messageRoutes.post("/", async (c) => { try { - await handleCompletion(c) - return + return await handleCompletion(c) } catch (error) { return await forwardError(c, error) } diff --git a/src/server.ts b/src/server.ts index eb65371bf..9330f8413 100644 --- a/src/server.ts +++ b/src/server.ts @@ -4,6 +4,7 @@ import { logger } from "hono/logger" import { completionRoutes } from "./routes/chat-completions/route" import { embeddingRoutes } from "./routes/embeddings/route" +import { messageRoutes } from "./routes/messages/route" import { modelRoutes } from "./routes/models/route" export const server = new Hono() @@ -16,8 
+17,10 @@ server.get("/", (c) => c.text("Server running")) server.route("/chat/completions", completionRoutes) server.route("/models", modelRoutes) server.route("/embeddings", embeddingRoutes) +server.route("/messages", messageRoutes) // Compatibility with tools that expect v1/ prefix server.route("/v1/chat/completions", completionRoutes) server.route("/v1/models", modelRoutes) server.route("/v1/embeddings", embeddingRoutes) +server.route("/v1/messages", messageRoutes) From 2aab7d7486155960c12986bb84e304ca41e7deb7 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 20:04:12 +0700 Subject: [PATCH 15/25] feat: Add Anthropic compatible endpoints --- src/server.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/server.ts b/src/server.ts index 9330f8413..f72d61b96 100644 --- a/src/server.ts +++ b/src/server.ts @@ -17,10 +17,12 @@ server.get("/", (c) => c.text("Server running")) server.route("/chat/completions", completionRoutes) server.route("/models", modelRoutes) server.route("/embeddings", embeddingRoutes) -server.route("/messages", messageRoutes) // Compatibility with tools that expect v1/ prefix server.route("/v1/chat/completions", completionRoutes) server.route("/v1/models", modelRoutes) server.route("/v1/embeddings", embeddingRoutes) + +// Anthropic compatible endpoints server.route("/v1/messages", messageRoutes) +server.post("/v1/messages/count_tokens", (c) => c.json({ input_tokens: 1 })) From f8d1678e3fe0e4770cb158fd8703b93b0853b3e8 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 20:17:00 +0700 Subject: [PATCH 16/25] feat: Return available models from state --- src/routes/models/route.ts | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/routes/models/route.ts b/src/routes/models/route.ts index 8e282a391..bdcd77f69 100644 --- a/src/routes/models/route.ts +++ b/src/routes/models/route.ts @@ -1,14 +1,32 @@ import { Hono } from "hono" import { forwardError 
} from "~/lib/forward-error" -import { getModels } from "~/services/copilot/get-models" +import { state } from "~/lib/state" export const modelRoutes = new Hono() modelRoutes.get("/", async (c) => { try { - const models = await getModels() - return c.json(models) + if (!state.models) { + // This should be handled by startup logic, but as a fallback. + return c.json({ error: "Models not available" }, 503) + } + + const models = state.models.data.map((model) => ({ + id: model.id, + object: "model", + type: "model", + created: 0, // No date available from source + created_at: new Date(0).toISOString(), // No date available from source + owned_by: model.vendor, + display_name: model.name, + })) + + return c.json({ + object: "list", + data: models, + has_more: false, + }) } catch (error) { return await forwardError(c, error) } From 43dc70c3e086d44c0ca265d3423b425c818de9f8 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 20:19:14 +0700 Subject: [PATCH 17/25] fix: Ensure models are cached before serving requests --- src/routes/models/route.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/routes/models/route.ts b/src/routes/models/route.ts index bdcd77f69..4566ef84e 100644 --- a/src/routes/models/route.ts +++ b/src/routes/models/route.ts @@ -1,6 +1,7 @@ import { Hono } from "hono" import { forwardError } from "~/lib/forward-error" +import { cacheModels } from "~/lib/models" import { state } from "~/lib/state" export const modelRoutes = new Hono() @@ -9,10 +10,10 @@ modelRoutes.get("/", async (c) => { try { if (!state.models) { // This should be handled by startup logic, but as a fallback. 
- return c.json({ error: "Models not available" }, 503) + await cacheModels() } - const models = state.models.data.map((model) => ({ + const models = state.models?.data.map((model) => ({ id: model.id, object: "model", type: "model", From f38b56f9b0beda6cacf4b04571585156c21ee74f Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 20:28:15 +0700 Subject: [PATCH 18/25] docs: Update README with API endpoints, usage tips, and descriptions --- README.md | 65 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index b067f25d0..0e013da7d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Copilot API +# Copilot API Proxy > [!WARNING] > This is a reverse-engineered proxy of GitHub Copilot API. It is not supported by GitHub, and may break unexpectedly. Use at your own risk. @@ -7,7 +7,7 @@ ## Project Overview -A wrapper around GitHub Copilot API to make it OpenAI compatible, making it usable for other tools like AI assistants, local interfaces, and development utilities. +A reverse-engineered proxy for the GitHub Copilot API that exposes it as an OpenAI and Anthropic compatible service. This allows you to use GitHub Copilot with any tool that supports the OpenAI Chat Completions API or the Anthropic Messages API. ## Demo @@ -16,7 +16,7 @@ https://github.com/user-attachments/assets/7654b383-669d-4eb9-b23c-06d7aefee8c5 ## Prerequisites - Bun (>= 1.2.x) -- GitHub account with Copilot subscription (Individual or Business) +- GitHub account with Copilot subscription (individual, business, or enterprise) ## Installation @@ -64,7 +64,7 @@ npx copilot-api@latest auth Copilot API now uses a subcommand structure with two main commands: -- `start`: Start the Copilot API server (default command). This command will also handle authentication if needed. +- `start`: Start the Copilot API server. This command will also handle authentication if needed. 
- `auth`: Run GitHub authentication flow without starting the server. This is typically used if you need to generate a token for use with the `--github-token` option, especially in non-interactive environments. ## Command Line Options @@ -73,15 +73,15 @@ Copilot API now uses a subcommand structure with two main commands: The following command line options are available for the `start` command: -| Option | Description | Default | Alias | -| -------------- | ----------------------------------------------------------------------------- | ------- | ----- | -| --port | Port to listen on | 4141 | -p | -| --verbose | Enable verbose logging | false | -v | +| Option | Description | Default | Alias | +| -------------- | ----------------------------------------------------------------------------- | ---------- | ----- | +| --port | Port to listen on | 4141 | -p | +| --verbose | Enable verbose logging | false | -v | | --account-type | Account type to use (individual, business, enterprise) | individual | -a | -| --manual | Enable manual request approval | false | none | -| --rate-limit | Rate limit in seconds between requests | none | -r | -| --wait | Wait instead of error when rate limit is hit | false | -w | -| --github-token | Provide GitHub token directly (must be generated using the `auth` subcommand) | none | -g | +| --manual | Enable manual request approval | false | none | +| --rate-limit | Rate limit in seconds between requests | none | -r | +| --wait | Wait instead of error when rate limit is hit | false | -w | +| --github-token | Provide GitHub token directly (must be generated using the `auth` subcommand) | none | -g | ### Auth Command Options @@ -89,6 +89,29 @@ The following command line options are available for the `start` command: | --------- | ---------------------- | ------- | ----- | | --verbose | Enable verbose logging | false | -v | +## API Endpoints + +The server exposes several endpoints to interact with the Copilot API. 
It provides OpenAI-compatible endpoints and now also includes support for Anthropic-compatible endpoints, allowing for greater flexibility with different tools and services. + +### OpenAI Compatible Endpoints + +These endpoints mimic the OpenAI API structure. + +| Endpoint | Method | Description | +| --------------------------- | ------ | --------------------------------------------------------- | +| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. | +| `GET /v1/models` | `GET` | Lists the currently available models. | +| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. | + +### Anthropic Compatible Endpoints + +These endpoints are designed to be compatible with the Anthropic Messages API. + +| Endpoint | Method | Description | +| -------------------------------- | ------ | ------------------------------------------------------------ | +| `POST /v1/messages` | `POST` | Creates a model response for a given conversation. | +| `POST /v1/messages/count_tokens` | `POST` | Calculates the number of tokens for a given set of messages. | + ## Example Usage Using with npx: @@ -143,18 +166,8 @@ bun run start ## Usage Tips -- Consider using free models (e.g., Gemini, Mistral, Openrouter) as the `weak-model` -- Use architect mode sparingly -- Disable `yes-always` in your aider configuration -- Enable the `--manual` flag to review and approve each request before processing +- To avoid hitting GitHub Copilot's rate limits, you can use the following flags: + - `--manual`: Enables manual approval for each request, giving you full control over when requests are sent. + - `--rate-limit `: Enforces a minimum time interval between requests. For example, `copilot-api start --rate-limit 30` will ensure there's at least a 30-second gap between requests. + - `--wait`: Use this with `--rate-limit`. It makes the server wait for the cooldown period to end instead of rejecting the request with an error. 
This is useful for clients that don't automatically retry on rate limit errors. - If you have a GitHub business or enterprise plan account with Copilot, use the `--account-type` flag (e.g., `--account-type business`). See the [official documentation](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization) for more details. - -### Manual Request Approval - -When using the `--manual` flag, the server will prompt you to approve each incoming request: - -``` -? Accept incoming request? > (y/N) -``` - -This helps you control usage and monitor requests in real-time. From 8bce5dd2480640230f5ad3e53bcbbd7f04bd00e6 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 20:55:46 +0700 Subject: [PATCH 19/25] docs: Add CLAUDE.md for Claude Code guidance --- CLAUDE.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..ef328b4e2 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,37 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Development Commands + +- **Install dependencies**: `bun install` +- **Build**: `bun run build` +- **Dev server (watch)**: `bun run dev` +- **Production start**: `bun run start` +- **Lint**: `bun run lint` +- **Pre-commit lint/fix**: Runs automatically via git hooks (bunx eslint --fix) + +## Architecture Overview + +- **Entry point**: `src/main.ts` defines CLI subcommands (`start` and `auth`) for the Copilot API server and authentication flow. +- **Server**: `src/server.ts` sets up HTTP routes using Hono, maps OpenAI/Anthropic-compatible endpoints, and handles logging/cors. 
+- **Routes**: Handlers for chat completions, embeddings, models, and messages are under `src/routes/`, providing API endpoints compatible with OpenAI and Anthropic APIs. +- **Copilot communication**: `src/services/copilot/` contains methods for proxying requests (chat completions, model listing, embeddings) to the GitHub Copilot backend using user tokens. +- **Lib utilities**: `src/lib/` contains configuration, token, model caching, and error handling helpers. +- **Authentication**: `src/auth.ts` provides the CLI handler for authenticating with GitHub, managing required tokens, and persisting them locally. + +## API Endpoints + +- **OpenAI-compatible**: + - `POST /v1/chat/completions` + - `GET /v1/models` + - `POST /v1/embeddings` +- **Anthropic-compatible**: + - `POST /v1/messages` + - `POST /v1/messages/count_tokens` + +## Other Notes + +- Ensure Bun (>= 1.2.x) is installed for all scripts and local dev. +- Tokens and cache are handled automatically; manual authentication can be forced with the `auth` subcommand. +- No .cursorrules, .github/copilot-instructions.md, or .cursor/rules found, so follow typical TypeScript/Bun/ESLint conventions as seen in this codebase. 
From 7ddb97b3d4c6428696f656c1ba72d3927b8035c3 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 21:14:09 +0700 Subject: [PATCH 20/25] feat: Add Claude Code launch option and dependencies --- bun.lock | 4 ++++ package.json | 4 +++- src/main.ts | 55 +++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 59 insertions(+), 4 deletions(-) diff --git a/bun.lock b/bun.lock index 1ab1e432e..d6f124104 100644 --- a/bun.lock +++ b/bun.lock @@ -10,6 +10,8 @@ "gpt-tokenizer": "^3.0.1", "hono": "^4.8.1", "srvx": "^0.8.0", + "tiny-invariant": "^1.3.3", + "tinyexec": "^1.0.1", }, "devDependencies": { "@echristian/eslint-config": "^0.0.43", @@ -915,6 +917,8 @@ "thenify-all": ["thenify-all@1.6.0", "", { "dependencies": { "thenify": ">= 3.1.0 < 4" } }, "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA=="], + "tiny-invariant": ["tiny-invariant@1.3.3", "", {}, "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg=="], + "tinyexec": ["tinyexec@1.0.1", "", {}, "sha512-5uC6DDlmeqiOwCPmK9jMSdOuZTh8bU39Ys6yidB+UTt5hfZUPGAypSgFRiEp+jbi9qH40BLDvy85jIU88wKSqw=="], "tinyglobby": ["tinyglobby@0.2.14", "", { "dependencies": { "fdir": "^6.4.4", "picomatch": "^4.0.2" } }, "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ=="], diff --git a/package.json b/package.json index c5c19922e..7a44c9898 100644 --- a/package.json +++ b/package.json @@ -43,7 +43,9 @@ "fetch-event-stream": "^0.1.5", "gpt-tokenizer": "^3.0.1", "hono": "^4.8.1", - "srvx": "^0.8.0" + "srvx": "^0.8.0", + "tiny-invariant": "^1.3.3", + "tinyexec": "^1.0.1" }, "devDependencies": { "@echristian/eslint-config": "^0.0.43", diff --git a/src/main.ts b/src/main.ts index 8b6dc3401..36a6b75ae 100644 --- a/src/main.ts +++ b/src/main.ts @@ -3,6 +3,8 @@ import { defineCommand, runMain } from "citty" import consola from "consola" import { serve, type ServerHandler } from "srvx" 
+import invariant from "tiny-invariant" +import { x } from "tinyexec" import { auth } from "./auth" import { cacheModels } from "./lib/models" @@ -20,8 +22,11 @@ interface RunServerOptions { rateLimit?: number rateLimitWait: boolean githubToken?: string + launchClaudeCode: boolean + launchClaudeCodeDelay: number } +// eslint-disable-next-line max-lines-per-function export async function runServer(options: RunServerOptions): Promise { if (options.verbose) { consola.level = 5 @@ -53,6 +58,39 @@ export async function runServer(options: RunServerOptions): Promise { const serverUrl = `http://localhost:${options.port}` consola.box(`Server started at ${serverUrl}`) + if (options.launchClaudeCode) { + invariant(state.models, "Models should be loaded by now") + + const selectedModel = await consola.prompt( + "Select a model to use with Claude Code", + { + type: "select", + options: state.models.data.map((model) => model.id), + }, + ) + + const selectedSmallModel = await consola.prompt( + "Select a small model to use with Claude Code (https://docs.anthropic.com/en/docs/claude-code/costs#background-token-usage)", + { + type: "select", + options: state.models.data.map((model) => model.id), + }, + ) + + setTimeout(() => { + x("claude", [], { + nodeOptions: { + env: { + ANTHROPIC_BASE_URL: serverUrl, + ANTHROPIC_AUTH_TOKEN: "dummy", + ANTHROPIC_MODEL: selectedModel, + ANTHROPIC_SMALL_FAST_MODEL: selectedSmallModel, + }, + }, + }) + }, options.launchClaudeCodeDelay) + } + serve({ fetch: server.fetch as ServerHandler, port: options.port, @@ -106,6 +144,17 @@ const start = defineCommand({ description: "Provide GitHub token directly (must be generated using the `auth` subcommand)", }, + "claude-code": { + alias: "c", + type: "boolean", + default: false, + description: "Run Claude Code directly after starting the server", + }, + "claude-code-delay": { + type: "string", + default: "1000", + description: "Delay in milliseconds before running Claude Code", + }, }, run({ args }) { const 
rateLimitRaw = args["rate-limit"] @@ -113,16 +162,16 @@ const start = defineCommand({ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition rateLimitRaw === undefined ? undefined : Number.parseInt(rateLimitRaw, 10) - const port = Number.parseInt(args.port, 10) - return runServer({ - port, + port: Number.parseInt(args.port, 10), verbose: args.verbose, accountType: args["account-type"], manual: args.manual, rateLimit, rateLimitWait: Boolean(args.wait), githubToken: args["github-token"], + launchClaudeCode: args["claude-code"], + launchClaudeCodeDelay: Number.parseInt(args["claude-code-delay"], 10), }) }, }) From bb390ba26e7f5074b4d867129770bbc1f1307fb9 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 22:17:35 +0700 Subject: [PATCH 21/25] feat: command generation, bc I cant spawn --- bun.lock | 36 +++++- package.json | 4 +- src/lib/approval.ts | 2 +- src/lib/{forward-error.ts => error.ts} | 9 +- src/lib/http-error.ts | 8 -- src/lib/is-nullish.ts | 2 - src/lib/models.ts | 14 -- src/lib/rate-limit.ts | 4 +- src/lib/shell.ts | 122 ++++++++++++++++++ src/lib/sleep.ts | 4 - src/lib/token.ts | 2 +- src/lib/utils.ts | 26 ++++ src/lib/vscode-version.ts | 12 -- src/main.ts | 43 +++--- src/routes/chat-completions/handler.ts | 2 +- src/routes/chat-completions/route.ts | 2 +- src/routes/embeddings/route.ts | 2 +- src/routes/messages/route.ts | 2 +- src/routes/models/route.ts | 4 +- .../copilot/create-chat-completions.ts | 2 +- src/services/copilot/create-embeddings.ts | 2 +- src/services/copilot/get-models.ts | 2 +- src/services/github/get-copilot-token.ts | 2 +- src/services/github/get-device-code.ts | 2 +- src/services/github/get-user.ts | 2 +- src/services/github/poll-access-token.ts | 2 +- 26 files changed, 228 insertions(+), 86 deletions(-) rename src/lib/{forward-error.ts => error.ts} (79%) delete mode 100644 src/lib/http-error.ts delete mode 100644 src/lib/is-nullish.ts delete mode 100644 src/lib/models.ts create mode 100644 
src/lib/shell.ts delete mode 100644 src/lib/sleep.ts create mode 100644 src/lib/utils.ts delete mode 100644 src/lib/vscode-version.ts diff --git a/bun.lock b/bun.lock index d6f124104..8633525a8 100644 --- a/bun.lock +++ b/bun.lock @@ -5,13 +5,13 @@ "name": "copilot-api", "dependencies": { "citty": "^0.1.6", + "clipboardy": "^4.0.0", "consola": "^3.4.2", "fetch-event-stream": "^0.1.5", "gpt-tokenizer": "^3.0.1", "hono": "^4.8.1", "srvx": "^0.8.0", "tiny-invariant": "^1.3.3", - "tinyexec": "^1.0.1", }, "devDependencies": { "@echristian/eslint-config": "^0.0.43", @@ -345,6 +345,8 @@ "cli-truncate": ["cli-truncate@4.0.0", "", { "dependencies": { "slice-ansi": "^5.0.0", "string-width": "^7.0.0" } }, "sha512-nPdaFdQ0h/GEigbPClz11D0v/ZJEwxmeVZGeMo3Z5StPtUTkA9o1lD6QwoirYiSDzbcwn2XcjwmCp68W1IS4TA=="], + "clipboardy": ["clipboardy@4.0.0", "", { "dependencies": { "execa": "^8.0.1", "is-wsl": "^3.1.0", "is64bit": "^2.0.0" } }, "sha512-5mOlNS0mhX0707P2I0aZ2V/cmHUEO/fL7VFLqszkhUsxt7RwnmrInf/eEQKlf5GzvYeHIjT+Ov1HRfNmymlG0w=="], + "cliui": ["cliui@8.0.1", "", { "dependencies": { "string-width": "^4.2.0", "strip-ansi": "^6.0.1", "wrap-ansi": "^7.0.0" } }, "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ=="], "color-convert": ["color-convert@2.0.1", "", { "dependencies": { "color-name": "~1.1.4" } }, "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ=="], @@ -481,6 +483,8 @@ "eventemitter3": ["eventemitter3@5.0.1", "", {}, "sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA=="], + "execa": ["execa@8.0.1", "", { "dependencies": { "cross-spawn": "^7.0.3", "get-stream": "^8.0.1", "human-signals": "^5.0.0", "is-stream": "^3.0.0", "merge-stream": "^2.0.0", "npm-run-path": "^5.1.0", "onetime": "^6.0.0", "signal-exit": "^4.1.0", "strip-final-newline": "^3.0.0" } }, 
"sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg=="], + "exsolve": ["exsolve@1.0.5", "", {}, "sha512-pz5dvkYYKQ1AHVrgOzBKWeP4u4FRb3a6DNK2ucr0OoNwYIU4QWsJ+NM36LLzORT+z845MzKHHhpXiUF5nvQoJg=="], "fast-deep-equal": ["fast-deep-equal@3.1.3", "", {}, "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="], @@ -537,6 +541,8 @@ "get-proto": ["get-proto@1.0.1", "", { "dependencies": { "dunder-proto": "^1.0.1", "es-object-atoms": "^1.0.0" } }, "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g=="], + "get-stream": ["get-stream@8.0.1", "", {}, "sha512-VaUJspBffn/LMCJVoMvSAdmscJyS1auj5Zulnn5UoYcY531UWmdwhRWkcGKnGU93m5HSXP9LP2usOryrBtQowA=="], + "get-symbol-description": ["get-symbol-description@1.1.0", "", { "dependencies": { "call-bound": "^1.0.3", "es-errors": "^1.3.0", "get-intrinsic": "^1.2.6" } }, "sha512-w9UMqWwJxHNOvoNzSJ2oPF5wvYcvP7jUvYzhp67yEhTi17ZDBBC1z9pTdGuzjD+EFIqLSYRweZjqfiPzQ06Ebg=="], "giget": ["giget@2.0.0", "", { "dependencies": { "citty": "^0.1.6", "consola": "^3.4.0", "defu": "^6.1.4", "node-fetch-native": "^1.6.6", "nypm": "^0.6.0", "pathe": "^2.0.3" }, "bin": { "giget": "dist/cli.mjs" } }, "sha512-L5bGsVkxJbJgdnwyuheIunkGatUF/zssUoxxjACCseZYAVbaqdh9Tsmmlkl8vYan09H7sbvKt4pS8GqKLBrEzA=="], @@ -573,6 +579,8 @@ "hono": ["hono@4.8.1", "", {}, "sha512-ErA2ifywnSmcnB5XDuFqGDfXJ9xuAJR2C/8cZAk6vDaOCzofB8eNlha/wZWIiamREzWk94S9Z7wHsnKQHn7Niw=="], + "human-signals": ["human-signals@5.0.0", "", {}, "sha512-AXcZb6vzzrFAUE61HnN4mpLqd/cSIwNQjtNWR0euPm6y0iqx3G4gOXaIDdtdDwZmhwe82LA6+zinmW4UBWVePQ=="], + "ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="], "import-fresh": ["import-fresh@3.3.0", "", { "dependencies": { "parent-module": "^1.0.0", "resolve-from": "^4.0.0" } }, 
"sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw=="], @@ -599,6 +607,8 @@ "is-date-object": ["is-date-object@1.1.0", "", { "dependencies": { "call-bound": "^1.0.2", "has-tostringtag": "^1.0.2" } }, "sha512-PwwhEakHVKTdRNVOw+/Gyh0+MzlCl4R6qKvkhuvLtPMggI1WAHt9sOwZxQLSGpUaDnrdyDsomoRgNnCfKNSXXg=="], + "is-docker": ["is-docker@3.0.0", "", { "bin": { "is-docker": "cli.js" } }, "sha512-eljcgEDlEns/7AXFosB5K/2nCM4P7FQPkGc/DWLy5rmFEWvZayGrik1d9/QIY5nJ4f9YsVvBkA6kJpHn9rISdQ=="], + "is-extglob": ["is-extglob@2.1.1", "", {}, "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ=="], "is-finalizationregistry": ["is-finalizationregistry@1.1.1", "", { "dependencies": { "call-bound": "^1.0.3" } }, "sha512-1pC6N8qWJbWoPtEjgcL2xyhQOP491EQjeUo3qTKcmV8YSDDJrOepfG8pcC7h/QgnQHYSv0mJ3Z/ZWxmatVrysg=="], @@ -611,6 +621,8 @@ "is-immutable-type": ["is-immutable-type@5.0.1", "", { "dependencies": { "@typescript-eslint/type-utils": "^8.0.0", "ts-api-utils": "^2.0.0", "ts-declaration-location": "^1.0.4" }, "peerDependencies": { "eslint": "*", "typescript": ">=4.7.4" } }, "sha512-LkHEOGVZZXxGl8vDs+10k3DvP++SEoYEAJLRk6buTFi6kD7QekThV7xHS0j6gpnUCQ0zpud/gMDGiV4dQneLTg=="], + "is-inside-container": ["is-inside-container@1.0.0", "", { "dependencies": { "is-docker": "^3.0.0" }, "bin": { "is-inside-container": "cli.js" } }, "sha512-KIYLCCJghfHZxqjYBE7rEy0OBuTd5xCHS7tHVgvCLkx7StIoaxwNW3hCALgEUjFfeRk+MG/Qxmp/vtETEF3tRA=="], + "is-map": ["is-map@2.0.3", "", {}, "sha512-1Qed0/Hr2m+YqxnM09CjA2d/i6YZNfF6R2oRAOj36eUdS6qIV/huPJNSEpKbupewFs+ZsJlxsjjPbc0/afW6Lw=="], "is-number": ["is-number@7.0.0", "", {}, "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng=="], @@ -625,6 +637,8 @@ "is-shared-array-buffer": ["is-shared-array-buffer@1.0.4", "", { "dependencies": { "call-bound": "^1.0.3" } }, 
"sha512-ISWac8drv4ZGfwKl5slpHG9OwPNty4jOWPRIhBpxOoD+hqITiwuipOQ2bNthAzwA3B4fIjO4Nln74N0S9byq8A=="], + "is-stream": ["is-stream@3.0.0", "", {}, "sha512-LnQR4bZ9IADDRSkvpqMGvt/tEJWclzklNgSw48V5EAaAeDd6qGvN8ei6k5p0tvxSR171VmGyHuTiAOfxAbr8kA=="], + "is-string": ["is-string@1.1.1", "", { "dependencies": { "call-bound": "^1.0.3", "has-tostringtag": "^1.0.2" } }, "sha512-BtEeSsoaQjlSPBemMQIrY1MY0uM6vnS1g5fmufYOtnxLGUZM2178PKbhsk7Ffv58IX+ZtcvoGwccYsh0PglkAA=="], "is-symbol": ["is-symbol@1.1.1", "", { "dependencies": { "call-bound": "^1.0.2", "has-symbols": "^1.1.0", "safe-regex-test": "^1.1.0" } }, "sha512-9gGx6GTtCQM73BgmHQXfDmLtfjjTUDSyoxTCbp5WtoixAhfgsDirWIcVQ/IHpvI5Vgd5i/J5F7B9cN/WlVbC/w=="], @@ -637,6 +651,10 @@ "is-weakset": ["is-weakset@2.0.4", "", { "dependencies": { "call-bound": "^1.0.3", "get-intrinsic": "^1.2.6" } }, "sha512-mfcwb6IzQyOKTs84CQMrOwW4gQcaTOAWJ0zzJCl2WSPDrWk/OzDaImWFH3djXhb24g4eudZfLRozAvPGw4d9hQ=="], + "is-wsl": ["is-wsl@3.1.0", "", { "dependencies": { "is-inside-container": "^1.0.0" } }, "sha512-UcVfVfaK4Sc4m7X3dUSoHoozQGBEFeDC+zVo06t98xe8CzHSZZBekNXH+tu0NalHolcJ/QAGqS46Hef7QXBIMw=="], + + "is64bit": ["is64bit@2.0.0", "", { "dependencies": { "system-architecture": "^0.1.0" } }, "sha512-jv+8jaWCl0g2lSBkNSVXdzfBA0npK1HGC2KtWM9FumFRoGS94g3NbCCLVnCYHLjp4GrW2KZeeSTMo5ddtznmGw=="], + "isarray": ["isarray@2.0.5", "", {}, "sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw=="], "isexe": ["isexe@2.0.0", "", {}, "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="], @@ -699,10 +717,14 @@ "math-intrinsics": ["math-intrinsics@1.1.0", "", {}, "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g=="], + "merge-stream": ["merge-stream@2.0.0", "", {}, "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w=="], + "merge2": ["merge2@1.4.1", "", {}, 
"sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg=="], "micromatch": ["micromatch@4.0.8", "", { "dependencies": { "braces": "^3.0.3", "picomatch": "^2.3.1" } }, "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA=="], + "mimic-fn": ["mimic-fn@4.0.0", "", {}, "sha512-vqiC06CuhBTUdZH+RYl8sFrL096vA45Ok5ISO6sE/Mr1jRbGH4Csnhi8f3wKVl7x8mO4Au7Ir9D3Oyv1VYMFJw=="], + "mimic-function": ["mimic-function@5.0.1", "", {}, "sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA=="], "min-indent": ["min-indent@1.0.1", "", {}, "sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg=="], @@ -731,6 +753,8 @@ "node-releases": ["node-releases@2.0.19", "", {}, "sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw=="], + "npm-run-path": ["npm-run-path@5.3.0", "", { "dependencies": { "path-key": "^4.0.0" } }, "sha512-ppwTtiJZq0O/ai0z7yfudtBpWIoxM8yE6nHi1X47eFR2EWORqfbu6CnPlNsjeN683eT0qG6H/Pyf9fCcvjnnnQ=="], + "nypm": ["nypm@0.6.0", "", { "dependencies": { "citty": "^0.1.6", "consola": "^3.4.0", "pathe": "^2.0.3", "pkg-types": "^2.0.0", "tinyexec": "^0.3.2" }, "bin": { "nypm": "dist/cli.mjs" } }, "sha512-mn8wBFV9G9+UFHIrq+pZ2r2zL4aPau/by3kJb3cM7+5tQHMt6HGQB8FDIeKFYp8o0D2pnH6nVsO88N4AmUxIWg=="], "object-assign": ["object-assign@4.1.1", "", {}, "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg=="], @@ -747,7 +771,7 @@ "ohash": ["ohash@2.0.11", "", {}, "sha512-RdR9FQrFwNBNXAr4GixM8YaRZRJ5PUWbKYbE5eOsrwAjJW0q2REGcf79oYPsLyskQCZG1PLN+S/K1V00joZAoQ=="], - "onetime": ["onetime@7.0.0", "", { "dependencies": { "mimic-function": "^5.0.0" } }, "sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ=="], + "onetime": ["onetime@6.0.0", "", { "dependencies": { "mimic-fn": "^4.0.0" } }, 
"sha512-1FlR+gjXK7X+AsAHso35MnyN5KqGwJRi/31ft6x0M194ht7S+rWAvd7PHss9xSKMzE0asv1pyIHaJYq+BbacAQ=="], "optionator": ["optionator@0.9.4", "", { "dependencies": { "deep-is": "^0.1.3", "fast-levenshtein": "^2.0.6", "levn": "^0.4.1", "prelude-ls": "^1.2.1", "type-check": "^0.4.0", "word-wrap": "^1.2.5" } }, "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g=="], @@ -903,6 +927,8 @@ "strip-ansi-cjs": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="], + "strip-final-newline": ["strip-final-newline@3.0.0", "", {}, "sha512-dOESqjYr96iWYylGObzd39EuNTa5VJxyvVAEm5Jnh7KGo75V43Hk1odPQkNDyXNmUR6k+gEiDVXnjB8HJ3crXw=="], + "strip-indent": ["strip-indent@4.0.0", "", { "dependencies": { "min-indent": "^1.0.1" } }, "sha512-mnVSV2l+Zv6BLpSD/8V87CW/y9EmmbYzGCIavsnsI6/nwn26DwffM/yztm30Z/I2DY9wdS3vXVCMnHDgZaVNoA=="], "strip-json-comments": ["strip-json-comments@5.0.2", "", {}, "sha512-4X2FR3UwhNUE9G49aIsJW5hRRR3GXGTBTZRMfv568O60ojM8HcWjV/VxAxCDW3SUND33O6ZY66ZuRcdkj73q2g=="], @@ -913,6 +939,8 @@ "synckit": ["synckit@0.11.8", "", { "dependencies": { "@pkgr/core": "^0.2.4" } }, "sha512-+XZ+r1XGIJGeQk3VvXhT6xx/VpbHsRzsTkGgF6E5RX9TTXD0118l87puaEBZ566FhqblC6U0d4XnubznJDm30A=="], + "system-architecture": ["system-architecture@0.1.0", "", {}, "sha512-ulAk51I9UVUyJgxlv9M6lFot2WP3e7t8Kz9+IS6D4rVba1tR9kON+Ey69f+1R4Q8cd45Lod6a4IcJIxnzGc/zA=="], + "thenify": ["thenify@3.3.1", "", { "dependencies": { "any-promise": "^1.0.0" } }, "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw=="], "thenify-all": ["thenify-all@1.6.0", "", { "dependencies": { "thenify": ">= 3.1.0 < 4" } }, "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA=="], @@ -1091,6 +1119,8 @@ "mlly/pkg-types": ["pkg-types@1.3.1", "", { "dependencies": { "confbox": "^0.1.8", "mlly": "^1.7.4", 
"pathe": "^2.0.1" } }, "sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ=="], + "npm-run-path/path-key": ["path-key@4.0.0", "", {}, "sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ=="], + "nypm/tinyexec": ["tinyexec@0.3.2", "", {}, "sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA=="], "pkg-types/confbox": ["confbox@0.2.1", "", {}, "sha512-hkT3yDPFbs95mNCy1+7qNKC6Pro+/ibzYxtM2iqEigpf0sVw+bg4Zh9/snjsBcf990vfIsg5+1U7VyiyBb3etg=="], @@ -1099,6 +1129,8 @@ "regjsparser/jsesc": ["jsesc@3.0.2", "", { "bin": { "jsesc": "bin/jsesc" } }, "sha512-xKqzzWXDttJuOcawBt4KnKHHIf5oQ/Cxax+0PWFG+DFDgHNAdi+TXECADI+RYiFUMmx8792xsMbbgXj4CwnP4g=="], + "restore-cursor/onetime": ["onetime@7.0.0", "", { "dependencies": { "mimic-function": "^5.0.0" } }, "sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ=="], + "slice-ansi/ansi-styles": ["ansi-styles@6.2.1", "", {}, "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug=="], "sort-package-json/detect-indent": ["detect-indent@7.0.1", "", {}, "sha512-Mc7QhQ8s+cLrnUfU/Ji94vG/r8M26m8f++vyres4ZoojaRDpZ1eSIh/EpzLNwlWuvzSZ3UbDFspjFvTDXe6e/g=="], diff --git a/package.json b/package.json index 7a44c9898..d65e68211 100644 --- a/package.json +++ b/package.json @@ -39,13 +39,13 @@ }, "dependencies": { "citty": "^0.1.6", + "clipboardy": "^4.0.0", "consola": "^3.4.2", "fetch-event-stream": "^0.1.5", "gpt-tokenizer": "^3.0.1", "hono": "^4.8.1", "srvx": "^0.8.0", - "tiny-invariant": "^1.3.3", - "tinyexec": "^1.0.1" + "tiny-invariant": "^1.3.3" }, "devDependencies": { "@echristian/eslint-config": "^0.0.43", diff --git a/src/lib/approval.ts b/src/lib/approval.ts index 1a7fb6a71..35e4e2752 100644 --- a/src/lib/approval.ts +++ b/src/lib/approval.ts @@ -1,6 +1,6 @@ import consola from "consola" -import { HTTPError } from "./http-error" +import { 
HTTPError } from "./error" export const awaitApproval = async () => { const response = await consola.prompt(`Accept incoming request?`, { diff --git a/src/lib/forward-error.ts b/src/lib/error.ts similarity index 79% rename from src/lib/forward-error.ts rename to src/lib/error.ts index c0a1e02c2..dd2557df1 100644 --- a/src/lib/forward-error.ts +++ b/src/lib/error.ts @@ -3,7 +3,14 @@ import type { ContentfulStatusCode } from "hono/utils/http-status" import consola from "consola" -import { HTTPError } from "./http-error" +export class HTTPError extends Error { + response: Response + + constructor(message: string, response: Response) { + super(message) + this.response = response + } +} export async function forwardError(c: Context, error: unknown) { consola.error("Error occurred:", error) diff --git a/src/lib/http-error.ts b/src/lib/http-error.ts deleted file mode 100644 index 352d3c628..000000000 --- a/src/lib/http-error.ts +++ /dev/null @@ -1,8 +0,0 @@ -export class HTTPError extends Error { - response: Response - - constructor(message: string, response: Response) { - super(message) - this.response = response - } -} diff --git a/src/lib/is-nullish.ts b/src/lib/is-nullish.ts deleted file mode 100644 index a31cf3546..000000000 --- a/src/lib/is-nullish.ts +++ /dev/null @@ -1,2 +0,0 @@ -export const isNullish = (value: unknown): value is null | undefined => - value === null || value === undefined diff --git a/src/lib/models.ts b/src/lib/models.ts deleted file mode 100644 index d6a3516b7..000000000 --- a/src/lib/models.ts +++ /dev/null @@ -1,14 +0,0 @@ -import consola from "consola" - -import { getModels } from "~/services/copilot/get-models" - -import { state } from "./state" - -export async function cacheModels(): Promise { - const models = await getModels() - state.models = models - - consola.info( - `Available models: \n${models.data.map((model) => `- ${model.id}`).join("\n")}`, - ) -} diff --git a/src/lib/rate-limit.ts b/src/lib/rate-limit.ts index 
6e85a49b7..e41f58297 100644 --- a/src/lib/rate-limit.ts +++ b/src/lib/rate-limit.ts @@ -2,8 +2,8 @@ import consola from "consola" import type { State } from "./state" -import { HTTPError } from "./http-error" -import { sleep } from "./sleep" +import { HTTPError } from "./error" +import { sleep } from "./utils" export async function checkRateLimit(state: State) { if (state.rateLimitSeconds === undefined) return diff --git a/src/lib/shell.ts b/src/lib/shell.ts new file mode 100644 index 000000000..af85083c6 --- /dev/null +++ b/src/lib/shell.ts @@ -0,0 +1,122 @@ +import { execSync } from "node:child_process" +import process from "node:process" + +type ShellName = "bash" | "zsh" | "fish" | "powershell" | "cmd" | "sh" +type EnvVars = Record + +function getShell(): ShellName { + const { platform, ppid, env } = process + + if (platform === "win32") { + try { + const command = `wmic process get ParentProcessId,Name | findstr "${ppid}"` + const parentProcess = execSync(command, { stdio: "pipe" }).toString() + + if (parentProcess.toLowerCase().includes("powershell.exe")) { + return "powershell" + } + } catch { + return "cmd" + } + + return "cmd" + } else { + const shellPath = env.SHELL + if (shellPath) { + if (shellPath.endsWith("zsh")) return "zsh" + if (shellPath.endsWith("fish")) return "fish" + if (shellPath.endsWith("bash")) return "bash" + } + + return "sh" + } +} + +/** + * Generates a copy-pasteable script to set multiple environment variables + * and run a subsequent command. + * @param {EnvVars} envVars - An object of environment variables to set. + * @param {string} commandToRun - The command to run after setting the variables. + * @returns {string} The formatted script string. 
+ */ +// eslint-disable-next-line complexity +function generateEnvScript( + envVars: EnvVars, + commandToRun: string = "", +): string { + const shell = getShell() + const commands: Array = [] + + for (const [key, value] of Object.entries(envVars)) { + if (value === undefined) { + continue // Skip undefined values + } + + // Best-effort quoting to handle spaces and special characters. + // PowerShell and cmd handle quotes differently from Unix shells. + let escapedValue: string + if (shell === "cmd") { + // CMD is tricky with quotes. Often it's safer without them if no spaces. + escapedValue = value.includes(" ") ? `"${value}"` : value + } else { + // For PowerShell and Unix shells, wrapping in double quotes is generally safe. + // We escape any internal double quotes for robustness. + escapedValue = `"${value.replaceAll('"', String.raw`\"`)}"` + } + + switch (shell) { + case "powershell": { + commands.push(`$env:${key} = ${escapedValue}`) + break + } + case "cmd": { + commands.push(`set ${key}=${escapedValue}`) + break + } + case "fish": { + // Fish prefers 'set -gx KEY VALUE' syntax. + commands.push(`set -gx ${key} ${escapedValue}`) + break + } + default: { + commands.push(`export ${key}=${escapedValue}`) + break + } + } + } + + const intro = `# Paste the following into your terminal (${shell}) to set environment variables and run the command:\n` + const finalCommand = commandToRun ? `\n${commandToRun}` : "" + const commandBlock = commands.join("\n") + + if (shell === "cmd") { + // For cmd, chaining is difficult. Presenting a block to copy is most reliable. + const runInstruction = + finalCommand ? `\n\n# Now, run the command:\n${commandToRun}` : "" + return `${intro}${commandBlock}${runInstruction}` + } + + return `${intro}${commandBlock}${finalCommand}` +} + +// --- Example Usage --- + +// 1. Define the environment variables and the final command. 
+const serverUrl = "http://localhost:1234/v1" +const selectedModel = "claude-3-opus-20240229" +const selectedSmallModel = "claude-3-haiku-20240307" + +const envVariables: EnvVars = { + ANTHROPIC_BASE_URL: serverUrl, + ANTHROPIC_AUTH_TOKEN: "your-secret-token", + ANTHROPIC_MODEL: selectedModel, + ANTHROPIC_SMALL_FAST_MODEL: selectedSmallModel, + // You can include undefined values; the function will safely skip them. + OPTIONAL_SETTING: undefined, +} + +const command = 'claude "What is the airspeed velocity of an unladen swallow?"' + +// 2. Generate and print the script. +const scriptString = generateEnvScript(envVariables, command) +console.log(scriptString) diff --git a/src/lib/sleep.ts b/src/lib/sleep.ts deleted file mode 100644 index 35b2fd531..000000000 --- a/src/lib/sleep.ts +++ /dev/null @@ -1,4 +0,0 @@ -export const sleep = (ms: number) => - new Promise((resolve) => { - setTimeout(resolve, ms) - }) diff --git a/src/lib/token.ts b/src/lib/token.ts index aa669676d..f2cec3e0d 100644 --- a/src/lib/token.ts +++ b/src/lib/token.ts @@ -7,7 +7,7 @@ import { getDeviceCode } from "~/services/github/get-device-code" import { getGitHubUser } from "~/services/github/get-user" import { pollAccessToken } from "~/services/github/poll-access-token" -import { HTTPError } from "./http-error" +import { HTTPError } from "./error" import { state } from "./state" const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8") diff --git a/src/lib/utils.ts b/src/lib/utils.ts new file mode 100644 index 000000000..cc80be667 --- /dev/null +++ b/src/lib/utils.ts @@ -0,0 +1,26 @@ +import consola from "consola" + +import { getModels } from "~/services/copilot/get-models" +import { getVSCodeVersion } from "~/services/get-vscode-version" + +import { state } from "./state" + +export const sleep = (ms: number) => + new Promise((resolve) => { + setTimeout(resolve, ms) + }) + +export const isNullish = (value: unknown): value is null | undefined => + value === null || value === 
undefined + +export async function cacheModels(): Promise { + const models = await getModels() + state.models = models +} + +export const cacheVSCodeVersion = async () => { + const response = await getVSCodeVersion() + state.vsCodeVersion = response + + consola.info(`Using VSCode version: ${response}`) +} diff --git a/src/lib/vscode-version.ts b/src/lib/vscode-version.ts deleted file mode 100644 index 5b3301133..000000000 --- a/src/lib/vscode-version.ts +++ /dev/null @@ -1,12 +0,0 @@ -import consola from "consola" - -import { getVSCodeVersion } from "~/services/get-vscode-version" - -import { state } from "./state" - -export const cacheVSCodeVersion = async () => { - const response = await getVSCodeVersion() - state.vsCodeVersion = response - - consola.info(`Using VSCode version: ${response}`) -} diff --git a/src/main.ts b/src/main.ts index 36a6b75ae..5bc4cc83c 100644 --- a/src/main.ts +++ b/src/main.ts @@ -2,16 +2,15 @@ import { defineCommand, runMain } from "citty" import consola from "consola" +import child from "node:child_process" import { serve, type ServerHandler } from "srvx" import invariant from "tiny-invariant" -import { x } from "tinyexec" import { auth } from "./auth" -import { cacheModels } from "./lib/models" import { ensurePaths } from "./lib/paths" import { state } from "./lib/state" import { setupCopilotToken, setupGitHubToken } from "./lib/token" -import { cacheVSCodeVersion } from "./lib/vscode-version" +import { cacheModels, cacheVSCodeVersion } from "./lib/utils" import { server } from "./server" interface RunServerOptions { @@ -23,7 +22,6 @@ interface RunServerOptions { rateLimitWait: boolean githubToken?: string launchClaudeCode: boolean - launchClaudeCodeDelay: number } // eslint-disable-next-line max-lines-per-function @@ -55,8 +53,11 @@ export async function runServer(options: RunServerOptions): Promise { await setupCopilotToken() await cacheModels() + consola.info( + `Available models: \n${state.models?.data.map((model) => `- 
${model.id}`).join("\n")}`, + ) + const serverUrl = `http://localhost:${options.port}` - consola.box(`Server started at ${serverUrl}`) if (options.launchClaudeCode) { invariant(state.models, "Models should be loaded by now") @@ -77,18 +78,17 @@ export async function runServer(options: RunServerOptions): Promise { }, ) - setTimeout(() => { - x("claude", [], { - nodeOptions: { - env: { - ANTHROPIC_BASE_URL: serverUrl, - ANTHROPIC_AUTH_TOKEN: "dummy", - ANTHROPIC_MODEL: selectedModel, - ANTHROPIC_SMALL_FAST_MODEL: selectedSmallModel, - }, - }, - }) - }, options.launchClaudeCodeDelay) + child.spawn("claude", [], { + detached: true, + stdio: "ignore", + shell: true, + env: { + ANTHROPIC_BASE_URL: serverUrl, + ANTHROPIC_AUTH_TOKEN: "dummy", + ANTHROPIC_MODEL: selectedModel, + ANTHROPIC_SMALL_FAST_MODEL: selectedSmallModel, + }, + }) } serve({ @@ -148,12 +148,8 @@ const start = defineCommand({ alias: "c", type: "boolean", default: false, - description: "Run Claude Code directly after starting the server", - }, - "claude-code-delay": { - type: "string", - default: "1000", - description: "Delay in milliseconds before running Claude Code", + description: + "Generate a command to launch Claude Code with Copilot API config", }, }, run({ args }) { @@ -171,7 +167,6 @@ const start = defineCommand({ rateLimitWait: Boolean(args.wait), githubToken: args["github-token"], launchClaudeCode: args["claude-code"], - launchClaudeCodeDelay: Number.parseInt(args["claude-code-delay"], 10), }) }, }) diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 29a8719c3..69beaa105 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -4,10 +4,10 @@ import consola from "consola" import { streamSSE, type SSEMessage } from "hono/streaming" import { awaitApproval } from "~/lib/approval" -import { isNullish } from "~/lib/is-nullish" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" 
import { getTokenCount } from "~/lib/tokenizer" +import { isNullish } from "~/lib/utils" import { createChatCompletions, type ChatCompletionResponse, diff --git a/src/routes/chat-completions/route.ts b/src/routes/chat-completions/route.ts index c55a3a7b2..996de4a06 100644 --- a/src/routes/chat-completions/route.ts +++ b/src/routes/chat-completions/route.ts @@ -1,6 +1,6 @@ import { Hono } from "hono" -import { forwardError } from "~/lib/forward-error" +import { forwardError } from "~/lib/error" import { handleCompletion } from "./handler" diff --git a/src/routes/embeddings/route.ts b/src/routes/embeddings/route.ts index f18c86457..4c4fc7b8a 100644 --- a/src/routes/embeddings/route.ts +++ b/src/routes/embeddings/route.ts @@ -1,6 +1,6 @@ import { Hono } from "hono" -import { forwardError } from "~/lib/forward-error" +import { forwardError } from "~/lib/error" import { createEmbeddings, type EmbeddingRequest, diff --git a/src/routes/messages/route.ts b/src/routes/messages/route.ts index 64c04cc5e..1f4eee2f9 100644 --- a/src/routes/messages/route.ts +++ b/src/routes/messages/route.ts @@ -1,6 +1,6 @@ import { Hono } from "hono" -import { forwardError } from "~/lib/forward-error" +import { forwardError } from "~/lib/error" import { handleCompletion } from "./handler" diff --git a/src/routes/models/route.ts b/src/routes/models/route.ts index 4566ef84e..5254e2af7 100644 --- a/src/routes/models/route.ts +++ b/src/routes/models/route.ts @@ -1,8 +1,8 @@ import { Hono } from "hono" -import { forwardError } from "~/lib/forward-error" -import { cacheModels } from "~/lib/models" +import { forwardError } from "~/lib/error" import { state } from "~/lib/state" +import { cacheModels } from "~/lib/utils" export const modelRoutes = new Hono() diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index aaee86cea..da9d0c19d 100644 --- a/src/services/copilot/create-chat-completions.ts +++ 
b/src/services/copilot/create-chat-completions.ts @@ -1,7 +1,7 @@ import { events } from "fetch-event-stream" import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config" -import { HTTPError } from "~/lib/http-error" +import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export const createChatCompletions = async ( diff --git a/src/services/copilot/create-embeddings.ts b/src/services/copilot/create-embeddings.ts index 7b43a19b4..f2ad5c233 100644 --- a/src/services/copilot/create-embeddings.ts +++ b/src/services/copilot/create-embeddings.ts @@ -1,5 +1,5 @@ import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config" -import { HTTPError } from "~/lib/http-error" +import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export const createEmbeddings = async (payload: EmbeddingRequest) => { diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts index 68279a273..792adc480 100644 --- a/src/services/copilot/get-models.ts +++ b/src/services/copilot/get-models.ts @@ -1,5 +1,5 @@ import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config" -import { HTTPError } from "~/lib/http-error" +import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export const getModels = async () => { diff --git a/src/services/github/get-copilot-token.ts b/src/services/github/get-copilot-token.ts index 55701f300..98744bab1 100644 --- a/src/services/github/get-copilot-token.ts +++ b/src/services/github/get-copilot-token.ts @@ -1,5 +1,5 @@ import { GITHUB_API_BASE_URL, githubHeaders } from "~/lib/api-config" -import { HTTPError } from "~/lib/http-error" +import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export const getCopilotToken = async () => { diff --git a/src/services/github/get-device-code.ts b/src/services/github/get-device-code.ts index 1c3bebbb4..cf35f4ec9 100644 --- a/src/services/github/get-device-code.ts +++ b/src/services/github/get-device-code.ts @@ -4,7 
+4,7 @@ import { GITHUB_CLIENT_ID, standardHeaders, } from "~/lib/api-config" -import { HTTPError } from "~/lib/http-error" +import { HTTPError } from "~/lib/error" export async function getDeviceCode(): Promise { const response = await fetch(`${GITHUB_BASE_URL}/login/device/code`, { diff --git a/src/services/github/get-user.ts b/src/services/github/get-user.ts index 215907546..23e1b1c1c 100644 --- a/src/services/github/get-user.ts +++ b/src/services/github/get-user.ts @@ -1,5 +1,5 @@ import { GITHUB_API_BASE_URL, standardHeaders } from "~/lib/api-config" -import { HTTPError } from "~/lib/http-error" +import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export async function getGitHubUser() { diff --git a/src/services/github/poll-access-token.ts b/src/services/github/poll-access-token.ts index 938ff70bd..4639ee0dc 100644 --- a/src/services/github/poll-access-token.ts +++ b/src/services/github/poll-access-token.ts @@ -5,7 +5,7 @@ import { GITHUB_CLIENT_ID, standardHeaders, } from "~/lib/api-config" -import { sleep } from "~/lib/sleep" +import { sleep } from "~/lib/utils" import type { DeviceCodeResponse } from "./get-device-code" From 6840eb821533aacb7947ad61ded6b044d3ccffd9 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 22:18:38 +0700 Subject: [PATCH 22/25] feat: actual command gen --- src/lib/shell.ts | 104 ++++++++-------------- src/main.ts | 16 ++-- src/routes/messages/stream-translation.ts | 6 ++ 3 files changed, 50 insertions(+), 76 deletions(-) diff --git a/src/lib/shell.ts b/src/lib/shell.ts index af85083c6..6a0052004 100644 --- a/src/lib/shell.ts +++ b/src/lib/shell.ts @@ -39,84 +39,50 @@ function getShell(): ShellName { * @param {string} commandToRun - The command to run after setting the variables. * @returns {string} The formatted script string. 
*/ -// eslint-disable-next-line complexity -function generateEnvScript( +export function generateEnvScript( envVars: EnvVars, commandToRun: string = "", ): string { const shell = getShell() - const commands: Array = [] - - for (const [key, value] of Object.entries(envVars)) { - if (value === undefined) { - continue // Skip undefined values + const filteredEnvVars = Object.entries(envVars).filter( + ([, value]) => value !== undefined, + ) as Array<[string, string]> + + let commandBlock: string + + switch (shell) { + case "powershell": { + commandBlock = filteredEnvVars + .map(([key, value]) => `$env:${key} = ${value}`) + .join("; ") + break } - - // Best-effort quoting to handle spaces and special characters. - // PowerShell and cmd handle quotes differently from Unix shells. - let escapedValue: string - if (shell === "cmd") { - // CMD is tricky with quotes. Often it's safer without them if no spaces. - escapedValue = value.includes(" ") ? `"${value}"` : value - } else { - // For PowerShell and Unix shells, wrapping in double quotes is generally safe. - // We escape any internal double quotes for robustness. - escapedValue = `"${value.replaceAll('"', String.raw`\"`)}"` + case "cmd": { + commandBlock = filteredEnvVars + .map(([key, value]) => `set ${key}=${value}`) + .join(" & ") + break } - - switch (shell) { - case "powershell": { - commands.push(`$env:${key} = ${escapedValue}`) - break - } - case "cmd": { - commands.push(`set ${key}=${escapedValue}`) - break - } - case "fish": { - // Fish prefers 'set -gx KEY VALUE' syntax. - commands.push(`set -gx ${key} ${escapedValue}`) - break - } - default: { - commands.push(`export ${key}=${escapedValue}`) - break - } + case "fish": { + commandBlock = filteredEnvVars + .map(([key, value]) => `set -gx ${key} ${value}`) + .join("; ") + break + } + default: { + // bash, zsh, sh + const assignments = filteredEnvVars + .map(([key, value]) => `${key}=${value}`) + .join(" ") + commandBlock = filteredEnvVars.length > 0 ? 
`export ${assignments}` : "" + break } } - const intro = `# Paste the following into your terminal (${shell}) to set environment variables and run the command:\n` - const finalCommand = commandToRun ? `\n${commandToRun}` : "" - const commandBlock = commands.join("\n") - - if (shell === "cmd") { - // For cmd, chaining is difficult. Presenting a block to copy is most reliable. - const runInstruction = - finalCommand ? `\n\n# Now, run the command:\n${commandToRun}` : "" - return `${intro}${commandBlock}${runInstruction}` + if (commandBlock && commandToRun) { + const separator = shell === "cmd" ? " & " : " && " + return `${commandBlock}${separator}${commandToRun}` } - return `${intro}${commandBlock}${finalCommand}` + return commandBlock || commandToRun } - -// --- Example Usage --- - -// 1. Define the environment variables and the final command. -const serverUrl = "http://localhost:1234/v1" -const selectedModel = "claude-3-opus-20240229" -const selectedSmallModel = "claude-3-haiku-20240307" - -const envVariables: EnvVars = { - ANTHROPIC_BASE_URL: serverUrl, - ANTHROPIC_AUTH_TOKEN: "your-secret-token", - ANTHROPIC_MODEL: selectedModel, - ANTHROPIC_SMALL_FAST_MODEL: selectedSmallModel, - // You can include undefined values; the function will safely skip them. - OPTIONAL_SETTING: undefined, -} - -const command = 'claude "What is the airspeed velocity of an unladen swallow?"' - -// 2. Generate and print the script. 
-const scriptString = generateEnvScript(envVariables, command) -console.log(scriptString) diff --git a/src/main.ts b/src/main.ts index 5bc4cc83c..1871bd756 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,13 +1,14 @@ #!/usr/bin/env node import { defineCommand, runMain } from "citty" +import clipboard from "clipboardy" import consola from "consola" -import child from "node:child_process" import { serve, type ServerHandler } from "srvx" import invariant from "tiny-invariant" import { auth } from "./auth" import { ensurePaths } from "./lib/paths" +import { generateEnvScript } from "./lib/shell" import { state } from "./lib/state" import { setupCopilotToken, setupGitHubToken } from "./lib/token" import { cacheModels, cacheVSCodeVersion } from "./lib/utils" @@ -78,17 +79,18 @@ export async function runServer(options: RunServerOptions): Promise { }, ) - child.spawn("claude", [], { - detached: true, - stdio: "ignore", - shell: true, - env: { + const command = generateEnvScript( + { ANTHROPIC_BASE_URL: serverUrl, ANTHROPIC_AUTH_TOKEN: "dummy", ANTHROPIC_MODEL: selectedModel, ANTHROPIC_SMALL_FAST_MODEL: selectedSmallModel, }, - }) + "claude", + ) + + clipboard.writeSync(command) + consola.success("Copied Claude Code command to clipboard!") } serve({ diff --git a/src/routes/messages/stream-translation.ts b/src/routes/messages/stream-translation.ts index db70a2732..536893617 100644 --- a/src/routes/messages/stream-translation.ts +++ b/src/routes/messages/stream-translation.ts @@ -23,6 +23,12 @@ export function translateChunkToAnthropicEvents( ): Array { const events: Array = [] + // @ts-expect-error sometimes chunk.choices is empty, and idk why + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition + if (chunk.choices.length === 0) { + return events + } + const choice = chunk.choices[0] const { delta } = choice From 4d358060f1871f379ccfb5bdb9cea0f923dae3d2 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 22:26:14 +0700 Subject: [PATCH 
23/25] feat: Add Claude Code integration and documentation --- README.md | 19 ++++++++++++++++++- package.json | 2 +- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0e013da7d..b6ab5f3f7 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ ## Project Overview -A reverse-engineered proxy for the GitHub Copilot API that exposes it as an OpenAI and Anthropic compatible service. This allows you to use GitHub Copilot with any tool that supports the OpenAI Chat Completions API or the Anthropic Messages API. +A reverse-engineered proxy for the GitHub Copilot API that exposes it as an OpenAI and Anthropic compatible service. This allows you to use GitHub Copilot with any tool that supports the OpenAI Chat Completions API or the Anthropic Messages API, including to power [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview). ## Demo @@ -82,6 +82,7 @@ The following command line options are available for the `start` command: | --rate-limit | Rate limit in seconds between requests | none | -r | | --wait | Wait instead of error when rate limit is hit | false | -w | | --github-token | Provide GitHub token directly (must be generated using the `auth` subcommand) | none | -g | +| --claude-code | Generate a command to launch Claude Code with Copilot API config | false | -c | ### Auth Command Options @@ -148,6 +149,22 @@ npx copilot-api@latest auth npx copilot-api@latest auth --verbose ``` +## Using with Claude Code + +This proxy can be used to power [Claude Code](https://docs.anthropic.com/en/claude-code), an experimental conversational AI assistant for developers from Anthropic. + +To get started, run the `start` command with the `--claude-code` flag: + +```sh +npx copilot-api@latest start --claude-code +``` + +You will be prompted to select a primary model and a "small, fast" model for background tasks. After selecting the models, a command will be copied to your clipboard. 
This command sets the necessary environment variables for Claude Code to use the proxy. + +Paste and run this command in a new terminal to launch Claude Code. + +You can find more options here: [Claude Code settings](https://docs.anthropic.com/en/docs/claude-code/settings#environment-variables) + ## Running from Source The project can be run from source in several ways: diff --git a/package.json b/package.json index d65e68211..e745d7446 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "copilot-api", "version": "0.4.0", - "description": "A wrapper around GitHub Copilot API to make it OpenAI compatible, making it usable for other tools.", + "description": "A wrapper around GitHub Copilot API to make it OpenAI and Anthropic compatible, making it usable for other tools like Claude Code.", "keywords": [ "proxy", "github-copilot", From cf650c7b5e4db9e129db13095dbcf06211dd8523 Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 22:44:10 +0700 Subject: [PATCH 24/25] build: Add claude settings file --- .claude/settings.json | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .claude/settings.json diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 000000000..b2dc4e6ca --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,8 @@ +{ + "env": { + "ANTHROPIC_BASE_URL": "http://localhost:4141", + "ANTHROPIC_AUTH_TOKEN": "dummy", + "ANTHROPIC_MODEL": "gpt-4.1", + "ANTHROPIC_SMALL_FAST_MODEL": "gpt-4.1" + } +} From 5a745a5d49071378c2ffe1b374a2e53876196b3e Mon Sep 17 00:00:00 2001 From: Erick Christian Date: Fri, 20 Jun 2025 22:50:07 +0700 Subject: [PATCH 25/25] docs: Add instructions for Claude Code integration --- README.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/README.md b/README.md index b6ab5f3f7..8571d06f9 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,10 @@ npx copilot-api@latest auth --verbose This proxy can be used to power [Claude 
Code](https://docs.anthropic.com/en/claude-code), an experimental conversational AI assistant for developers from Anthropic. +There are two ways to configure Claude Code to use this proxy: + +### Interactive Setup with `--claude-code` flag + To get started, run the `start` command with the `--claude-code` flag: ```sh @@ -163,8 +167,27 @@ You will be prompted to select a primary model and a "small, fast" model for bac Paste and run this command in a new terminal to launch Claude Code. +### Manual Configuration with `settings.json` + +Alternatively, you can configure Claude Code by creating a `.claude/settings.json` file in your project's root directory. This file should contain the environment variables needed by Claude Code. This way you don't need to run the interactive setup every time. + +Here is an example `.claude/settings.json` file: + +```json +{ + "env": { + "ANTHROPIC_BASE_URL": "http://localhost:4141", + "ANTHROPIC_AUTH_TOKEN": "dummy", + "ANTHROPIC_MODEL": "gpt-4.1", + "ANTHROPIC_SMALL_FAST_MODEL": "gpt-4.1" + } +} +``` + You can find more options here: [Claude Code settings](https://docs.anthropic.com/en/docs/claude-code/settings#environment-variables) +You can also read more about IDE integration here: [Add Claude Code to your IDE](https://docs.anthropic.com/en/docs/claude-code/ide-integrations) + ## Running from Source The project can be run from source in several ways: