diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 000000000..b2dc4e6ca --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,8 @@ +{ + "env": { + "ANTHROPIC_BASE_URL": "http://localhost:4141", + "ANTHROPIC_AUTH_TOKEN": "dummy", + "ANTHROPIC_MODEL": "gpt-4.1", + "ANTHROPIC_SMALL_FAST_MODEL": "gpt-4.1" + } +} diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..ef328b4e2 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,37 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Development Commands + +- **Install dependencies**: `bun install` +- **Build**: `bun run build` +- **Dev server (watch)**: `bun run dev` +- **Production start**: `bun run start` +- **Lint**: `bun run lint` +- **Pre-commit lint/fix**: Runs automatically via git hooks (bunx eslint --fix) + +## Architecture Overview + +- **Entry point**: `src/main.ts` defines CLI subcommands (`start` and `auth`) for the Copilot API server and authentication flow. +- **Server**: `src/server.ts` sets up HTTP routes using Hono, maps OpenAI/Anthropic-compatible endpoints, and handles logging/cors. +- **Routes**: Handlers for chat completions, embeddings, models, and messages are under `src/routes/`, providing API endpoints compatible with OpenAI and Anthropic APIs. +- **Copilot communication**: `src/services/copilot/` contains methods for proxying requests (chat completions, model listing, embeddings) to the GitHub Copilot backend using user tokens. +- **Lib utilities**: `src/lib/` contains configuration, token, model caching, and error handling helpers. +- **Authentication**: `src/auth.ts` provides the CLI handler for authenticating with GitHub, managing required tokens, and persisting them locally. 
+ +## API Endpoints + +- **OpenAI-compatible**: + - `POST /v1/chat/completions` + - `GET /v1/models` + - `POST /v1/embeddings` +- **Anthropic-compatible**: + - `POST /v1/messages` + - `POST /v1/messages/count_tokens` + +## Other Notes + +- Ensure Bun (>= 1.2.x) is installed for all scripts and local dev. +- Tokens and cache are handled automatically; manual authentication can be forced with the `auth` subcommand. +- No .cursorrules, .github/copilot-instructions.md, or .cursor/rules found, so follow typical TypeScript/Bun/ESLint conventions as seen in this codebase. diff --git a/README.md b/README.md index 4645336ed..8571d06f9 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ -# Copilot API +# Copilot API Proxy -⚠️ **EDUCATIONAL PURPOSE ONLY** ⚠️ -This project is a reverse-engineered implementation of the GitHub Copilot API created for educational purposes only. It is not officially supported by GitHub and should not be used in production environments. +> [!WARNING] +> This is a reverse-engineered proxy of GitHub Copilot API. It is not supported by GitHub, and may break unexpectedly. Use at your own risk. [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/E1E519XS7W) ## Project Overview -A wrapper around GitHub Copilot API to make it OpenAI compatible, making it usable for other tools like AI assistants, local interfaces, and development utilities. +A reverse-engineered proxy for the GitHub Copilot API that exposes it as an OpenAI and Anthropic compatible service. This allows you to use GitHub Copilot with any tool that supports the OpenAI Chat Completions API or the Anthropic Messages API, including to power [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview). 
## Demo @@ -16,7 +16,7 @@ https://github.com/user-attachments/assets/7654b383-669d-4eb9-b23c-06d7aefee8c5 ## Prerequisites - Bun (>= 1.2.x) -- GitHub account with Copilot subscription (Individual or Business) +- GitHub account with Copilot subscription (individual, business, or enterprise) ## Installation @@ -64,7 +64,7 @@ npx copilot-api@latest auth Copilot API now uses a subcommand structure with two main commands: -- `start`: Start the Copilot API server (default command). This command will also handle authentication if needed. +- `start`: Start the Copilot API server. This command will also handle authentication if needed. - `auth`: Run GitHub authentication flow without starting the server. This is typically used if you need to generate a token for use with the `--github-token` option, especially in non-interactive environments. ## Command Line Options @@ -73,15 +73,16 @@ Copilot API now uses a subcommand structure with two main commands: The following command line options are available for the `start` command: -| Option | Description | Default | Alias | -| -------------- | ----------------------------------------------------------------------------- | ------- | ----- | -| --port | Port to listen on | 4141 | -p | -| --verbose | Enable verbose logging | false | -v | +| Option | Description | Default | Alias | +| -------------- | ----------------------------------------------------------------------------- | ---------- | ----- | +| --port | Port to listen on | 4141 | -p | +| --verbose | Enable verbose logging | false | -v | | --account-type | Account type to use (individual, business, enterprise) | individual | -a | -| --manual | Enable manual request approval | false | none | -| --rate-limit | Rate limit in seconds between requests | none | -r | -| --wait | Wait instead of error when rate limit is hit | false | -w | -| --github-token | Provide GitHub token directly (must be generated using the `auth` subcommand) | none | -g | +| --manual | Enable manual request 
approval | false | none | +| --rate-limit | Rate limit in seconds between requests | none | -r | +| --wait | Wait instead of error when rate limit is hit | false | -w | +| --github-token | Provide GitHub token directly (must be generated using the `auth` subcommand) | none | -g | +| --claude-code | Generate a command to launch Claude Code with Copilot API config | false | -c | ### Auth Command Options @@ -89,6 +90,29 @@ The following command line options are available for the `start` command: | --------- | ---------------------- | ------- | ----- | | --verbose | Enable verbose logging | false | -v | +## API Endpoints + +The server exposes several endpoints to interact with the Copilot API. It provides OpenAI-compatible endpoints and now also includes support for Anthropic-compatible endpoints, allowing for greater flexibility with different tools and services. + +### OpenAI Compatible Endpoints + +These endpoints mimic the OpenAI API structure. + +| Endpoint | Method | Description | +| --------------------------- | ------ | --------------------------------------------------------- | +| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. | +| `GET /v1/models` | `GET` | Lists the currently available models. | +| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. | + +### Anthropic Compatible Endpoints + +These endpoints are designed to be compatible with the Anthropic Messages API. + +| Endpoint | Method | Description | +| -------------------------------- | ------ | ------------------------------------------------------------ | +| `POST /v1/messages` | `POST` | Creates a model response for a given conversation. | +| `POST /v1/messages/count_tokens` | `POST` | Calculates the number of tokens for a given set of messages. 
| + ## Example Usage Using with npx: @@ -125,6 +149,45 @@ npx copilot-api@latest auth npx copilot-api@latest auth --verbose ``` +## Using with Claude Code + +This proxy can be used to power [Claude Code](https://docs.anthropic.com/en/claude-code), an experimental conversational AI assistant for developers from Anthropic. + +There are two ways to configure Claude Code to use this proxy: + +### Interactive Setup with `--claude-code` flag + +To get started, run the `start` command with the `--claude-code` flag: + +```sh +npx copilot-api@latest start --claude-code +``` + +You will be prompted to select a primary model and a "small, fast" model for background tasks. After selecting the models, a command will be copied to your clipboard. This command sets the necessary environment variables for Claude Code to use the proxy. + +Paste and run this command in a new terminal to launch Claude Code. + +### Manual Configuration with `settings.json` + +Alternatively, you can configure Claude Code by creating a `.claude/settings.json` file in your project's root directory. This file should contain the environment variables needed by Claude Code. This way you don't need to run the interactive setup every time. 
+ +Here is an example `.claude/settings.json` file: + +```json +{ + "env": { + "ANTHROPIC_BASE_URL": "http://localhost:4141", + "ANTHROPIC_AUTH_TOKEN": "dummy", + "ANTHROPIC_MODEL": "gpt-4.1", + "ANTHROPIC_SMALL_FAST_MODEL": "gpt-4.1" + } +} +``` + +You can find more options here: [Claude Code settings](https://docs.anthropic.com/en/docs/claude-code/settings#environment-variables) + +You can also read more about IDE integration here: [Add Claude Code to your IDE](https://docs.anthropic.com/en/docs/claude-code/ide-integrations) + ## Running from Source The project can be run from source in several ways: @@ -143,18 +206,8 @@ bun run start ## Usage Tips -- Consider using free models (e.g., Gemini, Mistral, Openrouter) as the `weak-model` -- Use architect mode sparingly -- Disable `yes-always` in your aider configuration -- Enable the `--manual` flag to review and approve each request before processing +- To avoid hitting GitHub Copilot's rate limits, you can use the following flags: + - `--manual`: Enables manual approval for each request, giving you full control over when requests are sent. + - `--rate-limit `: Enforces a minimum time interval between requests. For example, `copilot-api start --rate-limit 30` will ensure there's at least a 30-second gap between requests. + - `--wait`: Use this with `--rate-limit`. It makes the server wait for the cooldown period to end instead of rejecting the request with an error. This is useful for clients that don't automatically retry on rate limit errors. - If you have a GitHub business or enterprise plan account with Copilot, use the `--account-type` flag (e.g., `--account-type business`). 
See the [official documentation](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization) for more details. - -### Manual Request Approval - -When using the `--manual` flag, the server will prompt you to approve each incoming request: - -``` -? Accept incoming request? > (y/N) -``` - -This helps you control usage and monitor requests in real-time. diff --git a/bun.lock b/bun.lock index b9161d361..8633525a8 100644 --- a/bun.lock +++ b/bun.lock @@ -5,11 +5,13 @@ "name": "copilot-api", "dependencies": { "citty": "^0.1.6", + "clipboardy": "^4.0.0", "consola": "^3.4.2", "fetch-event-stream": "^0.1.5", "gpt-tokenizer": "^3.0.1", - "hono": "^4.7.11", + "hono": "^4.8.1", "srvx": "^0.8.0", + "tiny-invariant": "^1.3.3", }, "devDependencies": { "@echristian/eslint-config": "^0.0.43", @@ -17,7 +19,7 @@ "bumpp": "^10.2.0", "eslint": "^9.29.0", "jiti": "^2.4.2", - "knip": "^5.61.1", + "knip": "^5.61.2", "lint-staged": "^16.1.2", "prettier-plugin-packagejson": "^2.5.15", "simple-git-hooks": "^2.13.0", @@ -343,6 +345,8 @@ "cli-truncate": ["cli-truncate@4.0.0", "", { "dependencies": { "slice-ansi": "^5.0.0", "string-width": "^7.0.0" } }, "sha512-nPdaFdQ0h/GEigbPClz11D0v/ZJEwxmeVZGeMo3Z5StPtUTkA9o1lD6QwoirYiSDzbcwn2XcjwmCp68W1IS4TA=="], + "clipboardy": ["clipboardy@4.0.0", "", { "dependencies": { "execa": "^8.0.1", "is-wsl": "^3.1.0", "is64bit": "^2.0.0" } }, "sha512-5mOlNS0mhX0707P2I0aZ2V/cmHUEO/fL7VFLqszkhUsxt7RwnmrInf/eEQKlf5GzvYeHIjT+Ov1HRfNmymlG0w=="], + "cliui": ["cliui@8.0.1", "", { "dependencies": { "string-width": "^4.2.0", "strip-ansi": "^6.0.1", "wrap-ansi": "^7.0.0" } }, "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ=="], "color-convert": 
["color-convert@2.0.1", "", { "dependencies": { "color-name": "~1.1.4" } }, "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ=="], @@ -479,6 +483,8 @@ "eventemitter3": ["eventemitter3@5.0.1", "", {}, "sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA=="], + "execa": ["execa@8.0.1", "", { "dependencies": { "cross-spawn": "^7.0.3", "get-stream": "^8.0.1", "human-signals": "^5.0.0", "is-stream": "^3.0.0", "merge-stream": "^2.0.0", "npm-run-path": "^5.1.0", "onetime": "^6.0.0", "signal-exit": "^4.1.0", "strip-final-newline": "^3.0.0" } }, "sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg=="], + "exsolve": ["exsolve@1.0.5", "", {}, "sha512-pz5dvkYYKQ1AHVrgOzBKWeP4u4FRb3a6DNK2ucr0OoNwYIU4QWsJ+NM36LLzORT+z845MzKHHhpXiUF5nvQoJg=="], "fast-deep-equal": ["fast-deep-equal@3.1.3", "", {}, "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="], @@ -535,6 +541,8 @@ "get-proto": ["get-proto@1.0.1", "", { "dependencies": { "dunder-proto": "^1.0.1", "es-object-atoms": "^1.0.0" } }, "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g=="], + "get-stream": ["get-stream@8.0.1", "", {}, "sha512-VaUJspBffn/LMCJVoMvSAdmscJyS1auj5Zulnn5UoYcY531UWmdwhRWkcGKnGU93m5HSXP9LP2usOryrBtQowA=="], + "get-symbol-description": ["get-symbol-description@1.1.0", "", { "dependencies": { "call-bound": "^1.0.3", "es-errors": "^1.3.0", "get-intrinsic": "^1.2.6" } }, "sha512-w9UMqWwJxHNOvoNzSJ2oPF5wvYcvP7jUvYzhp67yEhTi17ZDBBC1z9pTdGuzjD+EFIqLSYRweZjqfiPzQ06Ebg=="], "giget": ["giget@2.0.0", "", { "dependencies": { "citty": "^0.1.6", "consola": "^3.4.0", "defu": "^6.1.4", "node-fetch-native": "^1.6.6", "nypm": "^0.6.0", "pathe": "^2.0.3" }, "bin": { "giget": "dist/cli.mjs" } }, "sha512-L5bGsVkxJbJgdnwyuheIunkGatUF/zssUoxxjACCseZYAVbaqdh9Tsmmlkl8vYan09H7sbvKt4pS8GqKLBrEzA=="], @@ -569,7 +577,9 
@@ "hasown": ["hasown@2.0.2", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ=="], - "hono": ["hono@4.7.11", "", {}, "sha512-rv0JMwC0KALbbmwJDEnxvQCeJh+xbS3KEWW5PC9cMJ08Ur9xgatI0HmtgYZfOdOSOeYsp5LO2cOhdI8cLEbDEQ=="], + "hono": ["hono@4.8.1", "", {}, "sha512-ErA2ifywnSmcnB5XDuFqGDfXJ9xuAJR2C/8cZAk6vDaOCzofB8eNlha/wZWIiamREzWk94S9Z7wHsnKQHn7Niw=="], + + "human-signals": ["human-signals@5.0.0", "", {}, "sha512-AXcZb6vzzrFAUE61HnN4mpLqd/cSIwNQjtNWR0euPm6y0iqx3G4gOXaIDdtdDwZmhwe82LA6+zinmW4UBWVePQ=="], "ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="], @@ -597,6 +607,8 @@ "is-date-object": ["is-date-object@1.1.0", "", { "dependencies": { "call-bound": "^1.0.2", "has-tostringtag": "^1.0.2" } }, "sha512-PwwhEakHVKTdRNVOw+/Gyh0+MzlCl4R6qKvkhuvLtPMggI1WAHt9sOwZxQLSGpUaDnrdyDsomoRgNnCfKNSXXg=="], + "is-docker": ["is-docker@3.0.0", "", { "bin": { "is-docker": "cli.js" } }, "sha512-eljcgEDlEns/7AXFosB5K/2nCM4P7FQPkGc/DWLy5rmFEWvZayGrik1d9/QIY5nJ4f9YsVvBkA6kJpHn9rISdQ=="], + "is-extglob": ["is-extglob@2.1.1", "", {}, "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ=="], "is-finalizationregistry": ["is-finalizationregistry@1.1.1", "", { "dependencies": { "call-bound": "^1.0.3" } }, "sha512-1pC6N8qWJbWoPtEjgcL2xyhQOP491EQjeUo3qTKcmV8YSDDJrOepfG8pcC7h/QgnQHYSv0mJ3Z/ZWxmatVrysg=="], @@ -609,6 +621,8 @@ "is-immutable-type": ["is-immutable-type@5.0.1", "", { "dependencies": { "@typescript-eslint/type-utils": "^8.0.0", "ts-api-utils": "^2.0.0", "ts-declaration-location": "^1.0.4" }, "peerDependencies": { "eslint": "*", "typescript": ">=4.7.4" } }, "sha512-LkHEOGVZZXxGl8vDs+10k3DvP++SEoYEAJLRk6buTFi6kD7QekThV7xHS0j6gpnUCQ0zpud/gMDGiV4dQneLTg=="], + "is-inside-container": ["is-inside-container@1.0.0", "", { "dependencies": { "is-docker": "^3.0.0" }, "bin": { 
"is-inside-container": "cli.js" } }, "sha512-KIYLCCJghfHZxqjYBE7rEy0OBuTd5xCHS7tHVgvCLkx7StIoaxwNW3hCALgEUjFfeRk+MG/Qxmp/vtETEF3tRA=="], + "is-map": ["is-map@2.0.3", "", {}, "sha512-1Qed0/Hr2m+YqxnM09CjA2d/i6YZNfF6R2oRAOj36eUdS6qIV/huPJNSEpKbupewFs+ZsJlxsjjPbc0/afW6Lw=="], "is-number": ["is-number@7.0.0", "", {}, "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng=="], @@ -623,6 +637,8 @@ "is-shared-array-buffer": ["is-shared-array-buffer@1.0.4", "", { "dependencies": { "call-bound": "^1.0.3" } }, "sha512-ISWac8drv4ZGfwKl5slpHG9OwPNty4jOWPRIhBpxOoD+hqITiwuipOQ2bNthAzwA3B4fIjO4Nln74N0S9byq8A=="], + "is-stream": ["is-stream@3.0.0", "", {}, "sha512-LnQR4bZ9IADDRSkvpqMGvt/tEJWclzklNgSw48V5EAaAeDd6qGvN8ei6k5p0tvxSR171VmGyHuTiAOfxAbr8kA=="], + "is-string": ["is-string@1.1.1", "", { "dependencies": { "call-bound": "^1.0.3", "has-tostringtag": "^1.0.2" } }, "sha512-BtEeSsoaQjlSPBemMQIrY1MY0uM6vnS1g5fmufYOtnxLGUZM2178PKbhsk7Ffv58IX+ZtcvoGwccYsh0PglkAA=="], "is-symbol": ["is-symbol@1.1.1", "", { "dependencies": { "call-bound": "^1.0.2", "has-symbols": "^1.1.0", "safe-regex-test": "^1.1.0" } }, "sha512-9gGx6GTtCQM73BgmHQXfDmLtfjjTUDSyoxTCbp5WtoixAhfgsDirWIcVQ/IHpvI5Vgd5i/J5F7B9cN/WlVbC/w=="], @@ -635,6 +651,10 @@ "is-weakset": ["is-weakset@2.0.4", "", { "dependencies": { "call-bound": "^1.0.3", "get-intrinsic": "^1.2.6" } }, "sha512-mfcwb6IzQyOKTs84CQMrOwW4gQcaTOAWJ0zzJCl2WSPDrWk/OzDaImWFH3djXhb24g4eudZfLRozAvPGw4d9hQ=="], + "is-wsl": ["is-wsl@3.1.0", "", { "dependencies": { "is-inside-container": "^1.0.0" } }, "sha512-UcVfVfaK4Sc4m7X3dUSoHoozQGBEFeDC+zVo06t98xe8CzHSZZBekNXH+tu0NalHolcJ/QAGqS46Hef7QXBIMw=="], + + "is64bit": ["is64bit@2.0.0", "", { "dependencies": { "system-architecture": "^0.1.0" } }, "sha512-jv+8jaWCl0g2lSBkNSVXdzfBA0npK1HGC2KtWM9FumFRoGS94g3NbCCLVnCYHLjp4GrW2KZeeSTMo5ddtznmGw=="], + "isarray": ["isarray@2.0.5", "", {}, 
"sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw=="], "isexe": ["isexe@2.0.0", "", {}, "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="], @@ -665,7 +685,7 @@ "keyv": ["keyv@4.5.4", "", { "dependencies": { "json-buffer": "3.0.1" } }, "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw=="], - "knip": ["knip@5.61.1", "", { "dependencies": { "@nodelib/fs.walk": "^1.2.3", "fast-glob": "^3.3.3", "formatly": "^0.2.4", "jiti": "^2.4.2", "js-yaml": "^4.1.0", "minimist": "^1.2.8", "oxc-resolver": "^11.1.0", "picocolors": "^1.1.1", "picomatch": "^4.0.1", "smol-toml": "^1.3.4", "strip-json-comments": "5.0.2", "zod": "^3.22.4", "zod-validation-error": "^3.0.3" }, "peerDependencies": { "@types/node": ">=18", "typescript": ">=5.0.4" }, "bin": { "knip": "bin/knip.js", "knip-bun": "bin/knip-bun.js" } }, "sha512-keywAzpu8R9S50JRT3qxilb1i/pv3ztBHhZ3tRuHvRclqfhfPkY7kb/G6l4q7zozbyndidSr7IScvayG76HtkA=="], + "knip": ["knip@5.61.2", "", { "dependencies": { "@nodelib/fs.walk": "^1.2.3", "fast-glob": "^3.3.3", "formatly": "^0.2.4", "jiti": "^2.4.2", "js-yaml": "^4.1.0", "minimist": "^1.2.8", "oxc-resolver": "^11.1.0", "picocolors": "^1.1.1", "picomatch": "^4.0.1", "smol-toml": "^1.3.4", "strip-json-comments": "5.0.2", "zod": "^3.22.4", "zod-validation-error": "^3.0.3" }, "peerDependencies": { "@types/node": ">=18", "typescript": ">=5.0.4" }, "bin": { "knip": "bin/knip.js", "knip-bun": "bin/knip-bun.js" } }, "sha512-ZBv37zDvZj0/Xwk0e93xSjM3+5bjxgqJ0PH2GlB5tnWV0ktXtmatWLm+dLRUCT/vpO3SdGz2nNAfvVhuItUNcQ=="], "language-subtag-registry": ["language-subtag-registry@0.3.23", "", {}, "sha512-0K65Lea881pHotoGEa5gDlMxt3pctLi2RplBb7Ezh4rRdLEOtgi7n4EwK9lamnUCkKBqaeKRVebTq6BAxSkpXQ=="], @@ -697,10 +717,14 @@ "math-intrinsics": ["math-intrinsics@1.1.0", "", {}, "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g=="], + 
"merge-stream": ["merge-stream@2.0.0", "", {}, "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w=="], + "merge2": ["merge2@1.4.1", "", {}, "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg=="], "micromatch": ["micromatch@4.0.8", "", { "dependencies": { "braces": "^3.0.3", "picomatch": "^2.3.1" } }, "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA=="], + "mimic-fn": ["mimic-fn@4.0.0", "", {}, "sha512-vqiC06CuhBTUdZH+RYl8sFrL096vA45Ok5ISO6sE/Mr1jRbGH4Csnhi8f3wKVl7x8mO4Au7Ir9D3Oyv1VYMFJw=="], + "mimic-function": ["mimic-function@5.0.1", "", {}, "sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA=="], "min-indent": ["min-indent@1.0.1", "", {}, "sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg=="], @@ -729,6 +753,8 @@ "node-releases": ["node-releases@2.0.19", "", {}, "sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw=="], + "npm-run-path": ["npm-run-path@5.3.0", "", { "dependencies": { "path-key": "^4.0.0" } }, "sha512-ppwTtiJZq0O/ai0z7yfudtBpWIoxM8yE6nHi1X47eFR2EWORqfbu6CnPlNsjeN683eT0qG6H/Pyf9fCcvjnnnQ=="], + "nypm": ["nypm@0.6.0", "", { "dependencies": { "citty": "^0.1.6", "consola": "^3.4.0", "pathe": "^2.0.3", "pkg-types": "^2.0.0", "tinyexec": "^0.3.2" }, "bin": { "nypm": "dist/cli.mjs" } }, "sha512-mn8wBFV9G9+UFHIrq+pZ2r2zL4aPau/by3kJb3cM7+5tQHMt6HGQB8FDIeKFYp8o0D2pnH6nVsO88N4AmUxIWg=="], "object-assign": ["object-assign@4.1.1", "", {}, "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg=="], @@ -745,7 +771,7 @@ "ohash": ["ohash@2.0.11", "", {}, "sha512-RdR9FQrFwNBNXAr4GixM8YaRZRJ5PUWbKYbE5eOsrwAjJW0q2REGcf79oYPsLyskQCZG1PLN+S/K1V00joZAoQ=="], - "onetime": ["onetime@7.0.0", "", { "dependencies": { "mimic-function": "^5.0.0" } }, 
"sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ=="], + "onetime": ["onetime@6.0.0", "", { "dependencies": { "mimic-fn": "^4.0.0" } }, "sha512-1FlR+gjXK7X+AsAHso35MnyN5KqGwJRi/31ft6x0M194ht7S+rWAvd7PHss9xSKMzE0asv1pyIHaJYq+BbacAQ=="], "optionator": ["optionator@0.9.4", "", { "dependencies": { "deep-is": "^0.1.3", "fast-levenshtein": "^2.0.6", "levn": "^0.4.1", "prelude-ls": "^1.2.1", "type-check": "^0.4.0", "word-wrap": "^1.2.5" } }, "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g=="], @@ -901,6 +927,8 @@ "strip-ansi-cjs": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="], + "strip-final-newline": ["strip-final-newline@3.0.0", "", {}, "sha512-dOESqjYr96iWYylGObzd39EuNTa5VJxyvVAEm5Jnh7KGo75V43Hk1odPQkNDyXNmUR6k+gEiDVXnjB8HJ3crXw=="], + "strip-indent": ["strip-indent@4.0.0", "", { "dependencies": { "min-indent": "^1.0.1" } }, "sha512-mnVSV2l+Zv6BLpSD/8V87CW/y9EmmbYzGCIavsnsI6/nwn26DwffM/yztm30Z/I2DY9wdS3vXVCMnHDgZaVNoA=="], "strip-json-comments": ["strip-json-comments@5.0.2", "", {}, "sha512-4X2FR3UwhNUE9G49aIsJW5hRRR3GXGTBTZRMfv568O60ojM8HcWjV/VxAxCDW3SUND33O6ZY66ZuRcdkj73q2g=="], @@ -911,10 +939,14 @@ "synckit": ["synckit@0.11.8", "", { "dependencies": { "@pkgr/core": "^0.2.4" } }, "sha512-+XZ+r1XGIJGeQk3VvXhT6xx/VpbHsRzsTkGgF6E5RX9TTXD0118l87puaEBZ566FhqblC6U0d4XnubznJDm30A=="], + "system-architecture": ["system-architecture@0.1.0", "", {}, "sha512-ulAk51I9UVUyJgxlv9M6lFot2WP3e7t8Kz9+IS6D4rVba1tR9kON+Ey69f+1R4Q8cd45Lod6a4IcJIxnzGc/zA=="], + "thenify": ["thenify@3.3.1", "", { "dependencies": { "any-promise": "^1.0.0" } }, "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw=="], "thenify-all": ["thenify-all@1.6.0", "", { "dependencies": { "thenify": ">= 3.1.0 < 4" } }, 
"sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA=="], + "tiny-invariant": ["tiny-invariant@1.3.3", "", {}, "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg=="], + "tinyexec": ["tinyexec@1.0.1", "", {}, "sha512-5uC6DDlmeqiOwCPmK9jMSdOuZTh8bU39Ys6yidB+UTt5hfZUPGAypSgFRiEp+jbi9qH40BLDvy85jIU88wKSqw=="], "tinyglobby": ["tinyglobby@0.2.14", "", { "dependencies": { "fdir": "^6.4.4", "picomatch": "^4.0.2" } }, "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ=="], @@ -1087,6 +1119,8 @@ "mlly/pkg-types": ["pkg-types@1.3.1", "", { "dependencies": { "confbox": "^0.1.8", "mlly": "^1.7.4", "pathe": "^2.0.1" } }, "sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ=="], + "npm-run-path/path-key": ["path-key@4.0.0", "", {}, "sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ=="], + "nypm/tinyexec": ["tinyexec@0.3.2", "", {}, "sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA=="], "pkg-types/confbox": ["confbox@0.2.1", "", {}, "sha512-hkT3yDPFbs95mNCy1+7qNKC6Pro+/ibzYxtM2iqEigpf0sVw+bg4Zh9/snjsBcf990vfIsg5+1U7VyiyBb3etg=="], @@ -1095,6 +1129,8 @@ "regjsparser/jsesc": ["jsesc@3.0.2", "", { "bin": { "jsesc": "bin/jsesc" } }, "sha512-xKqzzWXDttJuOcawBt4KnKHHIf5oQ/Cxax+0PWFG+DFDgHNAdi+TXECADI+RYiFUMmx8792xsMbbgXj4CwnP4g=="], + "restore-cursor/onetime": ["onetime@7.0.0", "", { "dependencies": { "mimic-function": "^5.0.0" } }, "sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ=="], + "slice-ansi/ansi-styles": ["ansi-styles@6.2.1", "", {}, "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug=="], "sort-package-json/detect-indent": ["detect-indent@7.0.1", "", {}, 
"sha512-Mc7QhQ8s+cLrnUfU/Ji94vG/r8M26m8f++vyres4ZoojaRDpZ1eSIh/EpzLNwlWuvzSZ3UbDFspjFvTDXe6e/g=="], diff --git a/docs/anthropic.md b/docs/anthropic.md new file mode 100644 index 000000000..9c4766cb6 --- /dev/null +++ b/docs/anthropic.md @@ -0,0 +1,442 @@ +# Claude API Reference + +This document provides a condensed overview of the Anthropic Claude API, covering messages, token counting, and model management. + +--- + +## Messages API + +The Messages API is the primary way to interact with Claude for multi-turn conversations and single queries. + +### Create a Message + +Creates a model response for the given conversation. + +**Endpoint:** `POST /v1/messages` + +#### Request Body + +The request body is a JSON object. + +| Parameter | Type | Required | Description | +| :--------------- | :-------------- | :------- | :---------------------------------------------------------------------------------------------------------------------------------------------------- | +| `model` | string | Yes | The model that will complete your prompt. Example: `claude-3-7-sonnet-20250219`. | +| `messages` | array | Yes | A list of input messages comprising the conversation so far. See [The Message Object](https://www.google.com/search?q=%23the-message-object) below. | +| `max_tokens` | integer | Yes | The maximum number of tokens to generate. Different models have different maximum values for this parameter. | +| `system` | string or array | No | A system prompt to provide context and instructions to Claude, such as specifying a role or goal. | +| `metadata` | object | No | An object for metadata, such as a `user_id`, to help detect abuse. Do not include any personally identifying information. | +| `stop_sequences` | array | No | Custom text sequences that will cause the model to stop generating. | +| `stream` | boolean | No | If set, the response will be incrementally streamed using server-sent events. Defaults to `false`. 
| +| `temperature` | number | No | The amount of randomness injected into the response, ranging from `0.0` to `1.0`. Defaults to `1.0`. | +| `thinking` | object | No | Configuration for enabling Claude's extended thinking process, which shows reasoning steps before the final answer. | +| `top_p` | number | No | Use nucleus sampling. The model considers tokens with `top_p` probability mass. You should alter `temperature` or `top_p`, but not both. | +| `top_k` | integer | No | Only sample from the top K options for each subsequent token. Recommended for advanced use cases only. | +| `tools` | array | No | A list of tools the model may use. See [The Tool Object](https://www.google.com/search?q=%23the-tool-object) below. | +| `tool_choice` | object | No | Controls how the model should use tools. Can be `{"type": "auto"}`, `{"type": "any"}`, `{"type": "tool", "name": "tool_name"}` or `{"type": "none"}`. | +| `service_tier` | string | No | Can be set to `auto` or `standard_only` to determine whether to use priority capacity. | + +#### The Message Object + +The `messages` array consists of message objects, where each object has a `role` and `content`. Models are trained on alternating `user` and `assistant` conversational turns. + +| Parameter | Type | Required | Description | +| :-------- | :-------------- | :------- | :---------------------------------------------------------------------------------------------------------- | +| `role` | string | Yes | The role of the message author. Must be either `user` or `assistant`. | +| `content` | string or array | Yes | The content of the message. This can be a simple string or an array of content blocks for multimodal input. | + +**Content Blocks:** For multimodal input, the `content` array can contain different types of blocks. + +- **`text`**: A block with a `type` of "text" and a `text` field containing the string. +- **`image`**: Starting with Claude 3 models, you can send image content blocks. 
The `source` object must specify a `type` of "base64", a `media_type` (`image/jpeg`, `image/png`, `image/gif`, or `image/webp`), and the `data`. +- **`tool_result`**: A block used to return the output of a tool back to the model. It includes the `tool_use_id`, the `content` from the tool's execution, and an optional `is_error` flag. + +#### The Tool Object + +The `tools` array allows you to define client-side tools the model can call. + +| Parameter | Type | Required | Description | +| :------------- | :----- | :------- | :--------------------------------------------------------------------------------------------------------------- | +| `name` | string | Yes | The name of the tool, which must match the pattern `^[a-zA-Z0-9_-]{1,64}$`. | +| `description` | string | No | A detailed, strongly-recommended description of what the tool does, which helps the model decide when to use it. | +| `input_schema` | object | Yes | A [JSON Schema](https://json-schema.org/draft/2020-12) object describing the parameters the tool accepts. | + +#### Response (200 OK) + +A successful **non-streaming** request returns a `Message` object. + +| Parameter | Type | Description | +| :-------------- | :----- | :---------------------------------------------------------------------------------------------------------------------------------------- | +| `id` | string | A unique identifier for the message object. | +| `type` | string | The object type, which is always `message`. | +| `role` | string | The role of the author, which is always `assistant`. | +| `content` | array | An array of content blocks generated by the model (e.g., `text` or `tool_use`). | +| `model` | string | The model that handled the request. | +| `stop_reason` | string | The reason the model stopped generating tokens. Can be `end_turn`, `max_tokens`, `stop_sequence`, `tool_use`, `pause_turn`, or `refusal`. 
| +| `stop_sequence` | string | If the model was stopped by a custom stop sequence, this field will contain which sequence was generated. Can be null. | +| `usage` | object | An object containing token usage statistics. See [The Usage Object](https://www.google.com/search?q=%23the-usage-object). | + +#### Streaming Response (200 OK) + +When `stream: true` is set, the API streams the response using server-sent events (SSE). Each event is named (e.g., `event: message_start`) and contains associated JSON data. + +The event flow for a stream is as follows: + +1. `message_start`: Contains a `Message` object with empty `content`. +2. A series of content blocks. Each block has a `content_block_start` event, one or more `content_block_delta` events, and a `content_block_stop` event. The `index` in these events corresponds to the content block's position in the final `content` array. +3. One or more `message_delta` events, which indicate top-level changes to the final `Message` object. The `usage` field in this event contains cumulative token counts. +4. A final `message_stop` event. + +The stream may also include `ping` events to keep the connection alive and `error` events if issues occur. + +##### Content Block Delta Types + +Each `content_block_delta` event contains a `delta` object that updates a content block. + +- **Text Delta**: Updates a `text` content block. + + ```json + event: content_block_delta + data: {"type": "content_block_delta","index": 0,"delta": {"type": "text_delta", "text": "ello frien"}} + ``` + +- **Input JSON Delta**: Used for `tool_use` blocks, these deltas contain partial JSON strings for the tool's `input` field. The partial strings must be accumulated and parsed into a final JSON object upon receiving the `content_block_stop` event. 
+ + ```json + event: content_block_delta + data: {"type": "content_block_delta","index": 1,"delta": {"type": "input_json_delta","partial_json": "{\"location\": \"San Fra"}}} + ``` + +- **Thinking Delta**: When extended thinking is enabled, these deltas update the `thinking` field of a thinking content block. A special `signature_delta` event is sent just before the `content_block_stop` to verify the block's integrity. + + ```json + event: content_block_delta + data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "Let me solve this step by step:\n\n1. First break down 27 * 453"}} + ``` + +#### The Usage Object + +The `usage` object details billing and rate-limit token counts. + +| Parameter | Type | Description | +| :---------------------------- | :------ | :-------------------------------------------------------------------------- | +| `input_tokens` | integer | The number of input tokens used. | +| `output_tokens` | integer | The number of output tokens generated. | +| `cache_creation_input_tokens` | integer | The number of input tokens used to create a cache entry. | +| `cache_read_input_tokens` | integer | The number of input tokens read from the cache. | +| `service_tier` | string | The service tier used for the request (`standard`, `priority`, or `batch`). 
| + +### Streaming Examples + +#### Basic Streaming Request + +```bash +curl https://api.anthropic.com/v1/messages \ + --header "anthropic-version: 2023-06-01" \ + --header "content-type: application/json" \ + --header "x-api-key: $ANTHROPIC_API_KEY" \ + --data \ +'{ + "model": "claude-opus-4-20250514", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 256, + "stream": true +}' +``` + +**Response:** + +```json +event: message_start +data: {"type": "message_start", "message": {"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY", "type": "message", "role": "assistant", "content": [], "model": "claude-opus-4-20250514", "stop_reason": null, "stop_sequence": null, "usage": {"input_tokens": 25, "output_tokens": 1}}} + +event: content_block_start +data: {"type": "content_block_start", "index": 0, "content_block": {"type": "text", "text": ""}} + +event: ping +data: {"type": "ping"} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hello"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "!"}} + +event: content_block_stop +data: {"type": "content_block_stop", "index": 0} + +event: message_delta +data: {"type": "message_delta", "delta": {"stop_reason": "end_turn", "stop_sequence":null}, "usage": {"output_tokens": 15}} + +event: message_stop +data: {"type": "message_stop"} +``` + +#### Streaming Request with Tool Use + +```bash +curl https://api.anthropic.com/v1/messages \ + -H "content-type: application/json" \ + -H "x-api-key: $ANTHROPIC_API_KEY" \ + -H "anthropic-version: 2023-06-01" \ + -d '{ + "model": "claude-opus-4-20250514", + "max_tokens": 1024, + "tools": [ + { + "name": "get_weather", + "description": "Get the current weather in a given location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA" + } + }, + "required": ["location"] + } + } + ], + "tool_choice": {"type": "any"}, + "messages": [ + { + "role": "user", + "content": "What is the weather like in San Francisco?" + } + ], + "stream": true + }' +``` + +**Response:** + +```json +event: message_start +data: {"type":"message_start","message":{"id":"msg_014p7gG3wDgGV9EUtLvnow3U","type":"message","role":"assistant","model":"claude-opus-4-20250514","stop_sequence":null,"usage":{"input_tokens":472,"output_tokens":2},"content":[],"stop_reason":null}} + +event: content_block_start +data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}} + +event: ping +data: {"type": "ping"} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Okay"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":","}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" let"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"'s"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" check"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" the"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" weather"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" for"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" San"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" Francisco"}} + +event: content_block_delta +data: 
{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":","}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" CA"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":":"}} + +event: content_block_stop +data: {"type":"content_block_stop","index":0} + +event: content_block_start +data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"location\":"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" \"San"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" Francisc"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"o,"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" CA\""}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":", "}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"\"unit\": \"fah"}} + +event: content_block_delta +data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"renheit\"}"}} + +event: content_block_stop +data: {"type":"content_block_stop","index":1} + +event: message_delta +data: 
{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":89}} + +event: message_stop +data: {"type":"message_stop"} +``` + +#### Streaming Request with Extended Thinking + +```bash +curl https://api.anthropic.com/v1/messages \ + --header "x-api-key: $ANTHROPIC_API_KEY" \ + --header "anthropic-version: 2023-06-01" \ + --header "content-type: application/json" \ + --data \ +'{ + "model": "claude-opus-4-20250514", + "max_tokens": 20000, + "stream": true, + "thinking": { + "type": "enabled", + "budget_tokens": 16000 + }, + "messages": [ + { + "role": "user", + "content": "What is 27 * 453?" + } + ] +}' +``` + +**Response:** + +```json +event: message_start +data: {"type": "message_start", "message": {"id": "msg_01...", "type": "message", "role": "assistant", "content": [], "model": "claude-opus-4-20250514", "stop_reason": null, "stop_sequence": null}} + +event: content_block_start +data: {"type": "content_block_start", "index": 0, "content_block": {"type": "thinking", "thinking": ""}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "Let me solve this step by step:\n\n1. First break down 27 * 453"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n2. 453 = 400 + 50 + 3"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n3. 27 * 400 = 10,800"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n4. 27 * 50 = 1,350"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n5. 27 * 3 = 81"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\n6. 
10,800 + 1,350 + 81 = 12,231"}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 0, "delta": {"type": "signature_delta", "signature": "EqQBCgIYAhIM1gbcDa9GJwZA2b3hGgxBdjrkzLoky3dl1pkiMOYds..."}} + +event: content_block_stop +data: {"type": "content_block_stop", "index": 0} + +event: content_block_start +data: {"type": "content_block_start", "index": 1, "content_block": {"type": "text", "text": ""}} + +event: content_block_delta +data: {"type": "content_block_delta", "index": 1, "delta": {"type": "text_delta", "text": "27 * 453 = 12,231"}} + +event: content_block_stop +data: {"type": "content_block_stop", "index": 1} + +event: message_delta +data: {"type": "message_delta", "delta": {"stop_reason": "end_turn", "stop_sequence": null}} + +event: message_stop +data: {"type": "message_stop"} +``` + +### Count Message Tokens + +Calculates the number of tokens for a given set of messages without creating it. + +**Endpoint:** `POST /v1/messages/count_tokens` + +#### Request Body + +The request accepts a subset of the "Create a Message" parameters. + +| Parameter | Type | Required | Description | +| :--------- | :-------------- | :------- | :----------------------------------- | +| `model` | string | Yes | The model that would be used. | +| `messages` | array | Yes | A list of input messages. | +| `system` | string or array | No | A system prompt. | +| `tools` | array | No | A list of tools the model could use. | + +#### Response (200 OK) + +A successful request returns a JSON object. + +| Parameter | Type | Description | +| :------------- | :------ | :----------------------------------------------------------------------------------------- | +| `input_tokens` | integer | The total number of tokens across the provided list of messages, system prompt, and tools. | + +--- + +## Models API + +The Models API allows you to list and retrieve information about available models. 
+ +### List Models + +Lists the currently available models, with the most recently released models appearing first. + +**Endpoint:** `GET /v1/models` + +#### Response (200 OK) + +A successful request returns a list of model objects. + +| Parameter | Type | Description | +| :--------- | :------ | :------------------------------------------------------------------------------ | +| `data` | array | A list of [Model Objects](#the-model-object). | +| `has_more` | boolean | Indicates if there are more results in the requested page direction. | + +### Get a Model + +Retrieves a specific model instance by its ID or alias. + +**Endpoint:** `GET /v1/models/{model_id}` + +#### Response (200 OK) + +A successful request returns a single [Model Object](#the-model-object). + +#### The Model Object + +| Parameter | Type | Description | +| :------------- | :----- | :------------------------------------------------------------------ | +| `id` | string | The unique model identifier. Example: `claude-3-7-sonnet-20250219`. | +| `type` | string | The object type, which is always `model`. | +| `display_name` | string | A human-readable name for the model. Example: `Claude 3.7 Sonnet`. | +| `created_at` | string | An RFC 3339 datetime string of when the model was released. | diff --git a/docs/mapping.md b/docs/mapping.md new file mode 100644 index 000000000..93e1a2c52 --- /dev/null +++ b/docs/mapping.md @@ -0,0 +1,133 @@ +### **Comprehensive API Translation: Anthropic Messages & OpenAI Chat Completions** + +This document provides a detailed, side-by-side technical mapping of the Anthropic Messages API and the OpenAI Chat Completions API, verified against their respective specifications. + +--- + +### **1. 
Endpoints & Authentication** + +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :------------------- | :------------------------ | :----------------------------------- | +| **Primary Endpoint** | `POST /v1/messages` | `POST /v1/chat/completions` | +| **Auth Header** | `x-api-key: YOUR_API_KEY` | `Authorization: Bearer YOUR_API_KEY` | + +--- + +### **2. Core Request Parameters** + +This table outlines the translation of primary request body fields. + +| Parameter | Anthropic Messages API (`claude.md`) | OpenAI Chat Completions API (`openapi.documented.yml`) | +| :------------------ | :------------------------------------------- | :------------------------------------------------------------------ | +| **Model** | `model` (e.g., `claude-3-7-sonnet-20250219`) | `model` (e.g., `gpt-4o`) | +| **System Prompt** | `system` (A top-level string) | Prepending a message with `role: "system"` to the `messages` array. | +| **Max Tokens** | `max_tokens` (integer) | `max_tokens` (integer) | +| **Stop Sequences** | `stop_sequences` (array of strings) | `stop` (array of strings) | +| **Streaming** | `stream` (boolean) | `stream` (boolean) | +| **Temperature** | `temperature` (0.0 to 1.0) | `temperature` (0.0 to 2.0) | +| **Top P** | `top_p` (0.0 to 1.0) | `top_p` (0.0 to 1.0) | +| **Top K** | `top_k` (integer) | **Not Supported** | +| **User Identifier** | `metadata.user_id` (string) | `user` (string) | + +--- + +### **3. Message Structure** + +Both APIs use a `messages` array, but the structure and content types differ. + +#### **3.1. Message Roles** + +| Role | Anthropic Messages API | OpenAI Chat Completions API | +| :---------------- | :-------------------------------------------- | :-------------------------- | +| **User** | `user` | `user` | +| **Assistant** | `assistant` | `assistant` | +| **System** | Handled via the top-level `system` parameter. | `system` | +| **Tool/Function** | A `user` message with `tool_result` content. | `tool` | + +#### **3.2. 
Message Content Types** + +| Content Type | Anthropic Messages API (`claude.md`) | OpenAI Chat Completions API (`openapi.documented.yml`) | +| :-------------- | :----------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------- | +| **Text** | `content` can be a single string or an array with `{"type": "text", "text": "..."}`. | A message object's `content` is a string or an array with `{"type": "text", "text": "..."}`. | +| **Image** | `content` array can contain `{"type": "image", "source": {"type": "base64", "media_type": "...", "data": "..."}}`. | `content` array can contain `{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}`. | +| **Tool Result** | A `user` message `content` array can contain `{"type": "tool_result", "tool_use_id": "...", "content": "..."}`. | A distinct message object: `{"role": "tool", "tool_call_id": "...", "content": "..."}`. | + +--- + +### **4. Tool & Function Handling** + +| Feature | Anthropic Messages API (`claude.md`) | OpenAI Chat Completions API (`openapi.documented.yml`) | +| :------------------------ | :----------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Tool Definition** | `tools` array. Each tool has `name`, `description`, and `input_schema`. | `tools` array. Each tool has `type: "function"` and a `function` object with `name`, `description`, and `parameters`. 
| +| **Tool Choice** | `tool_choice` object with `type`: \ - `"auto"` \ - `"any"` (Forces use of a tool) \ - `"tool"` (Forces specific tool) | `tool_choice` string or object: \ - `"auto"` \ - `"required"` (Forces use of a tool) \ - `{"type": "function", ...}` (Forces specific function) | +| **Tool Call in Response** | In `content` array as `{"type": "tool_use", "id": "...", "name": "...", "input": {...}}`. | In `message` object as a `tool_calls` array, with `id` and `function` object (`name`, `arguments` as JSON string). | + +--- + +### **5. Response Structure** + +| Feature | Anthropic Messages API (`claude.md`) | OpenAI Chat Completions API (`openapi.documented.yml`) | +| :------------------- | :--------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------- | +| **Primary Object** | A single response object. | A `choices` array containing one or more message objects. | +| **Stop Reason** | `stop_reason` field. Values: `end_turn`, `max_tokens`, `tool_use`, `stop_sequence`, `refusal`. | `finish_reason` field. Values: `stop`, `length`, `tool_calls`, `content_filter`. | +| **Usage Statistics** | `usage` object with `input_tokens` and `output_tokens`. | `usage` object with `prompt_tokens`, `completion_tokens`, and `total_tokens`. | + +--- + +### **6. Model & Tokenization Endpoints** + +#### **6.1. List Available Models** + +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :---------------- | :--------------------------------- | :-------------------------- | +| **Endpoint** | `GET /v1/models` | `GET /v1/models` | +| **Response** | Paginated list in `data` array. | List in `data` array. | +| **Object Fields** | `id`, `display_name`, `created_at` | `id`, `created`, `owned_by` | + +#### **6.2. 
Retrieve a Specific Model** + +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :----------- | :--------------------------- | :-------------------------- | +| **Endpoint** | `GET /v1/models/{model_id}` | `GET /v1/models/{model}` | +| **Response** | A single `ModelInfo` object. | A single `Model` object. | + +#### **6.3. Count Tokens** + +| Feature | Anthropic Messages API | OpenAI Chat Completions API | +| :---------------- | :----------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Endpoint** | `POST /v1/messages/count_tokens` | **No Direct API Endpoint** | +| **Functionality** | Counts tokens for a message payload (including images and tools) before making a full completion call. | Token counts are returned in the `usage` object only _after_ a completion is generated. Client-side libraries (e.g., `tiktoken`) must be used for pre-calculation. | +| **Response** | `{"input_tokens": ...}` | N/A | + +--- + +### **7. Streaming** + +Both APIs support streaming via Server-Sent Events (SSE), but the event structure is fundamentally different. + +- **Anthropic:** Emits a sequence of distinct, named events such as `message_start`, `content_block_start`, `content_block_delta`, and `message_stop`. This provides a highly structured stream. +- **OpenAI:** Emits a series of unnamed `data:` events containing `chat.completion.chunk` objects with partial updates. The stream terminates with `data: [DONE]`. + +A translation layer must buffer OpenAI's delta chunks to reconstruct Anthropic's structured event stream, including generating necessary IDs and calculating token usage for the final event. + +--- + +### **8. Error Handling** + +Error responses are structurally similar, containing a main `error` object. HTTP status codes generally correspond. 
+ +| HTTP Code | Anthropic `error.type` | OpenAI `error.type` | +| :-------- | :---------------------- | :------------------------ | +| 400 | `invalid_request_error` | `invalid_request_error` | +| 401 | `authentication_error` | `authentication_error` | +| 403 | `permission_error` | `permission_denied_error` | +| 429 | `rate_limit_error` | `rate_limit_error` | +| 500 | `api_error` | `internal_server_error` | + +--- + +### **9. Summary of Key Asymmetrical Features** + +- **`top_k` Sampling:** Supported by Anthropic, but not by OpenAI's Chat Completions API. +- **Partial Assistant Prefill:** Anthropic allows providing a prefix for the assistant's response, a feature OpenAI does not support. +- **Dedicated Token Counting:** Anthropic offers a specific API endpoint to count tokens before a call, whereas OpenAI does not. diff --git a/docs/openai.md b/docs/openai.md new file mode 100644 index 000000000..47f5bf85a --- /dev/null +++ b/docs/openai.md @@ -0,0 +1,192 @@ +# Create Chat Completion + +Creates a model response for the given chat conversation. + +**Endpoint:** `POST /v1/chat/completions` + +### Summary + +This endpoint generates a model response for a given conversation. It is a highly flexible endpoint that supports text generation, vision capabilities, and function calling. + +**Recommendation:** For new projects, it is recommended to use the [Responses API](/docs/api-reference/responses) to leverage the latest platform features. You can find a comparison here: [Chat Completions vs. Responses](/docs/guides/responses-vs-chat-completions?api-mode=responses). 
+ +--- + +## Request Body + +The request body must be a JSON object with the following parameters: + +| Parameter | Type | Required | Description | +| ------------------- | ---------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `messages` | array | Yes | A list of messages comprising the conversation so far. See the [Message Object](#the-message-object) section below. | +| `model` | string | Yes | ID of the model to use. See the [model overview](/docs/models) for available models. | +| `frequency_penalty` | number | No | Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. Defaults to 0. | +| `logit_bias` | map | No | A map to modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps token IDs (as keys) to an associated bias value from -100 to 100. | +| `logprobs` | boolean | No | Whether to return log probabilities of the output tokens. If true, returns the log probabilities of each output token in the `content` of `message`. Defaults to `false`. | +| `max_tokens` | integer | No | The maximum number of tokens to generate in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length. **(Deprecated in favor of `max_completion_tokens` on newer models)** | +| `n` | integer | No | How many chat completion choices to generate for each input message. Defaults to 1. | +| `presence_penalty` | number | No | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. Defaults to 0. 
| +| `response_format` | object | No | An object specifying the format that the model must output. For example, `{"type": "json_object"}`. | +| `seed` | integer | No | (Beta) If specified, the system will make a best effort to sample deterministically. | +| `stop` | string or array | No | Up to 4 sequences where the API will stop generating further tokens. | +| `stream` | boolean | No | If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available. Defaults to `false`. | +| `temperature` | number | No | What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. Defaults to 1. | +| `top_p` | number | No | An alternative to sampling with temperature, called nucleus sampling. The model considers the results of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. Defaults to 1. | +| `tools` | array | No | A list of tools the model may call. See the [Tool Object](#the-tool-object) section below. | +| `tool_choice` | string or object | No | Controls which, if any, tool is called by the model. Can be `none`, `auto`, `required`, or specify a particular function like `{"type": "function", "function": {"name": "my_function"}}`. | +| `user` | string | No | A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. | + +--- + +### The Message Object + +The `messages` array consists of message objects, where each object has a `role` and `content`. + +| Parameter | Type | Required | Description | +| -------------- | --------------- | -------- | ------------------------------------------------------------------------------------------------------ | +| `role` | string | Yes | The role of the author of this message. 
Can be `developer`, `system`, `user`, `assistant`, or `tool`. | +| `content` | string or array | Yes | The contents of the message. This can be a string or an array of content parts (for multimodal input). | +| `name` | string | No | An optional name for the participant, providing differentiation for participants of the same role. | +| `tool_calls` | array | No | The tool calls generated by the model, if any. | +| `tool_call_id` | string | No | The ID of the tool call that this message is responding to. (Required if `role` is `tool`). | + +#### User Message Content Parts (Multimodal) + +When the `content` of a `user` message is an array, it can contain a mix of text and image parts. + +| Type | Description | +| ----------- | ----------------------------------------------------------------------------------------------------------------------- | +| `text` | A text part, containing the string of text. | +| `image_url` | An image part, containing a URL or base64-encoded image data and an optional `detail` level (`low`, `high`, or `auto`). | + +### The Tool Object + +The `tools` array allows you to define functions the model can call. + +| Parameter | Type | Required | Description | +| ---------- | ------ | -------- | ---------------------------------------------------------- | +| `type` | string | Yes | The type of tool. Currently, only `function` is supported. | +| `function` | object | Yes | An object defining the function. See below. | + +#### The Function Object + +| Parameter | Type | Required | Description | +| ------------- | ------ | -------- | ----------------------------------------------------------------------------------------------------------------------------- | +| `name` | string | Yes | The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. | +| `description` | string | No | A description of what the function does, used by the model to decide when to call it. 
| +| `parameters` | object | No | The parameters the function accepts, described as a JSON Schema object. | + +--- + +## Responses + +### Successful Response (200 OK) + +A successful non-streaming request returns a JSON object with the following structure. + +| Parameter | Type | Description | +| -------------------- | ------- | --------------------------------------------------------------------------------------- | +| `id` | string | A unique identifier for the chat completion. | +| `object` | string | The object type, which is always `chat.completion`. | +| `created` | integer | The Unix timestamp (in seconds) of when the completion was created. | +| `model` | string | The model used for the chat completion. | +| `choices` | array | A list of chat completion choices. See [The Choice Object](#the-choice-object). | +| `usage` | object | Usage statistics for the completion request. See [The Usage Object](#the-usage-object). | +| `system_fingerprint` | string | This fingerprint represents the backend configuration that the model runs with. | + +#### The Choice Object + +| Parameter | Type | Description | +| --------------- | ------- | ----------------------------------------------------------------------------------------------------------- | +| `index` | integer | The index of the choice in the list of choices. | +| `message` | object | A message object containing the generated response. See below. | +| `finish_reason` | string | The reason the model stopped generating tokens. Can be `stop`, `length`, `tool_calls`, or `content_filter`. | +| `logprobs` | object | Log probability information for the choice. Null if `logprobs` was not requested. | + +#### The Response Message Object + +| Parameter | Type | Description | +| ------------ | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `role` | string | The role of the author, which will be `assistant`. 
| +| `content` | string | The text content of the message. Can be null if `tool_calls` are present. | +| `tool_calls` | array | The tool calls generated by the model, if any. Each object contains an `id`, `type` ('function'), and a `function` object with `name` and `arguments`. | + +#### The Usage Object + +| Parameter | Type | Description | +| ------------------- | ------- | --------------------------------------------- | +| `prompt_tokens` | integer | Number of tokens in the prompt. | +| `completion_tokens` | integer | Number of tokens in the generated completion. | +| `total_tokens` | integer | Total number of tokens used in the request. | + +### Streaming Response (200 OK) + +If `stream: true` is set, the API streams back a sequence of server-sent events. + +Each event is a JSON object representing a `chat.completion.chunk`. + +#### The Chat Completion Chunk Object + +| Parameter | Type | Description | +| --------- | ------- | ------------------------------------------------------------------------ | +| `id` | string | A unique identifier for the chat completion. Each chunk has the same ID. | +| `object` | string | The object type, which is always `chat.completion.chunk`. | +| `created` | integer | The Unix timestamp of when the completion was created. | +| `model` | string | The model used for the completion. | +| `choices` | array | A list of choices, where each choice contains a `delta` object. | + +#### The Delta Object + +The `delta` object contains the fields that have changed. It can include: + +- `role`: The role of the message author. +- `content`: A partial string of the message content. +- `tool_calls`: A partial list of tool calls, including the function `name` and partial `arguments`. + +The stream is terminated by a `data: [DONE]` message. + +--- + +# Models + +List and describe the various models available in the API. 
+ +## List Models + +Lists the currently available models, and provides basic information about each one such as the owner and availability. + +**Endpoint:** `GET /models` + +### Response Body + +A successful request returns a list of model objects. + +| Parameter | Type | Description | +| --------- | ------ | ---------------------------------------- | +| `object` | string | The object type, which is always "list". | +| `data` | array | A list of model objects. | + +#### The Model Object + +| Parameter | Type | Description | +| ---------- | ------- | --------------------------------------------------------------- | +| `id` | string | The model identifier, which can be referenced in API endpoints. | +| `object` | string | The object type, which is always "model". | +| `created` | integer | The Unix timestamp (in seconds) when the model was created. | +| `owned_by` | string | The organization that owns the model. | + +## Retrieve a Model + +Retrieves a model instance, providing basic information about the model such as the owner and permissioning. + +**Endpoint:** `GET /models/{model}` + +### Path Parameters + +| Parameter | Type | Required | Description | +| --------- | ------ | -------- | -------------------------------------------- | +| `model` | string | Yes | The ID of the model to use for this request. | + +### Response Body + +A successful request returns a single [Model Object](#the-model-object). 
diff --git a/package.json b/package.json index 709bab9b0..e745d7446 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "copilot-api", "version": "0.4.0", - "description": "A wrapper around GitHub Copilot API to make it OpenAI compatible, making it usable for other tools.", + "description": "A wrapper around GitHub Copilot API to make it OpenAI and Anthropic compatible, making it usable for other tools like Claude Code.", "keywords": [ "proxy", "github-copilot", @@ -39,11 +39,13 @@ }, "dependencies": { "citty": "^0.1.6", + "clipboardy": "^4.0.0", "consola": "^3.4.2", "fetch-event-stream": "^0.1.5", "gpt-tokenizer": "^3.0.1", - "hono": "^4.7.11", - "srvx": "^0.8.0" + "hono": "^4.8.1", + "srvx": "^0.8.0", + "tiny-invariant": "^1.3.3" }, "devDependencies": { "@echristian/eslint-config": "^0.0.43", @@ -51,7 +53,7 @@ "bumpp": "^10.2.0", "eslint": "^9.29.0", "jiti": "^2.4.2", - "knip": "^5.61.1", + "knip": "^5.61.2", "lint-staged": "^16.1.2", "prettier-plugin-packagejson": "^2.5.15", "simple-git-hooks": "^2.13.0", diff --git a/src/lib/approval.ts b/src/lib/approval.ts index 1a7fb6a71..35e4e2752 100644 --- a/src/lib/approval.ts +++ b/src/lib/approval.ts @@ -1,6 +1,6 @@ import consola from "consola" -import { HTTPError } from "./http-error" +import { HTTPError } from "./error" export const awaitApproval = async () => { const response = await consola.prompt(`Accept incoming request?`, { diff --git a/src/lib/forward-error.ts b/src/lib/error.ts similarity index 79% rename from src/lib/forward-error.ts rename to src/lib/error.ts index c0a1e02c2..dd2557df1 100644 --- a/src/lib/forward-error.ts +++ b/src/lib/error.ts @@ -3,7 +3,14 @@ import type { ContentfulStatusCode } from "hono/utils/http-status" import consola from "consola" -import { HTTPError } from "./http-error" +export class HTTPError extends Error { + response: Response + + constructor(message: string, response: Response) { + super(message) + this.response = response + } +} export async function 
forwardError(c: Context, error: unknown) { consola.error("Error occurred:", error) diff --git a/src/lib/http-error.ts b/src/lib/http-error.ts deleted file mode 100644 index 352d3c628..000000000 --- a/src/lib/http-error.ts +++ /dev/null @@ -1,8 +0,0 @@ -export class HTTPError extends Error { - response: Response - - constructor(message: string, response: Response) { - super(message) - this.response = response - } -} diff --git a/src/lib/is-nullish.ts b/src/lib/is-nullish.ts deleted file mode 100644 index a31cf3546..000000000 --- a/src/lib/is-nullish.ts +++ /dev/null @@ -1,2 +0,0 @@ -export const isNullish = (value: unknown): value is null | undefined => - value === null || value === undefined diff --git a/src/lib/models.ts b/src/lib/models.ts deleted file mode 100644 index d6a3516b7..000000000 --- a/src/lib/models.ts +++ /dev/null @@ -1,14 +0,0 @@ -import consola from "consola" - -import { getModels } from "~/services/copilot/get-models" - -import { state } from "./state" - -export async function cacheModels(): Promise { - const models = await getModels() - state.models = models - - consola.info( - `Available models: \n${models.data.map((model) => `- ${model.id}`).join("\n")}`, - ) -} diff --git a/src/lib/rate-limit.ts b/src/lib/rate-limit.ts index 6e85a49b7..e41f58297 100644 --- a/src/lib/rate-limit.ts +++ b/src/lib/rate-limit.ts @@ -2,8 +2,8 @@ import consola from "consola" import type { State } from "./state" -import { HTTPError } from "./http-error" -import { sleep } from "./sleep" +import { HTTPError } from "./error" +import { sleep } from "./utils" export async function checkRateLimit(state: State) { if (state.rateLimitSeconds === undefined) return diff --git a/src/lib/shell.ts b/src/lib/shell.ts new file mode 100644 index 000000000..6a0052004 --- /dev/null +++ b/src/lib/shell.ts @@ -0,0 +1,88 @@ +import { execSync } from "node:child_process" +import process from "node:process" + +type ShellName = "bash" | "zsh" | "fish" | "powershell" | "cmd" | "sh" +type 
EnvVars = Record + +function getShell(): ShellName { + const { platform, ppid, env } = process + + if (platform === "win32") { + try { + const command = `wmic process get ParentProcessId,Name | findstr "${ppid}"` + const parentProcess = execSync(command, { stdio: "pipe" }).toString() + + if (parentProcess.toLowerCase().includes("powershell.exe")) { + return "powershell" + } + } catch { + return "cmd" + } + + return "cmd" + } else { + const shellPath = env.SHELL + if (shellPath) { + if (shellPath.endsWith("zsh")) return "zsh" + if (shellPath.endsWith("fish")) return "fish" + if (shellPath.endsWith("bash")) return "bash" + } + + return "sh" + } +} + +/** + * Generates a copy-pasteable script to set multiple environment variables + * and run a subsequent command. + * @param {EnvVars} envVars - An object of environment variables to set. + * @param {string} commandToRun - The command to run after setting the variables. + * @returns {string} The formatted script string. + */ +export function generateEnvScript( + envVars: EnvVars, + commandToRun: string = "", +): string { + const shell = getShell() + const filteredEnvVars = Object.entries(envVars).filter( + ([, value]) => value !== undefined, + ) as Array<[string, string]> + + let commandBlock: string + + switch (shell) { + case "powershell": { + commandBlock = filteredEnvVars + .map(([key, value]) => `$env:${key} = ${value}`) + .join("; ") + break + } + case "cmd": { + commandBlock = filteredEnvVars + .map(([key, value]) => `set ${key}=${value}`) + .join(" & ") + break + } + case "fish": { + commandBlock = filteredEnvVars + .map(([key, value]) => `set -gx ${key} ${value}`) + .join("; ") + break + } + default: { + // bash, zsh, sh + const assignments = filteredEnvVars + .map(([key, value]) => `${key}=${value}`) + .join(" ") + commandBlock = filteredEnvVars.length > 0 ? `export ${assignments}` : "" + break + } + } + + if (commandBlock && commandToRun) { + const separator = shell === "cmd" ? 
" & " : " && " + return `${commandBlock}${separator}${commandToRun}` + } + + return commandBlock || commandToRun +} diff --git a/src/lib/sleep.ts b/src/lib/sleep.ts deleted file mode 100644 index 35b2fd531..000000000 --- a/src/lib/sleep.ts +++ /dev/null @@ -1,4 +0,0 @@ -export const sleep = (ms: number) => - new Promise((resolve) => { - setTimeout(resolve, ms) - }) diff --git a/src/lib/token.ts b/src/lib/token.ts index aa669676d..f2cec3e0d 100644 --- a/src/lib/token.ts +++ b/src/lib/token.ts @@ -7,7 +7,7 @@ import { getDeviceCode } from "~/services/github/get-device-code" import { getGitHubUser } from "~/services/github/get-user" import { pollAccessToken } from "~/services/github/poll-access-token" -import { HTTPError } from "./http-error" +import { HTTPError } from "./error" import { state } from "./state" const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8") diff --git a/src/lib/tokenizer.ts b/src/lib/tokenizer.ts index 98797c6b6..73cd499f9 100644 --- a/src/lib/tokenizer.ts +++ b/src/lib/tokenizer.ts @@ -3,13 +3,35 @@ import { countTokens } from "gpt-tokenizer/model/gpt-4o" import type { Message } from "~/services/copilot/create-chat-completions" export const getTokenCount = (messages: Array) => { - const input = messages.filter( - (m) => m.role !== "assistant" && typeof m.content === "string", - ) - const output = messages.filter((m) => m.role === "assistant") + const simplifiedMessages = messages.map((message) => { + let content = "" + if (typeof message.content === "string") { + content = message.content + } else if (Array.isArray(message.content)) { + content = message.content + .filter((part) => part.type === "text") + .map((part) => (part as { text: string }).text) + .join("") + } + return { ...message, content } + }) - const inputTokens = countTokens(input) - const outputTokens = countTokens(output) + let inputMessages = simplifiedMessages.filter((message) => { + return message.role !== "tool" + }) + let outputMessages: typeof 
simplifiedMessages = [] + + const lastMessage = simplifiedMessages.at(-1) + + if (lastMessage?.role === "assistant") { + inputMessages = simplifiedMessages.slice(0, -1) + outputMessages = [lastMessage] + } + + // @ts-expect-error TS can't infer from arr.filter() + const inputTokens = countTokens(inputMessages) + // @ts-expect-error TS can't infer from arr.filter() + const outputTokens = countTokens(outputMessages) return { input: inputTokens, diff --git a/src/lib/utils.ts b/src/lib/utils.ts new file mode 100644 index 000000000..cc80be667 --- /dev/null +++ b/src/lib/utils.ts @@ -0,0 +1,26 @@ +import consola from "consola" + +import { getModels } from "~/services/copilot/get-models" +import { getVSCodeVersion } from "~/services/get-vscode-version" + +import { state } from "./state" + +export const sleep = (ms: number) => + new Promise((resolve) => { + setTimeout(resolve, ms) + }) + +export const isNullish = (value: unknown): value is null | undefined => + value === null || value === undefined + +export async function cacheModels(): Promise { + const models = await getModels() + state.models = models +} + +export const cacheVSCodeVersion = async () => { + const response = await getVSCodeVersion() + state.vsCodeVersion = response + + consola.info(`Using VSCode version: ${response}`) +} diff --git a/src/lib/vscode-version.ts b/src/lib/vscode-version.ts deleted file mode 100644 index 5b3301133..000000000 --- a/src/lib/vscode-version.ts +++ /dev/null @@ -1,12 +0,0 @@ -import consola from "consola" - -import { getVSCodeVersion } from "~/services/get-vscode-version" - -import { state } from "./state" - -export const cacheVSCodeVersion = async () => { - const response = await getVSCodeVersion() - state.vsCodeVersion = response - - consola.info(`Using VSCode version: ${response}`) -} diff --git a/src/main.ts b/src/main.ts index 8b6dc3401..1871bd756 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,15 +1,17 @@ #!/usr/bin/env node import { defineCommand, runMain } from "citty" 
+import clipboard from "clipboardy" import consola from "consola" import { serve, type ServerHandler } from "srvx" +import invariant from "tiny-invariant" import { auth } from "./auth" -import { cacheModels } from "./lib/models" import { ensurePaths } from "./lib/paths" +import { generateEnvScript } from "./lib/shell" import { state } from "./lib/state" import { setupCopilotToken, setupGitHubToken } from "./lib/token" -import { cacheVSCodeVersion } from "./lib/vscode-version" +import { cacheModels, cacheVSCodeVersion } from "./lib/utils" import { server } from "./server" interface RunServerOptions { @@ -20,8 +22,10 @@ interface RunServerOptions { rateLimit?: number rateLimitWait: boolean githubToken?: string + launchClaudeCode: boolean } +// eslint-disable-next-line max-lines-per-function export async function runServer(options: RunServerOptions): Promise { if (options.verbose) { consola.level = 5 @@ -50,8 +54,44 @@ export async function runServer(options: RunServerOptions): Promise { await setupCopilotToken() await cacheModels() + consola.info( + `Available models: \n${state.models?.data.map((model) => `- ${model.id}`).join("\n")}`, + ) + const serverUrl = `http://localhost:${options.port}` - consola.box(`Server started at ${serverUrl}`) + + if (options.launchClaudeCode) { + invariant(state.models, "Models should be loaded by now") + + const selectedModel = await consola.prompt( + "Select a model to use with Claude Code", + { + type: "select", + options: state.models.data.map((model) => model.id), + }, + ) + + const selectedSmallModel = await consola.prompt( + "Select a small model to use with Claude Code (https://docs.anthropic.com/en/docs/claude-code/costs#background-token-usage)", + { + type: "select", + options: state.models.data.map((model) => model.id), + }, + ) + + const command = generateEnvScript( + { + ANTHROPIC_BASE_URL: serverUrl, + ANTHROPIC_AUTH_TOKEN: "dummy", + ANTHROPIC_MODEL: selectedModel, + ANTHROPIC_SMALL_FAST_MODEL: selectedSmallModel, + }, + 
"claude", + ) + + clipboard.writeSync(command) + consola.success("Copied Claude Code command to clipboard!") + } serve({ fetch: server.fetch as ServerHandler, @@ -106,6 +146,13 @@ const start = defineCommand({ description: "Provide GitHub token directly (must be generated using the `auth` subcommand)", }, + "claude-code": { + alias: "c", + type: "boolean", + default: false, + description: + "Generate a command to launch Claude Code with Copilot API config", + }, }, run({ args }) { const rateLimitRaw = args["rate-limit"] @@ -113,16 +160,15 @@ const start = defineCommand({ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition rateLimitRaw === undefined ? undefined : Number.parseInt(rateLimitRaw, 10) - const port = Number.parseInt(args.port, 10) - return runServer({ - port, + port: Number.parseInt(args.port, 10), verbose: args.verbose, accountType: args["account-type"], manual: args.manual, rateLimit, rateLimitWait: Boolean(args.wait), githubToken: args["github-token"], + launchClaudeCode: args["claude-code"], }) }, }) diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 29a8719c3..69beaa105 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -4,10 +4,10 @@ import consola from "consola" import { streamSSE, type SSEMessage } from "hono/streaming" import { awaitApproval } from "~/lib/approval" -import { isNullish } from "~/lib/is-nullish" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" +import { isNullish } from "~/lib/utils" import { createChatCompletions, type ChatCompletionResponse, diff --git a/src/routes/chat-completions/route.ts b/src/routes/chat-completions/route.ts index c55a3a7b2..996de4a06 100644 --- a/src/routes/chat-completions/route.ts +++ b/src/routes/chat-completions/route.ts @@ -1,6 +1,6 @@ import { Hono } from "hono" -import { forwardError } from 
"~/lib/forward-error" +import { forwardError } from "~/lib/error" import { handleCompletion } from "./handler" diff --git a/src/routes/embeddings/route.ts b/src/routes/embeddings/route.ts index f18c86457..4c4fc7b8a 100644 --- a/src/routes/embeddings/route.ts +++ b/src/routes/embeddings/route.ts @@ -1,6 +1,6 @@ import { Hono } from "hono" -import { forwardError } from "~/lib/forward-error" +import { forwardError } from "~/lib/error" import { createEmbeddings, type EmbeddingRequest, diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts new file mode 100644 index 000000000..31c6e86e2 --- /dev/null +++ b/src/routes/messages/anthropic-types.ts @@ -0,0 +1,201 @@ +// Anthropic API Types + +export interface AnthropicMessagesPayload { + model: string + messages: Array + max_tokens: number + system?: string | Array + metadata?: { + user_id?: string + } + stop_sequences?: Array + stream?: boolean + temperature?: number + top_p?: number + top_k?: number + tools?: Array + tool_choice?: { + type: "auto" | "any" | "tool" | "none" + name?: string + } + thinking?: { + type: "enabled" + budget_tokens?: number + } + service_tier?: "auto" | "standard_only" +} + +export interface AnthropicTextBlock { + type: "text" + text: string +} + +export interface AnthropicImageBlock { + type: "image" + source: { + type: "base64" + media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp" + data: string + } +} + +export interface AnthropicToolResultBlock { + type: "tool_result" + tool_use_id: string + content: string + is_error?: boolean +} + +export interface AnthropicToolUseBlock { + type: "tool_use" + id: string + name: string + input: Record +} + +export interface AnthropicThinkingBlock { + type: "thinking" + thinking: string +} + +export type AnthropicUserContentBlock = + | AnthropicTextBlock + | AnthropicImageBlock + | AnthropicToolResultBlock + +export type AnthropicAssistantContentBlock = + | AnthropicTextBlock + | AnthropicToolUseBlock + | 
AnthropicThinkingBlock + +export interface AnthropicUserMessage { + role: "user" + content: string | Array +} + +export interface AnthropicAssistantMessage { + role: "assistant" + content: string | Array +} + +export type AnthropicMessage = AnthropicUserMessage | AnthropicAssistantMessage + +export interface AnthropicTool { + name: string + description?: string + input_schema: Record +} + +export interface AnthropicResponse { + id: string + type: "message" + role: "assistant" + content: Array + model: string + stop_reason: + | "end_turn" + | "max_tokens" + | "stop_sequence" + | "tool_use" + | "pause_turn" + | "refusal" + | null + stop_sequence: string | null + usage: { + input_tokens: number + output_tokens: number + cache_creation_input_tokens?: number + cache_read_input_tokens?: number + service_tier?: "standard" | "priority" | "batch" + } +} + +export type AnthropicResponseContentBlock = AnthropicAssistantContentBlock + +// Anthropic Stream Event Types +export interface AnthropicMessageStartEvent { + type: "message_start" + message: Omit< + AnthropicResponse, + "content" | "stop_reason" | "stop_sequence" + > & { + content: [] + stop_reason: null + stop_sequence: null + } +} + +export interface AnthropicContentBlockStartEvent { + type: "content_block_start" + index: number + content_block: + | { type: "text"; text: string } + | (Omit & { + input: Record + }) + | { type: "thinking"; thinking: string } +} + +export interface AnthropicContentBlockDeltaEvent { + type: "content_block_delta" + index: number + delta: + | { type: "text_delta"; text: string } + | { type: "input_json_delta"; partial_json: string } + | { type: "thinking_delta"; thinking: string } + | { type: "signature_delta"; signature: string } +} + +export interface AnthropicContentBlockStopEvent { + type: "content_block_stop" + index: number +} + +export interface AnthropicMessageDeltaEvent { + type: "message_delta" + delta: { + stop_reason?: AnthropicResponse["stop_reason"] + stop_sequence?: string | 
null + } + usage?: { output_tokens: number } +} + +export interface AnthropicMessageStopEvent { + type: "message_stop" +} + +export interface AnthropicPingEvent { + type: "ping" +} + +export interface AnthropicErrorEvent { + type: "error" + error: { + type: string + message: string + } +} + +export type AnthropicStreamEventData = + | AnthropicMessageStartEvent + | AnthropicContentBlockStartEvent + | AnthropicContentBlockDeltaEvent + | AnthropicContentBlockStopEvent + | AnthropicMessageDeltaEvent + | AnthropicMessageStopEvent + | AnthropicPingEvent + | AnthropicErrorEvent + +// State for streaming translation +export interface AnthropicStreamState { + messageStartSent: boolean + contentBlockIndex: number + contentBlockOpen: boolean + toolCalls: { + [openAIToolIndex: number]: { + id: string + name: string + anthropicBlockIndex: number + } + } +} diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts new file mode 100644 index 000000000..fd8c03863 --- /dev/null +++ b/src/routes/messages/handler.ts @@ -0,0 +1,92 @@ +import type { Context } from "hono" + +import consola from "consola" +import { streamSSE } from "hono/streaming" + +import { awaitApproval } from "~/lib/approval" +import { checkRateLimit } from "~/lib/rate-limit" +import { state } from "~/lib/state" +import { + createChatCompletions, + type ChatCompletionChunk, + type ChatCompletionResponse, +} from "~/services/copilot/create-chat-completions" + +import { + type AnthropicMessagesPayload, + type AnthropicStreamState, +} from "./anthropic-types" +import { + translateToAnthropic, + translateToOpenAI, +} from "./non-stream-translation" +import { translateChunkToAnthropicEvents } from "./stream-translation" + +// eslint-disable-next-line max-lines-per-function +export async function handleCompletion(c: Context) { + await checkRateLimit(state) + + const anthropicPayload = await c.req.json() + consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) + + const 
openAIPayload = translateToOpenAI(anthropicPayload) + consola.debug( + "Translated OpenAI request payload:", + JSON.stringify(openAIPayload), + ) + + if (state.manualApprove) { + await awaitApproval() + } + + const response = await createChatCompletions(openAIPayload) + + if (isNonStreaming(response)) { + consola.debug( + "Non-streaming response from Copilot:", + JSON.stringify(response), + ) + const anthropicResponse = translateToAnthropic(response) + consola.debug( + "Translated Anthropic response:", + JSON.stringify(anthropicResponse), + ) + return c.json(anthropicResponse) + } + + consola.debug("Streaming response from Copilot") + return streamSSE(c, async (stream) => { + const streamState: AnthropicStreamState = { + messageStartSent: false, + contentBlockIndex: 0, + contentBlockOpen: false, + toolCalls: {}, + } + + for await (const rawEvent of response) { + consola.trace("Copilot raw stream event:", JSON.stringify(rawEvent)) + if (rawEvent.data === "[DONE]") { + break + } + + if (!rawEvent.data) { + continue + } + + const chunk = JSON.parse(rawEvent.data) as ChatCompletionChunk + const events = translateChunkToAnthropicEvents(chunk, streamState) + + for (const event of events) { + consola.trace("Translated Anthropic event:", JSON.stringify(event)) + await stream.writeSSE({ + event: event.type, + data: JSON.stringify(event), + }) + } + } + }) +} + +const isNonStreaming = ( + response: Awaited>, +): response is ChatCompletionResponse => Object.hasOwn(response, "choices") diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts new file mode 100644 index 000000000..f7365f461 --- /dev/null +++ b/src/routes/messages/non-stream-translation.ts @@ -0,0 +1,285 @@ +import { + type ChatCompletionResponse, + type ChatCompletionsPayload, + type ContentPart, + type Message, + type TextPart, + type Tool, + type ToolCall, +} from "~/services/copilot/create-chat-completions" + +import { + type AnthropicAssistantContentBlock, + 
type AnthropicAssistantMessage, + type AnthropicMessage, + type AnthropicMessagesPayload, + type AnthropicResponse, + type AnthropicTextBlock, + type AnthropicTool, + type AnthropicToolResultBlock, + type AnthropicToolUseBlock, + type AnthropicUserContentBlock, + type AnthropicUserMessage, +} from "./anthropic-types" +import { mapOpenAIStopReasonToAnthropic } from "./utils" + +// Payload translation + +export function translateToOpenAI( + payload: AnthropicMessagesPayload, +): ChatCompletionsPayload { + return { + model: payload.model, + messages: translateAnthropicMessagesToOpenAI( + payload.messages, + payload.system, + ), + max_tokens: payload.max_tokens, + stop: payload.stop_sequences, + stream: payload.stream, + temperature: payload.temperature, + top_p: payload.top_p, + user: payload.metadata?.user_id, + tools: translateAnthropicToolsToOpenAI(payload.tools), + tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice), + } +} + +function translateAnthropicMessagesToOpenAI( + anthropicMessages: Array, + system: string | Array | undefined, +): Array { + const systemMessages = handleSystemPrompt(system) + + const otherMessages = anthropicMessages.flatMap((message) => + message.role === "user" ? 
+ handleUserMessage(message) + : handleAssistantMessage(message), + ) + + return [...systemMessages, ...otherMessages] +} + +function handleSystemPrompt( + system: string | Array | undefined, +): Array { + if (!system) { + return [] + } + + if (typeof system === "string") { + return [{ role: "system", content: system }] + } else { + const systemText = system.map((block) => block.text).join("\n\n") + return [{ role: "system", content: systemText }] + } +} + +function handleUserMessage(message: AnthropicUserMessage): Array { + const newMessages: Array = [] + + if (Array.isArray(message.content)) { + const toolResultBlocks = message.content.filter( + (block): block is AnthropicToolResultBlock => + block.type === "tool_result", + ) + const otherBlocks = message.content.filter( + (block) => block.type !== "tool_result", + ) + + if (otherBlocks.length > 0) { + newMessages.push({ + role: "user", + content: mapContent(otherBlocks), + }) + } + + for (const block of toolResultBlocks) { + newMessages.push({ + role: "tool", + tool_call_id: block.tool_use_id, + content: block.content, + }) + } + } else { + newMessages.push({ + role: "user", + content: mapContent(message.content), + }) + } + + return newMessages +} + +function handleAssistantMessage( + message: AnthropicAssistantMessage, +): Array { + if (!Array.isArray(message.content)) { + return [ + { + role: "assistant", + content: mapContent(message.content), + }, + ] + } + + const toolUseBlocks = message.content.filter( + (block): block is AnthropicToolUseBlock => block.type === "tool_use", + ) + + const textBlocks = message.content.filter( + (block): block is AnthropicTextBlock => block.type === "text", + ) + + return toolUseBlocks.length > 0 ? 
+ [ + { + role: "assistant", + content: textBlocks.map((b) => b.text).join("\n\n") || null, + tool_calls: toolUseBlocks.map((toolUse) => ({ + id: toolUse.id, + type: "function", + function: { + name: toolUse.name, + arguments: JSON.stringify(toolUse.input), + }, + })), + }, + ] + : [ + { + role: "assistant", + content: mapContent(message.content), + }, + ] +} + +function mapContent( + content: + | string + | Array, +): string | Array | null { + if (typeof content === "string") { + return content + } + if (!Array.isArray(content)) { + return null + } + + const contentParts: Array = [] + for (const block of content) { + if (block.type === "text") { + contentParts.push({ type: "text", text: block.text }) + } else if (block.type === "image") { + contentParts.push({ + type: "image_url", + image_url: { + url: `data:${block.source.media_type};base64,${block.source.data}`, + }, + }) + } + } + return contentParts +} + +function translateAnthropicToolsToOpenAI( + anthropicTools: Array | undefined, +): Array | undefined { + if (!anthropicTools) { + return undefined + } + return anthropicTools.map((tool) => ({ + type: "function", + function: { + name: tool.name, + description: tool.description, + parameters: tool.input_schema, + }, + })) +} + +function translateAnthropicToolChoiceToOpenAI( + anthropicToolChoice: AnthropicMessagesPayload["tool_choice"], +): ChatCompletionsPayload["tool_choice"] { + if (!anthropicToolChoice) { + return undefined + } + + switch (anthropicToolChoice.type) { + case "auto": { + return "auto" + } + case "any": { + return "required" + } + case "tool": { + if (anthropicToolChoice.name) { + return { + type: "function", + function: { name: anthropicToolChoice.name }, + } + } + return undefined + } + case "none": { + return "none" + } + default: { + return undefined + } + } +} + +// Response translation + +export function translateToAnthropic( + response: ChatCompletionResponse, +): AnthropicResponse { + const choice = response.choices[0] + const 
textBlocks = getAnthropicTextBlocks(choice.message.content) + const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls) + + return { + id: response.id, + type: "message", + role: "assistant", + model: response.model, + content: [...textBlocks, ...toolUseBlocks], + stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason), + stop_sequence: null, + usage: { + input_tokens: 1, + output_tokens: 1, + }, + } +} + +function getAnthropicTextBlocks( + messageContent: Message["content"], +): Array { + if (typeof messageContent === "string") { + return [{ type: "text", text: messageContent }] + } + + if (Array.isArray(messageContent)) { + return messageContent + .filter((part): part is TextPart => part.type === "text") + .map((part) => ({ type: "text", text: part.text })) + } + + return [] +} + +function getAnthropicToolUseBlocks( + toolCalls: Array | undefined, +): Array { + if (!toolCalls) { + return [] + } + return toolCalls.map((toolCall) => ({ + type: "tool_use", + id: toolCall.id, + name: toolCall.function.name, + input: JSON.parse(toolCall.function.arguments) as Record, + })) +} diff --git a/src/routes/messages/route.ts b/src/routes/messages/route.ts new file mode 100644 index 000000000..1f4eee2f9 --- /dev/null +++ b/src/routes/messages/route.ts @@ -0,0 +1,15 @@ +import { Hono } from "hono" + +import { forwardError } from "~/lib/error" + +import { handleCompletion } from "./handler" + +export const messageRoutes = new Hono() + +messageRoutes.post("/", async (c) => { + try { + return await handleCompletion(c) + } catch (error) { + return await forwardError(c, error) + } +}) diff --git a/src/routes/messages/stream-translation.ts b/src/routes/messages/stream-translation.ts new file mode 100644 index 000000000..536893617 --- /dev/null +++ b/src/routes/messages/stream-translation.ts @@ -0,0 +1,176 @@ +import { type ChatCompletionChunk } from "~/services/copilot/create-chat-completions" + +import { + type AnthropicStreamEventData, + type 
AnthropicStreamState, +} from "./anthropic-types" +import { mapOpenAIStopReasonToAnthropic } from "./utils" + +function isToolBlockOpen(state: AnthropicStreamState): boolean { + if (!state.contentBlockOpen) { + return false + } + // Check if the current block index corresponds to any known tool call + return Object.values(state.toolCalls).some( + (tc) => tc.anthropicBlockIndex === state.contentBlockIndex, + ) +} + +// eslint-disable-next-line max-lines-per-function, complexity +export function translateChunkToAnthropicEvents( + chunk: ChatCompletionChunk, + state: AnthropicStreamState, +): Array { + const events: Array = [] + + // @ts-expect-error sometimes chunk.choices is empty, and idk why + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition + if (chunk.choices.length === 0) { + return events + } + + const choice = chunk.choices[0] + const { delta } = choice + + if (!state.messageStartSent) { + events.push({ + type: "message_start", + message: { + id: chunk.id, + type: "message", + role: "assistant", + content: [], + model: chunk.model, + stop_reason: null, + stop_sequence: null, + usage: { + input_tokens: 1, + output_tokens: 1, // Anthropic requires this to be > 0 + }, + }, + }) + state.messageStartSent = true + } + + if (delta.content) { + if (isToolBlockOpen(state)) { + // A tool block was open, so close it before starting a text block. 
+ events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockIndex++ + state.contentBlockOpen = false + } + + if (!state.contentBlockOpen) { + events.push({ + type: "content_block_start", + index: state.contentBlockIndex, + content_block: { + type: "text", + text: "", + }, + }) + state.contentBlockOpen = true + } + + events.push({ + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "text_delta", + text: delta.content, + }, + }) + } + + if (delta.tool_calls) { + for (const toolCall of delta.tool_calls) { + if (toolCall.id && toolCall.function?.name) { + // New tool call starting. + if (state.contentBlockOpen) { + // Close any previously open block. + events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockIndex++ + state.contentBlockOpen = false + } + + const anthropicBlockIndex = state.contentBlockIndex + state.toolCalls[toolCall.index] = { + id: toolCall.id, + name: toolCall.function.name, + anthropicBlockIndex, + } + + events.push({ + type: "content_block_start", + index: anthropicBlockIndex, + content_block: { + type: "tool_use", + id: toolCall.id, + name: toolCall.function.name, + input: {}, + }, + }) + state.contentBlockOpen = true + } + + if (toolCall.function?.arguments) { + const toolCallInfo = state.toolCalls[toolCall.index] + // Tool call can still be empty + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition + if (toolCallInfo) { + events.push({ + type: "content_block_delta", + index: toolCallInfo.anthropicBlockIndex, + delta: { + type: "input_json_delta", + partial_json: toolCall.function.arguments, + }, + }) + } + } + } + } + + if (choice.finish_reason) { + if (state.contentBlockOpen) { + events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockOpen = false + } + + events.push({ + type: "message_delta", + delta: { + stop_reason: 
mapOpenAIStopReasonToAnthropic(choice.finish_reason), + stop_sequence: null, + }, + usage: { + output_tokens: 1, + }, + }) + + events.push({ + type: "message_stop", + }) + } + + return events +} + +export function translateErrorToAnthropicErrorEvent(): AnthropicStreamEventData { + return { + type: "error", + error: { + type: "api_error", + message: "An unexpected error occurred during streaming.", + }, + } +} diff --git a/src/routes/messages/utils.ts b/src/routes/messages/utils.ts new file mode 100644 index 000000000..d0febfc9d --- /dev/null +++ b/src/routes/messages/utils.ts @@ -0,0 +1,16 @@ +import { type AnthropicResponse } from "./anthropic-types" + +export function mapOpenAIStopReasonToAnthropic( + finishReason: "stop" | "length" | "tool_calls" | "content_filter" | null, +): AnthropicResponse["stop_reason"] { + if (finishReason === null) { + return null + } + const stopReasonMap = { + stop: "end_turn", + length: "max_tokens", + tool_calls: "tool_use", + content_filter: "end_turn", + } as const + return stopReasonMap[finishReason] +} diff --git a/src/routes/models/route.ts b/src/routes/models/route.ts index 8e282a391..5254e2af7 100644 --- a/src/routes/models/route.ts +++ b/src/routes/models/route.ts @@ -1,14 +1,33 @@ import { Hono } from "hono" -import { forwardError } from "~/lib/forward-error" -import { getModels } from "~/services/copilot/get-models" +import { forwardError } from "~/lib/error" +import { state } from "~/lib/state" +import { cacheModels } from "~/lib/utils" export const modelRoutes = new Hono() modelRoutes.get("/", async (c) => { try { - const models = await getModels() - return c.json(models) + if (!state.models) { + // This should be handled by startup logic, but as a fallback. 
+ await cacheModels() + } + + const models = state.models?.data.map((model) => ({ + id: model.id, + object: "model", + type: "model", + created: 0, // No date available from source + created_at: new Date(0).toISOString(), // No date available from source + owned_by: model.vendor, + display_name: model.name, + })) + + return c.json({ + object: "list", + data: models, + has_more: false, + }) } catch (error) { return await forwardError(c, error) } diff --git a/src/server.ts b/src/server.ts index eb65371bf..f72d61b96 100644 --- a/src/server.ts +++ b/src/server.ts @@ -4,6 +4,7 @@ import { logger } from "hono/logger" import { completionRoutes } from "./routes/chat-completions/route" import { embeddingRoutes } from "./routes/embeddings/route" +import { messageRoutes } from "./routes/messages/route" import { modelRoutes } from "./routes/models/route" export const server = new Hono() @@ -21,3 +22,7 @@ server.route("/embeddings", embeddingRoutes) server.route("/v1/chat/completions", completionRoutes) server.route("/v1/models", modelRoutes) server.route("/v1/embeddings", embeddingRoutes) + +// Anthropic compatible endpoints +server.route("/v1/messages", messageRoutes) +server.post("/v1/messages/count_tokens", (c) => c.json({ input_tokens: 1 })) diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 7d54d11f0..da9d0c19d 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -1,7 +1,7 @@ import { events } from "fetch-event-stream" import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config" -import { HTTPError } from "~/lib/http-error" +import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export const createChatCompletions = async ( @@ -9,19 +9,15 @@ export const createChatCompletions = async ( ) => { if (!state.copilotToken) throw new Error("Copilot token not found") - for (const message of payload.messages) { - 
intoCopilotMessage(message) - } - - const visionEnable = payload.messages.some( + const enableVision = payload.messages.some( (x) => typeof x.content !== "string" - && x.content.some((x) => x.type === "image_url"), + && x.content?.some((x) => x.type === "image_url"), ) const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, { method: "POST", - headers: copilotHeaders(state, visionEnable), + headers: copilotHeaders(state, enableVision), body: JSON.stringify(payload), }) @@ -35,51 +31,59 @@ export const createChatCompletions = async ( return (await response.json()) as ChatCompletionResponse } -const intoCopilotMessage = (message: Message) => { - if (typeof message.content === "string") return false - - for (const part of message.content) { - if (part.type === "input_image") part.type = "image_url" - } -} - // Streaming types export interface ChatCompletionChunk { - choices: [Choice] - created: number - object: "chat.completion.chunk" id: string + object: "chat.completion.chunk" + created: number model: string + choices: [Choice] + system_fingerprint?: string } interface Delta { - content?: string - role?: string + content?: string | null + role?: "user" | "assistant" | "system" | "tool" + tool_calls?: Array<{ + index: number + id?: string + type?: "function" + function?: { + name?: string + arguments?: string + } + }> } interface Choice { index: number delta: Delta - finish_reason: "stop" | null - logprobs: null + finish_reason: "stop" | "length" | "tool_calls" | "content_filter" | null + logprobs: object | null } // Non-streaming types export interface ChatCompletionResponse { id: string - object: string + object: "chat.completion" created: number model: string choices: [ChoiceNonStreaming] + system_fingerprint?: string + usage?: { + prompt_tokens: number + completion_tokens: number + total_tokens: number + } } interface ChoiceNonStreaming { index: number message: Message - logprobs: null - finish_reason: "stop" + logprobs: object | null + 
finish_reason: "stop" | "length" | "tool_calls" | "content_filter" } // Payload types @@ -87,25 +91,67 @@ interface ChoiceNonStreaming { export interface ChatCompletionsPayload { messages: Array<Message> model: string - temperature?: number - top_p?: number - max_tokens?: number - stop?: Array<string> - n?: number - stream?: boolean + temperature?: number | null + top_p?: number | null + max_tokens?: number | null + stop?: string | Array<string> | null + n?: number | null + stream?: boolean | null + + frequency_penalty?: number | null + presence_penalty?: number | null + logit_bias?: Record<string, number> | null + logprobs?: boolean | null + response_format?: { type: "json_object" } | null + seed?: number | null + tools?: Array<Tool> | null + tool_choice?: + | "none" + | "auto" + | "required" + | { type: "function"; function: { name: string } } + | null + user?: string | null +} + +export interface Tool { + type: "function" + function: { + name: string + description?: string + parameters: Record<string, unknown> + } } export interface Message { - role: "user" | "assistant" | "system" - content: string | Array<ContentPart> + role: "user" | "assistant" | "system" | "tool" + content: string | Array<ContentPart> | null + + name?: string + tool_calls?: Array<ToolCall> + tool_call_id?: string +} + +export interface ToolCall { + id: string + type: "function" + function: { + name: string + arguments: string + } } -// https://platform.openai.com/docs/api-reference +export type ContentPart = TextPart | ImagePart -export interface ContentPart { - type: "input_image" | "input_text" | "image_url" - text?: string - image_url?: string +export interface TextPart { + type: "text" + text: string +} + +export interface ImagePart { + type: "image_url" + image_url: { + url: string + detail?: "low" | "high" | "auto" + } } -// https://platform.openai.com/docs/guides/images-vision#giving-a-model-images-as-input -// Note: copilot use "image_url", but openai use "input_image" diff --git a/src/services/copilot/create-embeddings.ts b/src/services/copilot/create-embeddings.ts index 
7b43a19b4..f2ad5c233 100644 --- a/src/services/copilot/create-embeddings.ts +++ b/src/services/copilot/create-embeddings.ts @@ -1,5 +1,5 @@ import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config" -import { HTTPError } from "~/lib/http-error" +import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export const createEmbeddings = async (payload: EmbeddingRequest) => { diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts index 68279a273..792adc480 100644 --- a/src/services/copilot/get-models.ts +++ b/src/services/copilot/get-models.ts @@ -1,5 +1,5 @@ import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config" -import { HTTPError } from "~/lib/http-error" +import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export const getModels = async () => { diff --git a/src/services/github/get-copilot-token.ts b/src/services/github/get-copilot-token.ts index 55701f300..98744bab1 100644 --- a/src/services/github/get-copilot-token.ts +++ b/src/services/github/get-copilot-token.ts @@ -1,5 +1,5 @@ import { GITHUB_API_BASE_URL, githubHeaders } from "~/lib/api-config" -import { HTTPError } from "~/lib/http-error" +import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export const getCopilotToken = async () => { diff --git a/src/services/github/get-device-code.ts b/src/services/github/get-device-code.ts index 1c3bebbb4..cf35f4ec9 100644 --- a/src/services/github/get-device-code.ts +++ b/src/services/github/get-device-code.ts @@ -4,7 +4,7 @@ import { GITHUB_CLIENT_ID, standardHeaders, } from "~/lib/api-config" -import { HTTPError } from "~/lib/http-error" +import { HTTPError } from "~/lib/error" export async function getDeviceCode(): Promise { const response = await fetch(`${GITHUB_BASE_URL}/login/device/code`, { diff --git a/src/services/github/get-user.ts b/src/services/github/get-user.ts index 215907546..23e1b1c1c 100644 --- a/src/services/github/get-user.ts +++ 
b/src/services/github/get-user.ts @@ -1,5 +1,5 @@ import { GITHUB_API_BASE_URL, standardHeaders } from "~/lib/api-config" -import { HTTPError } from "~/lib/http-error" +import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export async function getGitHubUser() { diff --git a/src/services/github/poll-access-token.ts b/src/services/github/poll-access-token.ts index 938ff70bd..4639ee0dc 100644 --- a/src/services/github/poll-access-token.ts +++ b/src/services/github/poll-access-token.ts @@ -5,7 +5,7 @@ import { GITHUB_CLIENT_ID, standardHeaders, } from "~/lib/api-config" -import { sleep } from "~/lib/sleep" +import { sleep } from "~/lib/utils" import type { DeviceCodeResponse } from "./get-device-code"