From b46331a750e33ee00576663db33ed7962e508cb0 Mon Sep 17 00:00:00 2001 From: BlueSkyXN <63384277+BlueSkyXN@users.noreply.github.com> Date: Mon, 30 Mar 2026 16:12:26 +0800 Subject: [PATCH 1/6] feat: rebrand to Copilot-DirectAPI and add --all-agent flag Rename project from copilot-api to Copilot-DirectAPI across package.json, README, CI, dashboard, and CLI. Centralize app constants in app-info.ts. Add legacy token migration from the old data path. Introduce --all-agent flag to force X-Initiator=agent on every request. Upload build artifacts in CI workflow. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci.yml | 14 +++- .gitignore | 3 +- README.md | 80 ++++++++++--------- bun.lock | 2 +- package.json | 10 +-- pages/index.html | 4 +- src/debug.ts | 3 +- src/lib/app-info.ts | 4 + src/lib/paths.ts | 33 +++++++- src/lib/state.ts | 2 + src/main.ts | 5 +- .../copilot/create-chat-completions.ts | 2 +- src/start.ts | 19 ++++- start.bat | 4 +- tests/create-chat-completions.test.ts | 20 +++++ 15 files changed, 148 insertions(+), 57 deletions(-) create mode 100644 src/lib/app-info.ts diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 34715c31e..4642f2754 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,4 +29,16 @@ jobs: run: bun test - name: Build - run: bun run build \ No newline at end of file + run: bun run build + + - name: Upload build artifact + uses: actions/upload-artifact@v4 + with: + name: copilot-directapi-dist-${{ github.sha }} + path: | + dist + package.json + README.md + LICENSE + if-no-files-found: error + retention-days: 14 diff --git a/.gitignore b/.gitignore index 577a4f199..d8265e427 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ node_modules/ .eslintcache # build output -dist/ \ No newline at end of file +dist/ +.DS_Store diff --git a/README.md b/README.md index 0d36c13c9..3f3ca110d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Copilot API Proxy +# Copilot-DirectAPI > [!WARNING] > 
This is a reverse-engineered proxy of GitHub Copilot API. It is not supported by GitHub, and may break unexpectedly. Use at your own risk. @@ -27,13 +27,13 @@ ## Project Overview -A reverse-engineered proxy for the GitHub Copilot API that exposes it as an OpenAI and Anthropic compatible service. This allows you to use GitHub Copilot with any tool that supports the OpenAI Chat Completions API or the Anthropic Messages API, including to power [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview). +Copilot-DirectAPI is a reverse-engineered proxy for the GitHub Copilot API that exposes it as an OpenAI and Anthropic compatible service. This allows you to use GitHub Copilot with any tool that supports the OpenAI Chat Completions API or the Anthropic Messages API, including to power [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview). ## Features - **OpenAI & Anthropic Compatibility**: Exposes GitHub Copilot as an OpenAI-compatible (`/v1/chat/completions`, `/v1/models`, `/v1/embeddings`) and Anthropic-compatible (`/v1/messages`) API. - **Claude Code Integration**: Easily configure and launch [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) to use Copilot as its backend with a simple command-line flag (`--claude-code`). -- **Usage Dashboard**: A web-based dashboard to monitor your Copilot API usage, view quotas, and see detailed statistics. +- **Usage Dashboard**: A web-based dashboard to monitor your Copilot-DirectAPI usage, view quotas, and see detailed statistics. - **Rate Limit Control**: Manage API usage with rate-limiting options (`--rate-limit`) and a waiting mechanism (`--wait`) to prevent errors from rapid requests. - **Manual Request Approval**: Manually approve or deny each API request for fine-grained control over usage (`--manual`). - **Token Visibility**: Option to display GitHub and Copilot tokens during authentication and refresh for debugging (`--show-token`). 
@@ -62,7 +62,7 @@ bun install Build image ```sh -docker build -t copilot-api . +docker build -t copilot-directapi . ``` Run the container @@ -74,11 +74,11 @@ mkdir -p ./copilot-data # Run the container with a bind mount to persist the token # This ensures your authentication survives container restarts -docker run -p 4141:4141 -v $(pwd)/copilot-data:/root/.local/share/copilot-api copilot-api +docker run -p 4141:4141 -v $(pwd)/copilot-data:/root/.local/share/Copilot-DirectAPI copilot-directapi ``` > **Note:** -> The GitHub token and related data will be stored in `copilot-data` on your host. This is mapped to `/root/.local/share/copilot-api` inside the container, ensuring persistence across restarts. +> The GitHub token and related data will be stored in `copilot-data` on your host. This is mapped to `/root/.local/share/Copilot-DirectAPI` inside the container, ensuring persistence across restarts. Existing data from the old `copilot-api` path is migrated automatically. ### Docker with Environment Variables @@ -86,13 +86,13 @@ You can pass the GitHub token directly to the container using environment variab ```sh # Build with GitHub token -docker build --build-arg GH_TOKEN=your_github_token_here -t copilot-api . +docker build --build-arg GH_TOKEN=your_github_token_here -t copilot-directapi . # Run with GitHub token -docker run -p 4141:4141 -e GH_TOKEN=your_github_token_here copilot-api +docker run -p 4141:4141 -e GH_TOKEN=your_github_token_here copilot-directapi # Run with additional options -docker run -p 4141:4141 -e GH_TOKEN=your_token copilot-api start --verbose --port 4141 +docker run -p 4141:4141 -e GH_TOKEN=your_token copilot-directapi start --verbose --port 4141 ``` ### Docker Compose Example @@ -100,7 +100,7 @@ docker run -p 4141:4141 -e GH_TOKEN=your_token copilot-api start --verbose --por ```yaml version: "3.8" services: - copilot-api: + copilot-directapi: build: . 
ports: - "4141:4141" @@ -120,27 +120,29 @@ The Docker image includes: You can run the project directly using npx: +The published package name is `copilot-directapi`. The CLI binary name is `Copilot-DirectAPI`. + ```sh -npx copilot-api@latest start +npx copilot-directapi@latest start ``` With options: ```sh -npx copilot-api@latest start --port 8080 +npx copilot-directapi@latest start --port 8080 ``` For authentication only: ```sh -npx copilot-api@latest auth +npx copilot-directapi@latest auth ``` ## Command Structure -Copilot API now uses a subcommand structure with these main commands: +Copilot-DirectAPI now uses a subcommand structure with these main commands: -- `start`: Start the Copilot API server. This command will also handle authentication if needed. +- `start`: Start the Copilot-DirectAPI server. This command will also handle authentication if needed. - `auth`: Run GitHub authentication flow without starting the server. This is typically used if you need to generate a token for use with the `--github-token` option, especially in non-interactive environments. - `check-usage`: Show your current GitHub Copilot usage and quota information directly in the terminal (no server required). - `debug`: Display diagnostic information including version, runtime details, file paths, and authentication status. Useful for troubleshooting and support. 
@@ -156,11 +158,12 @@ The following command line options are available for the `start` command: | --port | Port to listen on | 4141 | -p | | --verbose | Enable verbose logging | false | -v | | --account-type | Account type to use (individual, business, enterprise) | individual | -a | +| --all-agent | Always send `X-Initiator=agent` for compatibility debugging | false | none | | --manual | Enable manual request approval | false | none | | --rate-limit | Rate limit in seconds between requests | none | -r | | --wait | Wait instead of error when rate limit is hit | false | -w | | --github-token | Provide GitHub token directly (must be generated using the `auth` subcommand) | none | -g | -| --claude-code | Generate a command to launch Claude Code with Copilot API config | false | -c | +| --claude-code | Generate a command to launch Claude Code with Copilot-DirectAPI config | false | -c | | --show-token | Show GitHub and Copilot tokens on fetch and refresh | false | none | | --proxy-env | Initialize proxy from environment variables | false | none | @@ -179,7 +182,7 @@ The following command line options are available for the `start` command: ## API Endpoints -The server exposes several endpoints to interact with the Copilot API. It provides OpenAI-compatible endpoints and now also includes support for Anthropic-compatible endpoints, allowing for greater flexibility with different tools and services. +The server exposes several endpoints to interact with GitHub Copilot through Copilot-DirectAPI. It provides OpenAI-compatible endpoints and also includes support for Anthropic-compatible endpoints, allowing for greater flexibility with different tools and services. 
### OpenAI Compatible Endpoints @@ -215,58 +218,61 @@ Using with npx: ```sh # Basic usage with start command -npx copilot-api@latest start +npx copilot-directapi@latest start # Run on custom port with verbose logging -npx copilot-api@latest start --port 8080 --verbose +npx copilot-directapi@latest start --port 8080 --verbose # Use with a business plan GitHub account -npx copilot-api@latest start --account-type business +npx copilot-directapi@latest start --account-type business # Use with an enterprise plan GitHub account -npx copilot-api@latest start --account-type enterprise +npx copilot-directapi@latest start --account-type enterprise + +# Force X-Initiator=agent on every request for compatibility debugging +npx copilot-directapi@latest start --all-agent # Enable manual approval for each request -npx copilot-api@latest start --manual +npx copilot-directapi@latest start --manual # Set rate limit to 30 seconds between requests -npx copilot-api@latest start --rate-limit 30 +npx copilot-directapi@latest start --rate-limit 30 # Wait instead of error when rate limit is hit -npx copilot-api@latest start --rate-limit 30 --wait +npx copilot-directapi@latest start --rate-limit 30 --wait # Provide GitHub token directly -npx copilot-api@latest start --github-token ghp_YOUR_TOKEN_HERE +npx copilot-directapi@latest start --github-token ghp_YOUR_TOKEN_HERE # Run only the auth flow -npx copilot-api@latest auth +npx copilot-directapi@latest auth # Run auth flow with verbose logging -npx copilot-api@latest auth --verbose +npx copilot-directapi@latest auth --verbose # Show your Copilot usage/quota in the terminal (no server needed) -npx copilot-api@latest check-usage +npx copilot-directapi@latest check-usage # Display debug information for troubleshooting -npx copilot-api@latest debug +npx copilot-directapi@latest debug # Display debug information in JSON format -npx copilot-api@latest debug --json +npx copilot-directapi@latest debug --json # Initialize proxy from environment 
variables (HTTP_PROXY, HTTPS_PROXY, etc.) -npx copilot-api@latest start --proxy-env +npx copilot-directapi@latest start --proxy-env ``` ## Using the Usage Viewer -After starting the server, a URL to the Copilot Usage Dashboard will be displayed in your console. This dashboard is a web interface for monitoring your API usage. +After starting the server, a URL to the Copilot-DirectAPI Usage Dashboard will be displayed in your console. This dashboard is a web interface for monitoring your API usage. 1. Start the server. For example, using npx: ```sh - npx copilot-api@latest start + npx copilot-directapi@latest start ``` 2. The server will output a URL to the usage viewer. Copy and paste this URL into your browser. It will look something like this: - `https://ericc-ch.github.io/copilot-api?endpoint=http://localhost:4141/usage` + `https://blueskyxn.github.io/Copilot-DirectAPI/?endpoint=http://localhost:4141/usage` - If you use the `start.bat` script on Windows, this page will open automatically. The dashboard provides a user-friendly interface to view your Copilot usage data: @@ -276,7 +282,7 @@ The dashboard provides a user-friendly interface to view your Copilot usage data - **Usage Quotas**: View a summary of your usage quotas for different services like Chat and Completions, displayed with progress bars for a quick overview. - **Detailed Information**: See the full JSON response from the API for a detailed breakdown of all available usage statistics. - **URL-based Configuration**: You can also specify the API endpoint directly in the URL using a query parameter. This is useful for bookmarks or sharing links. 
For example: - `https://ericc-ch.github.io/copilot-api?endpoint=http://your-api-server/usage` + `https://blueskyxn.github.io/Copilot-DirectAPI/?endpoint=http://your-api-server/usage` ## Using with Claude Code @@ -289,7 +295,7 @@ There are two ways to configure Claude Code to use this proxy: To get started, run the `start` command with the `--claude-code` flag: ```sh -npx copilot-api@latest start --claude-code +npx copilot-directapi@latest start --claude-code ``` You will be prompted to select a primary model and a "small, fast" model for background tasks. After selecting the models, a command will be copied to your clipboard. This command sets the necessary environment variables for Claude Code to use the proxy. @@ -346,6 +352,6 @@ bun run start - To avoid hitting GitHub Copilot's rate limits, you can use the following flags: - `--manual`: Enables manual approval for each request, giving you full control over when requests are sent. - - `--rate-limit `: Enforces a minimum time interval between requests. For example, `copilot-api start --rate-limit 30` will ensure there's at least a 30-second gap between requests. + - `--rate-limit `: Enforces a minimum time interval between requests. For example, `Copilot-DirectAPI start --rate-limit 30` will ensure there's at least a 30-second gap between requests. - `--wait`: Use this with `--rate-limit`. It makes the server wait for the cooldown period to end instead of rejecting the request with an error. This is useful for clients that don't automatically retry on rate limit errors. - If you have a GitHub business or enterprise plan account with Copilot, use the `--account-type` flag (e.g., `--account-type business`). 
See the [official documentation](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization) for more details. diff --git a/bun.lock b/bun.lock index 20e895e7f..df251ceb3 100644 --- a/bun.lock +++ b/bun.lock @@ -2,7 +2,7 @@ "lockfileVersion": 1, "workspaces": { "": { - "name": "copilot-api", + "name": "copilot-directapi", "dependencies": { "citty": "^0.1.6", "clipboardy": "^5.0.0", diff --git a/package.json b/package.json index a5adbb8e7..fc78d440a 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { - "name": "copilot-api", + "name": "copilot-directapi", "version": "0.7.0", "description": "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!", "keywords": [ @@ -7,16 +7,16 @@ "github-copilot", "openai-compatible" ], - "homepage": "https://github.com/ericc-ch/copilot-api", - "bugs": "https://github.com/ericc-ch/copilot-api/issues", + "homepage": "https://github.com/BlueSkyXN/Copilot-DirectAPI", + "bugs": "https://github.com/BlueSkyXN/Copilot-DirectAPI/issues", "repository": { "type": "git", - "url": "git+https://github.com/ericc-ch/copilot-api.git" + "url": "git+https://github.com/BlueSkyXN/Copilot-DirectAPI.git" }, "author": "Erick Christian ", "type": "module", "bin": { - "copilot-api": "./dist/main.js" + "Copilot-DirectAPI": "./dist/main.js" }, "files": [ "dist" diff --git a/pages/index.html b/pages/index.html index 57d16ef02..cfff01a4e 100644 --- a/pages/index.html +++ b/pages/index.html @@ -3,7 +3,7 @@ - Copilot API Usage Dashboard + Copilot-DirectAPI Usage Dashboard @@ -123,7 +123,7 @@ /> - Copilot API Usage Dashboard + Copilot-DirectAPI Usage Dashboard

Should be the same as the one in VSCode diff --git a/src/debug.ts b/src/debug.ts index b2aff8671..b2f5faac4 100644 --- a/src/debug.ts +++ b/src/debug.ts @@ -5,6 +5,7 @@ import consola from "consola" import fs from "node:fs/promises" import os from "node:os" +import { APP_NAME } from "./lib/app-info" import { PATHS } from "./lib/paths" interface DebugInfo { @@ -81,7 +82,7 @@ async function getDebugInfo(): Promise { } function printDebugInfoPlain(info: DebugInfo): void { - consola.info(`copilot-api debug + consola.info(`${APP_NAME} debug Version: ${info.version} Runtime: ${info.runtime.name} ${info.runtime.version} (${info.runtime.platform} ${info.runtime.arch}) diff --git a/src/lib/app-info.ts b/src/lib/app-info.ts new file mode 100644 index 000000000..467a5f837 --- /dev/null +++ b/src/lib/app-info.ts @@ -0,0 +1,4 @@ +export const APP_NAME = "Copilot-DirectAPI" +export const PACKAGE_NAME = "copilot-directapi" +export const LEGACY_PACKAGE_NAME = "copilot-api" +export const USAGE_VIEWER_URL = "https://blueskyxn.github.io/Copilot-DirectAPI/" diff --git a/src/lib/paths.ts b/src/lib/paths.ts index 8d0a9f02b..8a0017bb6 100644 --- a/src/lib/paths.ts +++ b/src/lib/paths.ts @@ -2,9 +2,17 @@ import fs from "node:fs/promises" import os from "node:os" import path from "node:path" -const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api") +import { APP_NAME, LEGACY_PACKAGE_NAME } from "./app-info" +const APP_DIR = path.join(os.homedir(), ".local", "share", APP_NAME) +const LEGACY_APP_DIR = path.join( + os.homedir(), + ".local", + "share", + LEGACY_PACKAGE_NAME, +) const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token") +const LEGACY_GITHUB_TOKEN_PATH = path.join(LEGACY_APP_DIR, "github_token") export const PATHS = { APP_DIR, @@ -13,6 +21,7 @@ export const PATHS = { export async function ensurePaths(): Promise { await fs.mkdir(PATHS.APP_DIR, { recursive: true }) + await migrateLegacyGithubToken() await ensureFile(PATHS.GITHUB_TOKEN_PATH) } @@ -24,3 +33,25 @@ 
async function ensureFile(filePath: string): Promise { await fs.chmod(filePath, 0o600) } } + +async function migrateLegacyGithubToken(): Promise { + if (await pathExists(PATHS.GITHUB_TOKEN_PATH)) { + return + } + + if (!(await pathExists(LEGACY_GITHUB_TOKEN_PATH))) { + return + } + + await fs.copyFile(LEGACY_GITHUB_TOKEN_PATH, PATHS.GITHUB_TOKEN_PATH) + await fs.chmod(PATHS.GITHUB_TOKEN_PATH, 0o600) +} + +async function pathExists(filePath: string): Promise { + try { + await fs.access(filePath, fs.constants.F_OK) + return true + } catch { + return false + } +} diff --git a/src/lib/state.ts b/src/lib/state.ts index 5ba4dc1d1..f80043c11 100644 --- a/src/lib/state.ts +++ b/src/lib/state.ts @@ -8,6 +8,7 @@ export interface State { models?: ModelsResponse vsCodeVersion?: string + forceAgentInitiator: boolean manualApprove: boolean rateLimitWait: boolean showToken: boolean @@ -19,6 +20,7 @@ export interface State { export const state: State = { accountType: "individual", + forceAgentInitiator: false, manualApprove: false, rateLimitWait: false, showToken: false, diff --git a/src/main.ts b/src/main.ts index 4f6ca784b..2f5d78b48 100644 --- a/src/main.ts +++ b/src/main.ts @@ -2,6 +2,7 @@ import { defineCommand, runMain } from "citty" +import { APP_NAME } from "./lib/app-info" import { auth } from "./auth" import { checkUsage } from "./check-usage" import { debug } from "./debug" @@ -9,9 +10,9 @@ import { start } from "./start" const main = defineCommand({ meta: { - name: "copilot-api", + name: APP_NAME, description: - "A wrapper around GitHub Copilot API to make it OpenAI compatible, making it usable for other tools.", + `${APP_NAME} wraps GitHub Copilot API and exposes an OpenAI/Anthropic-compatible interface.`, }, subCommands: { auth, start, "check-usage": checkUsage, debug }, }) diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 8534151da..6749e855e 100644 --- 
a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -18,7 +18,7 @@ export const createChatCompletions = async ( // Agent/user check for X-Initiator header // Determine if any message is from an agent ("assistant" or "tool") - const isAgentCall = payload.messages.some((msg) => + const isAgentCall = state.forceAgentInitiator || payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role), ) diff --git a/src/start.ts b/src/start.ts index 14abbbdff..6ba9c137b 100644 --- a/src/start.ts +++ b/src/start.ts @@ -6,6 +6,7 @@ import consola from "consola" import { serve, type ServerHandler } from "srvx" import invariant from "tiny-invariant" +import { APP_NAME, USAGE_VIEWER_URL } from "./lib/app-info" import { ensurePaths } from "./lib/paths" import { initProxyFromEnv } from "./lib/proxy" import { generateEnvScript } from "./lib/shell" @@ -18,6 +19,7 @@ interface RunServerOptions { port: number verbose: boolean accountType: string + allAgent: boolean manual: boolean rateLimit?: number rateLimitWait: boolean @@ -38,10 +40,15 @@ export async function runServer(options: RunServerOptions): Promise { } state.accountType = options.accountType + state.forceAgentInitiator = options.allAgent if (options.accountType !== "individual") { consola.info(`Using ${options.accountType} plan GitHub account`) } + if (options.allAgent) { + consola.info("All-agent mode enabled: forcing X-Initiator=agent") + } + state.manualApprove = options.manual state.rateLimitSeconds = options.rateLimit state.rateLimitWait = options.rateLimitWait @@ -111,7 +118,7 @@ export async function runServer(options: RunServerOptions): Promise { } consola.box( - `🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage`, + `🌐 ${APP_NAME} Usage Viewer: ${USAGE_VIEWER_URL}?endpoint=${serverUrl}/usage`, ) serve({ @@ -123,7 +130,7 @@ export async function runServer(options: RunServerOptions): Promise { export const start = 
defineCommand({ meta: { name: "start", - description: "Start the Copilot API server", + description: `Start the ${APP_NAME} server`, }, args: { port: { @@ -144,6 +151,11 @@ export const start = defineCommand({ default: "individual", description: "Account type to use (individual, business, enterprise)", }, + "all-agent": { + type: "boolean", + default: false, + description: "Always send X-Initiator=agent for compatibility debugging", + }, manual: { type: "boolean", default: false, @@ -172,7 +184,7 @@ export const start = defineCommand({ type: "boolean", default: false, description: - "Generate a command to launch Claude Code with Copilot API config", + `Generate a command to launch Claude Code with ${APP_NAME} config`, }, "show-token": { type: "boolean", @@ -195,6 +207,7 @@ export const start = defineCommand({ port: Number.parseInt(args.port, 10), verbose: args.verbose, accountType: args["account-type"], + allAgent: args["all-agent"], manual: args.manual, rateLimit, rateLimitWait: args.wait, diff --git a/start.bat b/start.bat index 1a0f8cb83..8e5070e08 100644 --- a/start.bat +++ b/start.bat @@ -1,6 +1,6 @@ @echo off echo ================================================ -echo GitHub Copilot API Server with Usage Viewer +echo Copilot-DirectAPI Server with Usage Viewer echo ================================================ echo. @@ -14,7 +14,7 @@ echo Starting server... echo The usage viewer page will open automatically after the server starts echo. 
-start "" "https://ericc-ch.github.io/copilot-api?endpoint=http://localhost:4141/usage" +start "" "https://blueskyxn.github.io/Copilot-DirectAPI/?endpoint=http://localhost:4141/usage" bun run dev pause diff --git a/tests/create-chat-completions.test.ts b/tests/create-chat-completions.test.ts index d18e741aa..9f4c989f5 100644 --- a/tests/create-chat-completions.test.ts +++ b/tests/create-chat-completions.test.ts @@ -9,6 +9,7 @@ import { createChatCompletions } from "../src/services/copilot/create-chat-compl state.copilotToken = "test-token" state.vsCodeVersion = "1.0.0" state.accountType = "individual" +state.forceAgentInitiator = false // Helper to mock fetch const fetchMock = mock( @@ -54,3 +55,22 @@ test("sets X-Initiator to user if only user present", async () => { ).headers expect(headers["X-Initiator"]).toBe("user") }) + +test("forces X-Initiator to agent in all-agent mode", async () => { + state.forceAgentInitiator = true + + try { + const payload: ChatCompletionsPayload = { + messages: [{ role: "user", content: "hi" }], + model: "gpt-test", + } + await createChatCompletions(payload) + expect(fetchMock).toHaveBeenCalled() + const headers = ( + fetchMock.mock.calls[2][1] as { headers: Record } + ).headers + expect(headers["X-Initiator"]).toBe("agent") + } finally { + state.forceAgentInitiator = false + } +}) From 2eaf44865f6828b64b235aaaf9dfbdd32bda4669 Mon Sep 17 00:00:00 2001 From: BlueSkyXN <63384277+BlueSkyXN@users.noreply.github.com> Date: Mon, 30 Mar 2026 16:22:49 +0800 Subject: [PATCH 2/6] chore: upgrade CI actions and fix code style Bump actions/checkout to v5 and actions/upload-artifact to v6; fix trailing newlines and auto-format string templates. 
Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci.yml | 4 ++-- .github/workflows/deploy-pages.yml | 4 ++-- .github/workflows/release-docker.yml | 3 +-- .github/workflows/release.yml | 2 +- src/main.ts | 5 ++--- src/services/copilot/create-chat-completions.ts | 6 +++--- src/start.ts | 3 +-- 7 files changed, 12 insertions(+), 15 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4642f2754..3d9ebf7e9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: oven-sh/setup-bun@v2 with: @@ -32,7 +32,7 @@ jobs: run: bun run build - name: Upload build artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: copilot-directapi-dist-${{ github.sha }} path: | diff --git a/.github/workflows/deploy-pages.yml b/.github/workflows/deploy-pages.yml index e5b05974c..adbd293ac 100644 --- a/.github/workflows/deploy-pages.yml +++ b/.github/workflows/deploy-pages.yml @@ -25,7 +25,7 @@ jobs: url: ${{ steps.deployment.outputs.page_url }} steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Setup Pages uses: actions/configure-pages@v4 @@ -37,4 +37,4 @@ jobs: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v4 \ No newline at end of file + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index 97b124ba1..797a4a460 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -32,7 +32,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v5 - name: Set version id: version @@ -88,4 +88,3 @@ jobs: tags: ${{ steps.meta.outputs.tags }} platforms: linux/amd64,linux/arm64 labels: ${{ steps.meta.outputs.labels }} - diff --git a/.github/workflows/release.yml 
b/.github/workflows/release.yml index 4bfcf0433..5c81e4f04 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,7 +13,7 @@ jobs: release: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 diff --git a/src/main.ts b/src/main.ts index 2f5d78b48..c33a8d417 100644 --- a/src/main.ts +++ b/src/main.ts @@ -2,17 +2,16 @@ import { defineCommand, runMain } from "citty" -import { APP_NAME } from "./lib/app-info" import { auth } from "./auth" import { checkUsage } from "./check-usage" import { debug } from "./debug" +import { APP_NAME } from "./lib/app-info" import { start } from "./start" const main = defineCommand({ meta: { name: APP_NAME, - description: - `${APP_NAME} wraps GitHub Copilot API and exposes an OpenAI/Anthropic-compatible interface.`, + description: `${APP_NAME} wraps GitHub Copilot API and exposes an OpenAI/Anthropic-compatible interface.`, }, subCommands: { auth, start, "check-usage": checkUsage, debug }, }) diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 6749e855e..260fb19ea 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -18,9 +18,9 @@ export const createChatCompletions = async ( // Agent/user check for X-Initiator header // Determine if any message is from an agent ("assistant" or "tool") - const isAgentCall = state.forceAgentInitiator || payload.messages.some((msg) => - ["assistant", "tool"].includes(msg.role), - ) + const isAgentCall = + state.forceAgentInitiator + || payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role)) // Build headers and add X-Initiator const headers: Record = { diff --git a/src/start.ts b/src/start.ts index 6ba9c137b..922ddaa69 100644 --- a/src/start.ts +++ b/src/start.ts @@ -183,8 +183,7 @@ export const start = defineCommand({ alias: "c", type: "boolean", default: false, - 
description: - `Generate a command to launch Claude Code with ${APP_NAME} config`, + description: `Generate a command to launch Claude Code with ${APP_NAME} config`, }, "show-token": { type: "boolean", From c07a321e103b706ab8e5e10471f32584f49ae51e Mon Sep 17 00:00:00 2001 From: BlueSkyXN <63384277+BlueSkyXN@users.noreply.github.com> Date: Mon, 30 Mar 2026 16:52:51 +0800 Subject: [PATCH 3/6] ci: add cross-platform binary build matrix and workflow_dispatch trigger Rename test job to verify, add build-binary job that compiles standalone binaries for linux-x64, darwin-arm64, and windows-x64 via bun --compile. Use --frozen-lockfile for reproducible installs and add smoke test for linux binary. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci.yml | 89 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3d9ebf7e9..943012800 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,9 +5,10 @@ on: branches: [master] pull_request: types: [opened, synchronize, reopened] + workflow_dispatch: jobs: - test: + verify: runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 @@ -17,7 +18,7 @@ jobs: bun-version: latest - name: Install dependencies - run: bun install + run: bun install --frozen-lockfile - name: Run linter run: bun run lint:all @@ -31,14 +32,84 @@ jobs: - name: Build run: bun run build - - name: Upload build artifact + build-binary: + name: Build Binary (${{ matrix.platform_name }}) + needs: verify + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + - platform_name: linux-x64 + bun_target: bun-linux-x64-baseline + install_os: linux + install_cpu: x64 + binary_ext: "" + - platform_name: darwin-arm64 + bun_target: bun-darwin-arm64 + install_os: darwin + install_cpu: arm64 + binary_ext: "" + - platform_name: windows-x64 + bun_target: bun-windows-x64-baseline + install_os: win32 + install_cpu: x64 + binary_ext: 
".exe" + + steps: + - uses: actions/checkout@v5 + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Install dependencies for target platform + run: bun install --frozen-lockfile --os=${{ matrix.install_os }} --cpu=${{ matrix.install_cpu }} + + - name: Read package metadata + id: meta + env: + PLATFORM_NAME: ${{ matrix.platform_name }} + run: | + VERSION="$(bun -e 'const pkg = JSON.parse(await Bun.file("package.json").text()); process.stdout.write(pkg.version)')" + PACKAGE_DIR="Copilot-DirectAPI_v${VERSION}_${PLATFORM_NAME}" + echo "version=${VERSION}" >> "$GITHUB_OUTPUT" + echo "package_dir=${PACKAGE_DIR}" >> "$GITHUB_OUTPUT" + + - name: Compile standalone binary + env: + BUN_TARGET: ${{ matrix.bun_target }} + BINARY_EXT: ${{ matrix.binary_ext }} + PACKAGE_DIR: ${{ steps.meta.outputs.package_dir }} + run: | + mkdir -p "dist/${PACKAGE_DIR}" + bun build --compile --target="${BUN_TARGET}" ./src/main.ts --outfile "dist/${PACKAGE_DIR}/Copilot-DirectAPI${BINARY_EXT}" + if [ -z "${BINARY_EXT}" ]; then + chmod +x "dist/${PACKAGE_DIR}/Copilot-DirectAPI" + fi + cp README.md LICENSE "dist/${PACKAGE_DIR}/" + + - name: Smoke test Linux binary + if: matrix.platform_name == 'linux-x64' + env: + PACKAGE_DIR: ${{ steps.meta.outputs.package_dir }} + run: | + chmod +x "dist/${PACKAGE_DIR}/Copilot-DirectAPI" + "./dist/${PACKAGE_DIR}/Copilot-DirectAPI" debug --json >/dev/null + + - name: Package artifact + id: package + env: + PACKAGE_DIR: ${{ steps.meta.outputs.package_dir }} + run: | + tar -C dist -czf "dist/${PACKAGE_DIR}.tar.gz" "${PACKAGE_DIR}" + echo "asset=dist/${PACKAGE_DIR}.tar.gz" >> "$GITHUB_OUTPUT" + echo "asset_name=${PACKAGE_DIR}.tar.gz" >> "$GITHUB_OUTPUT" + + - name: Upload platform artifact uses: actions/upload-artifact@v6 with: - name: copilot-directapi-dist-${{ github.sha }} - path: | - dist - package.json - README.md - LICENSE + name: ${{ steps.package.outputs.asset_name }} + path: ${{ steps.package.outputs.asset }} if-no-files-found: error 
retention-days: 14 From e234b28e884eca0e7c224bf158ed6a9c0037227a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:30:51 +0000 Subject: [PATCH 4/6] Initial plan From 30e63758173db38c4839bd34d09ac70c0377759c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 30 Mar 2026 12:17:54 +0000 Subject: [PATCH 5/6] feat: absorb changes from caozhiyuan/copilot-api --- bun.lock | 1 + src/debug.ts | 3 +- src/lib/api-config.ts | 152 +++- src/lib/config.ts | 302 +++++++ src/lib/error.ts | 9 + src/lib/logger.ts | 187 +++++ src/lib/models.ts | 75 ++ src/lib/paths.ts | 46 +- src/lib/proxy.ts | 4 - src/lib/request-auth.ts | 101 +++ src/lib/request-context.ts | 37 + src/lib/state.ts | 10 +- src/lib/token.ts | 65 +- src/lib/trace.ts | 18 + src/lib/utils.ts | 211 ++++- src/main.ts | 41 +- src/routes/messages/anthropic-types.ts | 9 +- src/routes/messages/count-tokens-handler.ts | 85 +- src/routes/messages/handler.ts | 444 +++++++++- src/routes/messages/non-stream-translation.ts | 175 ++-- .../messages/responses-stream-translation.ts | 742 +++++++++++++++++ src/routes/messages/responses-translation.ts | 781 ++++++++++++++++++ src/routes/messages/stream-translation.ts | 308 +++++-- src/routes/messages/subagent-marker.ts | 78 ++ src/routes/models/route.ts | 1 + .../provider/messages/count-tokens-handler.ts | 76 ++ src/routes/provider/messages/handler.ts | 141 ++++ src/routes/provider/messages/route.ts | 24 + src/routes/provider/models/route.ts | 50 ++ src/routes/responses/handler.ts | 163 ++++ src/routes/responses/route.ts | 15 + src/routes/responses/stream-id-sync.ts | 97 +++ src/routes/responses/utils.ts | 148 ++++ src/server.ts | 24 + .../copilot/create-chat-completions.ts | 58 +- src/services/copilot/create-messages.ts | 126 +++ src/services/copilot/create-responses.ts | 405 +++++++++ src/services/copilot/get-models.ts | 7 + 
src/services/get-vscode-version.ts | 33 +- src/services/github/get-copilot-token.ts | 4 +- src/services/github/get-copilot-usage.ts | 11 +- src/services/github/get-device-code.ts | 18 +- src/services/github/get-user.ts | 4 +- src/services/github/poll-access-token.ts | 30 +- src/services/providers/anthropic-proxy.ts | 80 ++ src/start.ts | 28 +- tests/anthropic-request.test.ts | 16 +- tests/create-chat-completions.test.ts | 6 +- 48 files changed, 5155 insertions(+), 294 deletions(-) create mode 100644 src/lib/config.ts create mode 100644 src/lib/logger.ts create mode 100644 src/lib/models.ts create mode 100644 src/lib/request-auth.ts create mode 100644 src/lib/request-context.ts create mode 100644 src/lib/trace.ts create mode 100644 src/routes/messages/responses-stream-translation.ts create mode 100644 src/routes/messages/responses-translation.ts create mode 100644 src/routes/messages/subagent-marker.ts create mode 100644 src/routes/provider/messages/count-tokens-handler.ts create mode 100644 src/routes/provider/messages/handler.ts create mode 100644 src/routes/provider/messages/route.ts create mode 100644 src/routes/provider/models/route.ts create mode 100644 src/routes/responses/handler.ts create mode 100644 src/routes/responses/route.ts create mode 100644 src/routes/responses/stream-id-sync.ts create mode 100644 src/routes/responses/utils.ts create mode 100644 src/services/copilot/create-messages.ts create mode 100644 src/services/copilot/create-responses.ts create mode 100644 src/services/providers/anthropic-proxy.ts diff --git a/bun.lock b/bun.lock index df251ceb3..1e49ca7b5 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "copilot-directapi", diff --git a/src/debug.ts b/src/debug.ts index b2f5faac4..b2aff8671 100644 --- a/src/debug.ts +++ b/src/debug.ts @@ -5,7 +5,6 @@ import consola from "consola" import fs from "node:fs/promises" import os from "node:os" -import { APP_NAME } from 
"./lib/app-info" import { PATHS } from "./lib/paths" interface DebugInfo { @@ -82,7 +81,7 @@ async function getDebugInfo(): Promise { } function printDebugInfoPlain(info: DebugInfo): void { - consola.info(`${APP_NAME} debug + consola.info(`copilot-api debug Version: ${info.version} Runtime: ${info.runtime.name} ${info.runtime.version} (${info.runtime.platform} ${info.runtime.arch}) diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts index 83bce92ad..73d75ebf5 100644 --- a/src/lib/api-config.ts +++ b/src/lib/api-config.ts @@ -2,22 +2,145 @@ import { randomUUID } from "node:crypto" import type { State } from "./state" +export const isOpencodeOauthApp = (): boolean => { + return process.env.COPILOT_API_OAUTH_APP?.trim() === "opencode" +} + +export const normalizeDomain = (input: string): string => { + return input + .trim() + .replace(/^https?:\/\//u, "") + .replace(/\/+$/u, "") +} + +export const getEnterpriseDomain = (): string | null => { + const raw = (process.env.COPILOT_API_ENTERPRISE_URL ?? "").trim() + if (!raw) return null + const normalized = normalizeDomain(raw) + return normalized || null +} + +export const getGitHubBaseUrl = (): string => { + const resolvedDomain = getEnterpriseDomain() + return resolvedDomain ? `https://${resolvedDomain}` : GITHUB_BASE_URL +} + +export const getGitHubApiBaseUrl = (): string => { + const resolvedDomain = getEnterpriseDomain() + return resolvedDomain ? 
`https://api.${resolvedDomain}` : GITHUB_API_BASE_URL +} + +export const getOpencodeOauthHeaders = (): Record => { + return { + Accept: "application/json", + "Content-Type": "application/json", + "User-Agent": + "opencode/1.3.6 ai-sdk/provider-utils/4.0.21 runtime/bun/1.3.11, opencode/1.3.6", + } +} + +export const getOauthUrls = (): { + deviceCodeUrl: string + accessTokenUrl: string +} => { + const githubBaseUrl = getGitHubBaseUrl() + + return { + deviceCodeUrl: `${githubBaseUrl}/login/device/code`, + accessTokenUrl: `${githubBaseUrl}/login/oauth/access_token`, + } +} + +interface OauthAppConfig { + clientId: string + headers: Record + scope: string +} + +export const getOauthAppConfig = (): OauthAppConfig => { + if (isOpencodeOauthApp()) { + return { + clientId: OPENCODE_GITHUB_CLIENT_ID, + headers: getOpencodeOauthHeaders(), + scope: GITHUB_APP_SCOPES, + } + } + + return { + clientId: GITHUB_CLIENT_ID, + headers: standardHeaders(), + scope: GITHUB_APP_SCOPES, + } +} + +export const prepareForCompact = ( + headers: Record, + isCompact?: boolean, +) => { + if (isCompact) { + headers["x-initiator"] = "agent" + } +} + +export const prepareInteractionHeaders = ( + sessionId: string | undefined, + isSubagent: boolean, + headers: Record, +) => { + const sendInteractionHeaders = !isOpencodeOauthApp() + + if (isSubagent) { + headers["x-initiator"] = "agent" + if (sendInteractionHeaders) { + headers["x-interaction-type"] = "conversation-subagent" + } + } + + if (sessionId && sendInteractionHeaders) { + headers["x-interaction-id"] = sessionId + } +} + export const standardHeaders = () => ({ "content-type": "application/json", accept: "application/json", }) -const COPILOT_VERSION = "0.26.7" +const COPILOT_VERSION = "0.38.2" const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}` const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}` -const API_VERSION = "2025-04-01" +const API_VERSION = "2025-10-01" + +export const copilotBaseUrl = (state: State) => { + const 
enterpriseDomain = getEnterpriseDomain() + if (enterpriseDomain) { + return `https://copilot-api.${enterpriseDomain}` + } + + return state.accountType === "individual" ? + "https://api.githubcopilot.com" + : `https://api.${state.accountType}.githubcopilot.com` +} -export const copilotBaseUrl = (state: State) => - state.accountType === "individual" ? - "https://api.githubcopilot.com" - : `https://api.${state.accountType}.githubcopilot.com` -export const copilotHeaders = (state: State, vision: boolean = false) => { +export const copilotHeaders = ( + state: State, + requestId?: string, + vision: boolean = false, +) => { + if (isOpencodeOauthApp()) { + const headers: Record = { + Authorization: `Bearer ${state.copilotToken}`, + ...getOpencodeOauthHeaders(), + "Openai-Intent": "conversation-edits", + } + + if (vision) headers["Copilot-Vision-Request"] = "true" + + return headers + } + + const requestIdValue = requestId ?? randomUUID() const headers: Record = { Authorization: `Bearer ${state.copilotToken}`, "content-type": standardHeaders()["content-type"], @@ -25,14 +148,24 @@ export const copilotHeaders = (state: State, vision: boolean = false) => { "editor-version": `vscode/${state.vsCodeVersion}`, "editor-plugin-version": EDITOR_PLUGIN_VERSION, "user-agent": USER_AGENT, - "openai-intent": "conversation-panel", + "openai-intent": "conversation-agent", "x-github-api-version": API_VERSION, - "x-request-id": randomUUID(), + "x-request-id": requestIdValue, "x-vscode-user-agent-library-version": "electron-fetch", + "x-agent-task-id": requestIdValue, + "x-interaction-type": "conversation-agent", } if (vision) headers["copilot-vision-request"] = "true" + if (state.macMachineId) { + headers["vscode-machineid"] = state.macMachineId + } + + if (state.vsCodeSessionId) { + headers["vscode-sessionid"] = state.vsCodeSessionId + } + return headers } @@ -50,3 +183,4 @@ export const githubHeaders = (state: State) => ({ export const GITHUB_BASE_URL = "https://github.com" export const 
GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98" export const GITHUB_APP_SCOPES = ["read:user"].join(" ") +export const OPENCODE_GITHUB_CLIENT_ID = "Ov23li8tweQw6odWQebz" diff --git a/src/lib/config.ts b/src/lib/config.ts new file mode 100644 index 000000000..50cdb29ef --- /dev/null +++ b/src/lib/config.ts @@ -0,0 +1,302 @@ +import consola from "consola" +import fs from "node:fs" + +import { PATHS } from "./paths" + +export interface AppConfig { + auth?: { + apiKeys?: Array + } + providers?: Record + extraPrompts?: Record + smallModel?: string + responsesApiContextManagementModels?: Array + modelReasoningEfforts?: Record< + string, + "none" | "minimal" | "low" | "medium" | "high" | "xhigh" + > + useFunctionApplyPatch?: boolean + useMessagesApi?: boolean + anthropicApiKey?: string + useResponsesApiWebSearch?: boolean +} + +export interface ModelConfig { + temperature?: number + topP?: number + topK?: number +} + +export interface ProviderConfig { + type?: string + enabled?: boolean + baseUrl?: string + apiKey?: string + models?: Record + adjustInputTokens?: boolean +} + +export interface ResolvedProviderConfig { + name: string + type: "anthropic" + baseUrl: string + apiKey: string + models?: Record + adjustInputTokens?: boolean +} + +const gpt5ExplorationPrompt = `## Exploration and reading files +- **Think first.** Before any tool call, decide ALL files/resources you will need. +- **Batch everything.** If you need multiple files (even from different places), read them together. +- **multi_tool_use.parallel** Use multi_tool_use.parallel to parallelize tool calls and only this. +- **Only make sequential calls if you truly cannot know the next file without seeing a result first.** +- **Workflow:** (a) plan all needed reads → (b) issue one parallel batch → (c) analyze results → (d) repeat if new, unpredictable reads arise.` + +const gpt5CommentaryPrompt = `# Working with the user + +You interact with the user through a terminal. 
You have 2 ways of communicating with the users: +- Share intermediary updates in \`commentary\` channel. +- After you have completed all your work, send a message to the \`final\` channel. + +## Intermediary updates + +- Intermediary updates go to the \`commentary\` channel. +- User updates are short updates while you are working, they are NOT final answers. +- You use 1-2 sentence user updates to communicate progress and new information to the user as you are doing work. +- Do not begin responses with conversational interjections or meta commentary. Avoid openers such as acknowledgements ("Done —", "Got it", "Great question, ") or framing phrases. +- You provide user updates frequently, every 20s. +- Before exploring or doing substantial work, you start with a user update acknowledging the request and explaining your first step. You should include your understanding of the user request and explain what you will do. Avoid commenting on the request or using starters such as "Got it -" or "Understood -" etc. +- When exploring, e.g. searching, reading files, you provide user updates as you go, every 20s, explaining what context you are gathering and what you've learned. Vary your sentence structure when providing these updates to avoid sounding repetitive - in particular, don't start each sentence the same way. +- After you have sufficient context, and the work is substantial, you provide a longer plan (this is the only user update that may be longer than 2 sentences and can contain formatting). +- Before performing file edits of any kind, you provide updates explaining what edits you are making. +- As you are thinking, you very frequently provide updates even if not taking any actions, informing the user of your progress. You interrupt your thinking and send multiple updates in a row if thinking for more than 100 words. 
+- Tone of your updates MUST match your personality.` + +const defaultConfig: AppConfig = { + auth: { + apiKeys: [], + }, + providers: {}, + extraPrompts: { + "gpt-5-mini": gpt5ExplorationPrompt, + "gpt-5.3-codex": gpt5CommentaryPrompt, + "gpt-5.4-mini": gpt5CommentaryPrompt, + "gpt-5.4": gpt5CommentaryPrompt, + }, + smallModel: "gpt-5-mini", + responsesApiContextManagementModels: [], + modelReasoningEfforts: { + "gpt-5-mini": "low", + "gpt-5.3-codex": "xhigh", + "gpt-5.4-mini": "xhigh", + "gpt-5.4": "xhigh", + }, + useFunctionApplyPatch: true, + useMessagesApi: true, + useResponsesApiWebSearch: true, +} + +let cachedConfig: AppConfig | null = null + +function ensureConfigFile(): void { + try { + fs.accessSync(PATHS.CONFIG_PATH, fs.constants.R_OK | fs.constants.W_OK) + } catch { + fs.mkdirSync(PATHS.APP_DIR, { recursive: true }) + fs.writeFileSync( + PATHS.CONFIG_PATH, + `${JSON.stringify(defaultConfig, null, 2)}\n`, + "utf8", + ) + try { + fs.chmodSync(PATHS.CONFIG_PATH, 0o600) + } catch { + return + } + } +} + +function readConfigFromDisk(): AppConfig { + ensureConfigFile() + try { + const raw = fs.readFileSync(PATHS.CONFIG_PATH, "utf8") + if (!raw.trim()) { + fs.writeFileSync( + PATHS.CONFIG_PATH, + `${JSON.stringify(defaultConfig, null, 2)}\n`, + "utf8", + ) + return defaultConfig + } + return JSON.parse(raw) as AppConfig + } catch (error) { + consola.error("Failed to read config file, using default config", error) + return defaultConfig + } +} + +function mergeDefaultConfig(config: AppConfig): { + mergedConfig: AppConfig + changed: boolean +} { + const extraPrompts = config.extraPrompts ?? {} + const defaultExtraPrompts = defaultConfig.extraPrompts ?? {} + const modelReasoningEfforts = config.modelReasoningEfforts ?? {} + const defaultModelReasoningEfforts = defaultConfig.modelReasoningEfforts ?? 
{} + + const missingExtraPromptModels = Object.keys(defaultExtraPrompts).filter( + (model) => !Object.hasOwn(extraPrompts, model), + ) + + const missingReasoningEffortModels = Object.keys( + defaultModelReasoningEfforts, + ).filter((model) => !Object.hasOwn(modelReasoningEfforts, model)) + + const hasExtraPromptChanges = missingExtraPromptModels.length > 0 + const hasReasoningEffortChanges = missingReasoningEffortModels.length > 0 + + if (!hasExtraPromptChanges && !hasReasoningEffortChanges) { + return { mergedConfig: config, changed: false } + } + + return { + mergedConfig: { + ...config, + extraPrompts: { + ...defaultExtraPrompts, + ...extraPrompts, + }, + modelReasoningEfforts: { + ...defaultModelReasoningEfforts, + ...modelReasoningEfforts, + }, + }, + changed: true, + } +} + +export function mergeConfigWithDefaults(): AppConfig { + const config = readConfigFromDisk() + const { mergedConfig, changed } = mergeDefaultConfig(config) + + if (changed) { + try { + fs.writeFileSync( + PATHS.CONFIG_PATH, + `${JSON.stringify(mergedConfig, null, 2)}\n`, + "utf8", + ) + } catch (writeError) { + consola.warn( + "Failed to write merged extraPrompts to config file", + writeError, + ) + } + } + + cachedConfig = mergedConfig + return mergedConfig +} + +export function getConfig(): AppConfig { + cachedConfig ??= readConfigFromDisk() + return cachedConfig +} + +export function getExtraPromptForModel(model: string): string { + const config = getConfig() + return config.extraPrompts?.[model] ?? "" +} + +export function getSmallModel(): string { + const config = getConfig() + return config.smallModel ?? "gpt-5-mini" +} + +export function getResponsesApiContextManagementModels(): Array { + const config = getConfig() + return ( + config.responsesApiContextManagementModels + ?? defaultConfig.responsesApiContextManagementModels + ?? 
[] + ) +} + +export function isResponsesApiContextManagementModel(model: string): boolean { + return getResponsesApiContextManagementModels().includes(model) +} + +export function getReasoningEffortForModel( + model: string, +): "none" | "minimal" | "low" | "medium" | "high" | "xhigh" { + const config = getConfig() + return config.modelReasoningEfforts?.[model] ?? "high" +} + +export function normalizeProviderBaseUrl(url: string): string { + return url.trim().replace(/\/+$/u, "") +} + +export function getProviderConfig(name: string): ResolvedProviderConfig | null { + const providerName = name.trim() + if (!providerName) { + return null + } + + const config = getConfig() + const provider = config.providers?.[providerName] + if (!provider) { + return null + } + + if (provider.enabled === false) { + return null + } + + const type = provider.type ?? "anthropic" + if (type !== "anthropic") { + consola.warn( + `Provider ${providerName} is ignored because only anthropic type is supported`, + ) + return null + } + + const baseUrl = normalizeProviderBaseUrl(provider.baseUrl ?? "") + const apiKey = (provider.apiKey ?? "").trim() + if (!baseUrl || !apiKey) { + consola.warn( + `Provider ${providerName} is enabled but missing baseUrl or apiKey`, + ) + return null + } + + return { + name: providerName, + type, + baseUrl, + apiKey, + models: provider.models, + adjustInputTokens: provider.adjustInputTokens, + } +} + +export function listEnabledProviders(): Array { + const config = getConfig() + const providerNames = Object.keys(config.providers ?? {}) + return providerNames.filter((name) => getProviderConfig(name) !== null) +} + +export function isMessagesApiEnabled(): boolean { + const config = getConfig() + return config.useMessagesApi ?? true +} + +export function getAnthropicApiKey(): string | undefined { + const config = getConfig() + return config.anthropicApiKey ?? process.env.ANTHROPIC_API_KEY ?? 
undefined +} + +export function isResponsesApiWebSearchEnabled(): boolean { + const config = getConfig() + return config.useResponsesApiWebSearch ?? true +} diff --git a/src/lib/error.ts b/src/lib/error.ts index c39c22596..d310306f5 100644 --- a/src/lib/error.ts +++ b/src/lib/error.ts @@ -16,6 +16,15 @@ export async function forwardError(c: Context, error: unknown) { consola.error("Error occurred:", error) if (error instanceof HTTPError) { + if (error.response.status === 429) { + for (const [name, value] of error.response.headers) { + const lowerName = name.toLowerCase() + if (lowerName === "retry-after" || lowerName.startsWith("x-")) { + c.header(name, value) + } + } + } + const errorText = await error.response.text() let errorJson: unknown try { diff --git a/src/lib/logger.ts b/src/lib/logger.ts new file mode 100644 index 000000000..07249987a --- /dev/null +++ b/src/lib/logger.ts @@ -0,0 +1,187 @@ +import consola, { type ConsolaInstance } from "consola" +import fs from "node:fs" +import path from "node:path" +import util from "node:util" + +import { PATHS } from "./paths" +import { requestContext } from "./request-context" +import { state } from "./state" + +const LOG_RETENTION_DAYS = 7 +const LOG_RETENTION_MS = LOG_RETENTION_DAYS * 24 * 60 * 60 * 1000 +const CLEANUP_INTERVAL_MS = 24 * 60 * 60 * 1000 +const LOG_DIR = path.join(PATHS.APP_DIR, "logs") +const FLUSH_INTERVAL_MS = 1000 +const MAX_BUFFER_SIZE = 100 + +const logStreams = new Map() +const logBuffers = new Map>() + +const ensureLogDirectory = () => { + if (!fs.existsSync(LOG_DIR)) { + fs.mkdirSync(LOG_DIR, { recursive: true }) + } +} + +const cleanupOldLogs = () => { + if (!fs.existsSync(LOG_DIR)) { + return + } + + const now = Date.now() + + for (const entry of fs.readdirSync(LOG_DIR)) { + const filePath = path.join(LOG_DIR, entry) + + let stats: fs.Stats + try { + stats = fs.statSync(filePath) + } catch { + continue + } + + if (!stats.isFile()) { + continue + } + + if (now - stats.mtimeMs > 
LOG_RETENTION_MS) { + try { + fs.rmSync(filePath) + } catch { + continue + } + } + } +} + +const formatArgs = (args: Array) => + args + .map((arg) => + typeof arg === "string" ? arg : ( + util.inspect(arg, { depth: null, colors: false }) + ), + ) + .join(" ") + +const sanitizeName = (name: string) => { + const normalized = name + .toLowerCase() + .replaceAll(/[^a-z0-9]+/g, "-") + .replaceAll(/^-+|-+$/g, "") + + return normalized === "" ? "handler" : normalized +} + +const getLogStream = (filePath: string): fs.WriteStream => { + let stream = logStreams.get(filePath) + if (!stream || stream.destroyed) { + stream = fs.createWriteStream(filePath, { flags: "a" }) + logStreams.set(filePath, stream) + + stream.on("error", (error: unknown) => { + console.warn("Log stream error", error) + logStreams.delete(filePath) + }) + } + return stream +} + +const flushBuffer = (filePath: string) => { + const buffer = logBuffers.get(filePath) + if (!buffer || buffer.length === 0) { + return + } + + const stream = getLogStream(filePath) + const content = buffer.join("\n") + "\n" + stream.write(content, (error) => { + if (error) { + console.warn("Failed to write handler log", error) + } + }) + + logBuffers.set(filePath, []) +} + +const flushAllBuffers = () => { + for (const filePath of logBuffers.keys()) { + flushBuffer(filePath) + } +} + +const appendLine = (filePath: string, line: string) => { + let buffer = logBuffers.get(filePath) + if (!buffer) { + buffer = [] + logBuffers.set(filePath, buffer) + } + + buffer.push(line) + + if (buffer.length >= MAX_BUFFER_SIZE) { + flushBuffer(filePath) + } +} + +const flushIntervalId = setInterval(flushAllBuffers, FLUSH_INTERVAL_MS) + +const cleanup = () => { + clearInterval(flushIntervalId) + flushAllBuffers() + for (const stream of logStreams.values()) { + stream.end() + } + logStreams.clear() + logBuffers.clear() +} + +process.on("exit", cleanup) +process.on("SIGINT", () => { + cleanup() + process.exit(0) +}) +process.on("SIGTERM", () => { + 
cleanup() + process.exit(0) +}) + +let lastCleanup = 0 + +export const createHandlerLogger = (name: string): ConsolaInstance => { + ensureLogDirectory() + + const sanitizedName = sanitizeName(name) + const instance = consola.withTag(name) + + if (state.verbose) { + instance.level = 5 + } + instance.setReporters([]) + + instance.addReporter({ + log(logObj) { + ensureLogDirectory() + + if (Date.now() - lastCleanup > CLEANUP_INTERVAL_MS) { + cleanupOldLogs() + lastCleanup = Date.now() + } + + const context = requestContext.getStore() + const traceId = context?.traceId + const date = logObj.date + const dateKey = date.toLocaleDateString("sv-SE") + const timestamp = date.toLocaleString("sv-SE", { hour12: false }) + const filePath = path.join(LOG_DIR, `${sanitizedName}-${dateKey}.log`) + const message = formatArgs(logObj.args as Array) + const traceIdStr = traceId ? ` [${traceId}]` : "" + const line = `[${timestamp}] [${logObj.type}] [${logObj.tag || name}]${traceIdStr}${ + message ? ` ${message}` : "" + }` + + appendLine(filePath, line) + }, + }) + + return instance +} diff --git a/src/lib/models.ts b/src/lib/models.ts new file mode 100644 index 000000000..f5f621eab --- /dev/null +++ b/src/lib/models.ts @@ -0,0 +1,75 @@ +import type { Model } from "~/services/copilot/get-models" + +import { state } from "~/lib/state" + +export const findEndpointModel = (sdkModelId: string): Model | undefined => { + const models = state.models?.data ?? [] + const exactMatch = models.find((m) => m.id === sdkModelId) + if (exactMatch) { + return exactMatch + } + + const normalized = _normalizeSdkModelId(sdkModelId) + if (!normalized) { + return undefined + } + + const modelName = `claude-${normalized.family}-${normalized.version}` + const model = models.find((m) => m.id === modelName) + if (model) { + return model + } + + return undefined +} + +/** + * Normalizes an SDK model ID to extract the model family and version. 
+ * this method from github copilot extension + * Examples: + * - "claude-opus-4-5-20251101" -> { family: "opus", version: "4.5" } + * - "claude-3-5-sonnet-20241022" -> { family: "sonnet", version: "3.5" } + * - "claude-sonnet-4-20250514" -> { family: "sonnet", version: "4" } + * - "claude-haiku-3-5-20250514" -> { family: "haiku", version: "3.5" } + * - "claude-haiku-4.5" -> { family: "haiku", version: "4.5" } + */ +const _normalizeSdkModelId = ( + sdkModelId: string, +): { family: string; version: string } | undefined => { + const lower = sdkModelId.toLowerCase() + + // Strip date suffix (8 digits at the end) + const withoutDate = lower.replace(/-\d{8}$/, "") + + // Pattern 1: claude-{family}-{major}-{minor} (e.g., claude-opus-4-5, claude-haiku-3-5) + const pattern1 = withoutDate.match(/^claude-(\w+)-(\d+)-(\d+)$/) + if (pattern1) { + return { family: pattern1[1], version: `${pattern1[2]}.${pattern1[3]}` } + } + + // Pattern 2: claude-{major}-{minor}-{family} (e.g., claude-3-5-sonnet) + const pattern2 = withoutDate.match(/^claude-(\d+)-(\d+)-(\w+)$/) + if (pattern2) { + return { family: pattern2[3], version: `${pattern2[1]}.${pattern2[2]}` } + } + + // Pattern 3: claude-{family}-{major}.{minor} (e.g., claude-haiku-4.5) + const pattern3 = withoutDate.match(/^claude-(\w+)-(\d+)\.(\d+)$/) + if (pattern3) { + return { family: pattern3[1], version: `${pattern3[2]}.${pattern3[3]}` } + } + + // Pattern 4: claude-{family}-{major} (e.g., claude-sonnet-4) + const pattern4 = withoutDate.match(/^claude-(\w+)-(\d+)$/) + if (pattern4) { + return { family: pattern4[1], version: pattern4[2] } + } + + // Pattern 5: claude-{major}-{family} (e.g., claude-3-opus) + const pattern5 = withoutDate.match(/^claude-(\d+)-(\w+)$/) + if (pattern5) { + return { family: pattern5[2], version: pattern5[1] } + } + + return undefined +} diff --git a/src/lib/paths.ts b/src/lib/paths.ts index 8a0017bb6..dba35a791 100644 --- a/src/lib/paths.ts +++ b/src/lib/paths.ts @@ -2,27 +2,29 @@ import fs from 
"node:fs/promises" import os from "node:os" import path from "node:path" -import { APP_NAME, LEGACY_PACKAGE_NAME } from "./app-info" +const AUTH_APP = process.env.COPILOT_API_OAUTH_APP?.trim() || "" +const ENTERPRISE_PREFIX = process.env.COPILOT_API_ENTERPRISE_URL ? "ent_" : "" -const APP_DIR = path.join(os.homedir(), ".local", "share", APP_NAME) -const LEGACY_APP_DIR = path.join( - os.homedir(), - ".local", - "share", - LEGACY_PACKAGE_NAME, +const DEFAULT_DIR = path.join(os.homedir(), ".local", "share", "copilot-api") +const APP_DIR = process.env.COPILOT_API_HOME || DEFAULT_DIR + +const GITHUB_TOKEN_PATH = path.join( + APP_DIR, + AUTH_APP, + ENTERPRISE_PREFIX + "github_token", ) -const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token") -const LEGACY_GITHUB_TOKEN_PATH = path.join(LEGACY_APP_DIR, "github_token") +const CONFIG_PATH = path.join(APP_DIR, "config.json") export const PATHS = { APP_DIR, GITHUB_TOKEN_PATH, + CONFIG_PATH, } export async function ensurePaths(): Promise { - await fs.mkdir(PATHS.APP_DIR, { recursive: true }) - await migrateLegacyGithubToken() + await fs.mkdir(path.join(PATHS.APP_DIR, AUTH_APP), { recursive: true }) await ensureFile(PATHS.GITHUB_TOKEN_PATH) + await ensureFile(PATHS.CONFIG_PATH) } async function ensureFile(filePath: string): Promise { @@ -33,25 +35,3 @@ async function ensureFile(filePath: string): Promise { await fs.chmod(filePath, 0o600) } } - -async function migrateLegacyGithubToken(): Promise { - if (await pathExists(PATHS.GITHUB_TOKEN_PATH)) { - return - } - - if (!(await pathExists(LEGACY_GITHUB_TOKEN_PATH))) { - return - } - - await fs.copyFile(LEGACY_GITHUB_TOKEN_PATH, PATHS.GITHUB_TOKEN_PATH) - await fs.chmod(PATHS.GITHUB_TOKEN_PATH, 0o600) -} - -async function pathExists(filePath: string): Promise { - try { - await fs.access(filePath, fs.constants.F_OK) - return true - } catch { - return false - } -} diff --git a/src/lib/proxy.ts b/src/lib/proxy.ts index 22beb5819..18c6ab340 100644 --- a/src/lib/proxy.ts +++ 
b/src/lib/proxy.ts @@ -9,10 +9,6 @@ export function initProxyFromEnv(): void { const direct = new Agent() const proxies = new Map() - // We only need a minimal dispatcher that implements `dispatch` at runtime. - // Typing the object as `Dispatcher` forces TypeScript to require many - // additional methods. Instead, keep a plain object and cast when passing - // to `setGlobalDispatcher`. const dispatcher = { dispatch( options: Dispatcher.DispatchOptions, diff --git a/src/lib/request-auth.ts b/src/lib/request-auth.ts new file mode 100644 index 000000000..d974c7a06 --- /dev/null +++ b/src/lib/request-auth.ts @@ -0,0 +1,101 @@ +import type { Context, MiddlewareHandler } from "hono" + +import consola from "consola" + +import { getConfig } from "./config" + +interface AuthMiddlewareOptions { + getApiKeys?: () => Array + allowUnauthenticatedPaths?: Array + allowOptionsBypass?: boolean +} + +export function normalizeApiKeys(apiKeys: unknown): Array { + if (!Array.isArray(apiKeys)) { + if (apiKeys !== undefined) { + consola.warn("Invalid auth.apiKeys config. Expected an array of strings.") + } + return [] + } + + const normalizedKeys = apiKeys + .filter((key): key is string => typeof key === "string") + .map((key) => key.trim()) + .filter((key) => key.length > 0) + + if (normalizedKeys.length !== apiKeys.length) { + consola.warn( + "Invalid auth.apiKeys entries found. 
Only non-empty strings are allowed.", + ) + } + + return [...new Set(normalizedKeys)] +} + +export function getConfiguredApiKeys(): Array { + const config = getConfig() + return normalizeApiKeys(config.auth?.apiKeys) +} + +export function extractRequestApiKey(c: Context): string | null { + const xApiKey = c.req.header("x-api-key")?.trim() + if (xApiKey) { + return xApiKey + } + + const authorization = c.req.header("authorization") + if (!authorization) { + return null + } + + const [scheme, ...rest] = authorization.trim().split(/\s+/) + if (scheme.toLowerCase() !== "bearer") { + return null + } + + const bearerToken = rest.join(" ").trim() + return bearerToken || null +} + +function createUnauthorizedResponse(c: Context): Response { + c.header("WWW-Authenticate", 'Bearer realm="copilot-api"') + return c.json( + { + error: { + message: "Unauthorized", + type: "authentication_error", + }, + }, + 401, + ) +} + +export function createAuthMiddleware( + options: AuthMiddlewareOptions = {}, +): MiddlewareHandler { + const getApiKeys = options.getApiKeys ?? getConfiguredApiKeys + const allowUnauthenticatedPaths = options.allowUnauthenticatedPaths ?? ["/"] + const allowOptionsBypass = options.allowOptionsBypass ?? 
true + + return async (c, next) => { + if (allowOptionsBypass && c.req.method === "OPTIONS") { + return next() + } + + if (allowUnauthenticatedPaths.includes(c.req.path)) { + return next() + } + + const apiKeys = getApiKeys() + if (apiKeys.length === 0) { + return next() + } + + const requestApiKey = extractRequestApiKey(c) + if (!requestApiKey || !apiKeys.includes(requestApiKey)) { + return createUnauthorizedResponse(c) + } + + return next() + } +} diff --git a/src/lib/request-context.ts b/src/lib/request-context.ts new file mode 100644 index 000000000..cb4dc44f1 --- /dev/null +++ b/src/lib/request-context.ts @@ -0,0 +1,37 @@ +import { AsyncLocalStorage } from "node:async_hooks" + +export interface RequestContext { + traceId: string + startTime: number +} + +const TRACE_ID_MAX_LENGTH = 64 +const TRACE_ID_PATTERN = /^\w[\w.-]*$/ + +const asyncLocalStorage = new AsyncLocalStorage() + +export const requestContext = { + getStore: () => asyncLocalStorage.getStore(), + run: (context: RequestContext, callback: () => T) => + asyncLocalStorage.run(context, callback), +} + +export function generateTraceId(): string { + const timestamp = Date.now().toString(36) + const random = Math.random().toString(36).slice(2, 8) + return `${timestamp}-${random}` +} + +export function resolveTraceId(traceId: string | null | undefined): string { + const candidate = traceId?.trim() + + if ( + !candidate + || candidate.length > TRACE_ID_MAX_LENGTH + || !TRACE_ID_PATTERN.test(candidate) + ) { + return generateTraceId() + } + + return candidate +} diff --git a/src/lib/state.ts b/src/lib/state.ts index f80043c11..0ff08fcbd 100644 --- a/src/lib/state.ts +++ b/src/lib/state.ts @@ -8,7 +8,9 @@ export interface State { models?: ModelsResponse vsCodeVersion?: string - forceAgentInitiator: boolean + macMachineId?: string + vsCodeSessionId?: string + manualApprove: boolean rateLimitWait: boolean showToken: boolean @@ -16,12 +18,16 @@ export interface State { // Rate limiting configuration 
rateLimitSeconds?: number lastRequestTimestamp?: number + verbose: boolean + + // BlueSkyXN: force all requests to use agent initiator + forceAgentInitiator?: boolean } export const state: State = { accountType: "individual", - forceAgentInitiator: false, manualApprove: false, rateLimitWait: false, showToken: false, + verbose: false, } diff --git a/src/lib/token.ts b/src/lib/token.ts index fc8d2785f..4e2633c3a 100644 --- a/src/lib/token.ts +++ b/src/lib/token.ts @@ -1,6 +1,8 @@ import consola from "consola" import fs from "node:fs/promises" +import { setTimeout as delay } from "node:timers/promises" +import { isOpencodeOauthApp } from "~/lib/api-config" import { PATHS } from "~/lib/paths" import { getCopilotToken } from "~/services/github/get-copilot-token" import { getDeviceCode } from "~/services/github/get-device-code" @@ -10,36 +12,87 @@ import { pollAccessToken } from "~/services/github/poll-access-token" import { HTTPError } from "./error" import { state } from "./state" +let copilotRefreshLoopController: AbortController | null = null + +export const stopCopilotRefreshLoop = () => { + if (!copilotRefreshLoopController) { + return + } + + copilotRefreshLoopController.abort() + copilotRefreshLoopController = null +} + const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8") const writeGithubToken = (token: string) => fs.writeFile(PATHS.GITHUB_TOKEN_PATH, token) export const setupCopilotToken = async () => { + if (isOpencodeOauthApp()) { + if (!state.githubToken) throw new Error(`opencode token not found`) + + state.copilotToken = state.githubToken + + consola.debug("GitHub Copilot token set from opencode auth token") + if (state.showToken) { + consola.info("Copilot token:", state.copilotToken) + } + + stopCopilotRefreshLoop() + return + } + const { token, refresh_in } = await getCopilotToken() state.copilotToken = token - // Display the Copilot token to the screen consola.debug("GitHub Copilot Token fetched successfully!") if (state.showToken) 
{ consola.info("Copilot token:", token) } - const refreshInterval = (refresh_in - 60) * 1000 - setInterval(async () => { + stopCopilotRefreshLoop() + + const controller = new AbortController() + copilotRefreshLoopController = controller + + runCopilotRefreshLoop(refresh_in, controller.signal) + .catch(() => { + consola.warn("Copilot token refresh loop stopped") + }) + .finally(() => { + if (copilotRefreshLoopController === controller) { + copilotRefreshLoopController = null + } + }) +} + +const runCopilotRefreshLoop = async ( + refreshIn: number, + signal: AbortSignal, +) => { + let nextRefreshDelayMs = (refreshIn - 60) * 1000 + + while (!signal.aborted) { + await delay(nextRefreshDelayMs, undefined, { signal }) + consola.debug("Refreshing Copilot token") + try { - const { token } = await getCopilotToken() + const { token, refresh_in } = await getCopilotToken() state.copilotToken = token consola.debug("Copilot token refreshed") if (state.showToken) { consola.info("Refreshed Copilot token:", token) } + + nextRefreshDelayMs = (refresh_in - 60) * 1000 } catch (error) { consola.error("Failed to refresh Copilot token:", error) - throw error + nextRefreshDelayMs = 15_000 + consola.warn(`Retrying Copilot token refresh in ${nextRefreshDelayMs}ms`) } - }, refreshInterval) + } } interface SetupGitHubTokenOptions { diff --git a/src/lib/trace.ts b/src/lib/trace.ts new file mode 100644 index 000000000..618c4a9cc --- /dev/null +++ b/src/lib/trace.ts @@ -0,0 +1,18 @@ +import type { MiddlewareHandler } from "hono" + +import { requestContext, resolveTraceId } from "./request-context" + +export const traceIdMiddleware: MiddlewareHandler = async (c, next) => { + const traceId = resolveTraceId(c.req.header("x-trace-id")) + + c.header("x-trace-id", traceId) + + const context = { + traceId, + startTime: Date.now(), + } + + await requestContext.run(context, async () => { + await next() + }) +} diff --git a/src/lib/utils.ts b/src/lib/utils.ts index cc80be667..4ac12fc78 100644 --- 
a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -1,4 +1,10 @@ +import type { Context } from "hono" + import consola from "consola" +import { createHash, randomUUID } from "node:crypto" +import { networkInterfaces } from "node:os" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" import { getModels } from "~/services/copilot/get-models" import { getVSCodeVersion } from "~/services/get-vscode-version" @@ -15,7 +21,13 @@ export const isNullish = (value: unknown): value is null | undefined => export async function cacheModels(): Promise { const models = await getModels() - state.models = models + state.models = { + ...models, + data: models.data.filter( + (model) => + model.model_picker_enabled || model.capabilities.type === "embeddings", + ), + } } export const cacheVSCodeVersion = async () => { @@ -24,3 +36,200 @@ export const cacheVSCodeVersion = async () => { consola.info(`Using VSCode version: ${response}`) } + +const invalidMacAddresses = new Set([ + "00:00:00:00:00:00", + "ff:ff:ff:ff:ff:ff", + "ac:de:48:00:11:22", +]) + +function validateMacAddress(candidate: string): boolean { + const tempCandidate = candidate.replaceAll("-", ":").toLowerCase() + return !invalidMacAddresses.has(tempCandidate) +} + +export function getMac(): string | null { + const ifaces = networkInterfaces() + // eslint-disable-next-line guard-for-in + for (const name in ifaces) { + const networkInterface = ifaces[name] + if (networkInterface) { + for (const { mac } of networkInterface) { + if (validateMacAddress(mac)) { + return mac + } + } + } + } + return null +} + +export const cacheMacMachineId = () => { + const macAddress = getMac() ?? 
randomUUID() + state.macMachineId = createHash("sha256") + .update(macAddress, "utf8") + .digest("hex") + consola.debug(`Using machine ID: ${state.macMachineId}`) +} + +const SESSION_REFRESH_BASE_MS = 60 * 60 * 1000 +const SESSION_REFRESH_JITTER_MS = 20 * 60 * 1000 +let vsCodeSessionRefreshTimer: ReturnType<typeof setTimeout> | null = null + +const generateSessionId = () => { + state.vsCodeSessionId = randomUUID() + Date.now().toString() + consola.debug(`Generated VSCode session ID: ${state.vsCodeSessionId}`) +} + +export const stopVsCodeSessionRefreshLoop = () => { + if (vsCodeSessionRefreshTimer) { + clearTimeout(vsCodeSessionRefreshTimer) + vsCodeSessionRefreshTimer = null + } +} + +const scheduleSessionIdRefresh = () => { + const randomDelay = Math.floor(Math.random() * SESSION_REFRESH_JITTER_MS) + const delay = SESSION_REFRESH_BASE_MS + randomDelay + consola.debug( + `Scheduling next VSCode session ID refresh in ${Math.round( + delay / 1000, + )} seconds`, + ) + + stopVsCodeSessionRefreshLoop() + vsCodeSessionRefreshTimer = setTimeout(() => { + try { + generateSessionId() + } catch (error) { + consola.error("Failed to refresh session ID, rescheduling...", error) + } finally { + scheduleSessionIdRefresh() + } + }, delay) +} + +export const cacheVsCodeSessionId = () => { + stopVsCodeSessionRefreshLoop() + generateSessionId() + scheduleSessionIdRefresh() +} + +interface PayloadMessage { + role?: string + content?: string | Array<{ type?: string; text?: string }> | null + type?: string +} + +const isRecord = (value: unknown): value is Record<string, unknown> => + typeof value === "object" && value !== null + +const getUserIdJsonField = ( + userIdPayload: Record<string, unknown> | null, + field: string, +): string | null => { + const value = userIdPayload?.[field] + return typeof value === "string" && value.length > 0 ? value : null +} + +const parseJsonUserId = (userId: string): Record<string, unknown> | null => { + try { + const parsed: unknown = JSON.parse(userId) + return isRecord(parsed) ?
parsed : null + } catch { + return null + } +} + +export const parseUserIdMetadata = ( + userId: string | undefined, +): { safetyIdentifier: string | null; sessionId: string | null } => { + if (!userId || typeof userId !== "string") { + return { safetyIdentifier: null, sessionId: null } + } + + const legacySafetyIdentifier = + userId.match(/user_([^_]+)_account/)?.[1] ?? null + const legacySessionId = userId.match(/_session_(.+)$/)?.[1] ?? null + + const parsedUserId = + legacySafetyIdentifier && legacySessionId ? null : parseJsonUserId(userId) + + const safetyIdentifier = + legacySafetyIdentifier + ?? getUserIdJsonField(parsedUserId, "device_id") + ?? getUserIdJsonField(parsedUserId, "account_uuid") + const sessionId = + legacySessionId ?? getUserIdJsonField(parsedUserId, "session_id") + + return { safetyIdentifier, sessionId } +} + +const findLastUserContent = ( + messages: Array, +): string | null => { + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i] + if (msg.role === "user" && msg.content) { + if (typeof msg.content === "string") { + return msg.content + } else if (Array.isArray(msg.content)) { + const array = msg.content + .filter((n) => n.type !== "tool_result") + .map((n) => ({ ...n, cache_control: undefined })) + if (array.length > 0) { + return JSON.stringify(array) + } + } + } + } + return null +} + +export const generateRequestIdFromPayload = ( + payload: { + messages: string | Array | undefined + }, + sessionId?: string, +): string => { + const messages = payload.messages + if (messages) { + const lastUserContent = + typeof messages === "string" ? messages : findLastUserContent(messages) + + if (lastUserContent) { + return getUUID( + (sessionId ?? "") + (state.macMachineId ?? 
"") + lastUserContent, + ) + } + } + + return randomUUID() +} + +export const getRootSessionId = ( + anthropicPayload: AnthropicMessagesPayload, + c: Context, +): string | undefined => { + const userId = anthropicPayload.metadata?.user_id + const sessionId = + userId ? + parseUserIdMetadata(userId).sessionId || undefined + : c.req.header("x-session-id") + + return sessionId ? getUUID(sessionId) : sessionId +} + +export const getUUID = (content: string): string => { + const uuidBytes = createHash("sha256") + .update(content) + .digest() + .subarray(0, 16) + + uuidBytes[6] = (uuidBytes[6] & 0x0f) | 0x40 + uuidBytes[8] = (uuidBytes[8] & 0x3f) | 0x80 + + const uuidHex = uuidBytes.toString("hex") + + return `${uuidHex.slice(0, 8)}-${uuidHex.slice(8, 12)}-${uuidHex.slice(12, 16)}-${uuidHex.slice(16, 20)}-${uuidHex.slice(20)}` +} diff --git a/src/main.ts b/src/main.ts index c33a8d417..378db614f 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,12 +1,42 @@ #!/usr/bin/env node -import { defineCommand, runMain } from "citty" +import { defineCommand, runMain, parseArgs } from "citty" -import { auth } from "./auth" -import { checkUsage } from "./check-usage" -import { debug } from "./debug" import { APP_NAME } from "./lib/app-info" -import { start } from "./start" + +const cliArgs = { + "api-home": { + type: "string", + description: "Path to the API home directory.", + }, + "oauth-app": { + type: "string", + description: "OAuth app identifier.", + }, + "enterprise-url": { + type: "string", + description: "Enterprise URL for GitHub.", + }, +} as const + +const args = parseArgs(process.argv, cliArgs) + +// Set environment variables before loading other modules +if (typeof args["api-home"] === "string") { + process.env.COPILOT_API_HOME = args["api-home"] +} +if (typeof args["oauth-app"] === "string") { + process.env.COPILOT_API_OAUTH_APP = args["oauth-app"] +} +if (typeof args["enterprise-url"] === "string") { + process.env.COPILOT_API_ENTERPRISE_URL = args["enterprise-url"] +} + 
+// Dynamically import other modules to ensure environment variables are set +const { auth } = await import("./auth") +const { checkUsage } = await import("./check-usage") +const { debug } = await import("./debug") +const { start } = await import("./start") const main = defineCommand({ meta: { @@ -14,6 +44,7 @@ const main = defineCommand({ description: `${APP_NAME} wraps GitHub Copilot API and exposes an OpenAI/Anthropic-compatible interface.`, }, subCommands: { auth, start, "check-usage": checkUsage, debug }, + args: cliArgs, }) await runMain(main) diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 881fffcc8..e6049d0b6 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -19,10 +19,13 @@ export interface AnthropicMessagesPayload { name?: string } thinking?: { - type: "enabled" + type: "enabled" | "adaptive" budget_tokens?: number } service_tier?: "auto" | "standard_only" + output_config?: { + effort?: "low" | "medium" | "high" | "max" + } } export interface AnthropicTextBlock { @@ -42,7 +45,7 @@ export interface AnthropicImageBlock { export interface AnthropicToolResultBlock { type: "tool_result" tool_use_id: string - content: string + content: string | Array is_error?: boolean } @@ -56,6 +59,7 @@ export interface AnthropicToolUseBlock { export interface AnthropicThinkingBlock { type: "thinking" thinking: string + signature: string } export type AnthropicUserContentBlock = @@ -196,6 +200,7 @@ export interface AnthropicStreamState { messageStartSent: boolean contentBlockIndex: number contentBlockOpen: boolean + thinkingBlockOpen: boolean toolCalls: { [openAIToolIndex: number]: { id: string diff --git a/src/routes/messages/count-tokens-handler.ts b/src/routes/messages/count-tokens-handler.ts index 2ec849cb8..0fb98445a 100644 --- a/src/routes/messages/count-tokens-handler.ts +++ b/src/routes/messages/count-tokens-handler.ts @@ -2,26 +2,80 @@ import type { Context } from 
"hono" import consola from "consola" -import { state } from "~/lib/state" +import { getAnthropicApiKey } from "~/lib/config" import { getTokenCount } from "~/lib/tokenizer" +import { findEndpointModel } from "../../lib/models" import { type AnthropicMessagesPayload } from "./anthropic-types" import { translateToOpenAI } from "./non-stream-translation" /** - * Handles token counting for Anthropic messages + * Forwards token counting to Anthropic's real /v1/messages/count_tokens endpoint. + * Returns the result on success, or null to fall through to estimation. + */ +async function countTokensViaAnthropic( + c: Context, + payload: AnthropicMessagesPayload, +): Promise { + if (!payload.model.startsWith("claude")) return null + + const apiKey = getAnthropicApiKey() + if (!apiKey) return null + + // Copilot uses dotted names (claude-opus-4.6) but Anthropic requires dashes (claude-opus-4-6) + const model = payload.model.replaceAll(".", "-") + + const res = await fetch( + "https://api.anthropic.com/v1/messages/count_tokens", + { + method: "POST", + headers: { + "content-type": "application/json", + "x-api-key": apiKey, + "anthropic-version": "2023-06-01", + "anthropic-beta": "token-counting-2024-11-01", + }, + body: JSON.stringify({ ...payload, model }), + }, + ) + + if (!res.ok) { + consola.warn( + "Anthropic count_tokens failed:", + res.status, + await res.text().catch(() => ""), + "- falling back to estimation", + ) + return null + } + + const result = (await res.json()) as { input_tokens: number } + consola.info("Token count (Anthropic API):", result.input_tokens) + return c.json(result) +} + +/** + * Handles token counting for Anthropic messages. + * + * When an Anthropic API key is available (via config or ANTHROPIC_API_KEY env var) + * and the model is a Claude model, forwards to Anthropic's free /v1/messages/count_tokens + * endpoint for accurate counts. Otherwise falls back to GPT tokenizer estimation. 
*/ export async function handleCountTokens(c: Context) { try { - const anthropicBeta = c.req.header("anthropic-beta") - const anthropicPayload = await c.req.json() + const anthropicPayload = await c.req.json<AnthropicMessagesPayload>() + + // Try Anthropic's real endpoint first (Claude models only) + const anthropicResult = await countTokensViaAnthropic(c, anthropicPayload) + if (anthropicResult) return anthropicResult + + // Fallback: GPT tokenizer estimation (also used for non-Claude models) + const anthropicBeta = c.req.header("anthropic-beta") + const openAIPayload = translateToOpenAI(anthropicPayload) - const selectedModel = state.models?.data.find( - (model) => model.id === anthropicPayload.model, - ) + const selectedModel = findEndpointModel(anthropicPayload.model) + anthropicPayload.model = selectedModel?.id ?? anthropicPayload.model if (!selectedModel) { consola.warn("Model not found, returning default token count") @@ -33,18 +87,21 @@ export async function handleCountTokens(c: Context) { const tokenCount = await getTokenCount(openAIPayload, selectedModel) if (anthropicPayload.tools && anthropicPayload.tools.length > 0) { - let mcpToolExist = false - if (anthropicBeta?.startsWith("claude-code")) { - mcpToolExist = anthropicPayload.tools.some((tool) => - tool.name.startsWith("mcp__"), + let addToolSystemPromptCount = false + if (anthropicBeta) { + const toolsLength = anthropicPayload.tools.length + addToolSystemPromptCount = !anthropicPayload.tools.some( + (tool) => + tool.name.startsWith("mcp__") + || (tool.name === "Skill" && toolsLength === 1), ) } - if (!mcpToolExist) { + if (addToolSystemPromptCount) { if (anthropicPayload.model.startsWith("claude")) { // https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/overview#pricing tokenCount.input = tokenCount.input + 346 } else if (anthropicPayload.model.startsWith("grok")) { - tokenCount.input = tokenCount.input + 480 + tokenCount.input = tokenCount.input + 120 } } } @@ -52,8 +109,6 @@ export async function handleCountTokens(c: Context) { let finalTokenCount =
tokenCount.input + tokenCount.output if (anthropicPayload.model.startsWith("claude")) { finalTokenCount = Math.round(finalTokenCount * 1.15) - } else if (anthropicPayload.model.startsWith("grok")) { - finalTokenCount = Math.round(finalTokenCount * 1.03) } consola.info("Token count:", finalTokenCount) diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf6243..1dd82e828 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -1,69 +1,188 @@ import type { Context } from "hono" -import consola from "consola" import { streamSSE } from "hono/streaming" +import type { Model } from "~/services/copilot/get-models" + import { awaitApproval } from "~/lib/approval" +import { + getSmallModel, + getReasoningEffortForModel, + isMessagesApiEnabled, +} from "~/lib/config" +import { createHandlerLogger } from "~/lib/logger" +import { findEndpointModel } from "~/lib/models" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" +import { generateRequestIdFromPayload, getRootSessionId } from "~/lib/utils" +import { + buildErrorEvent, + createResponsesStreamState, + translateResponsesStreamEvent, +} from "~/routes/messages/responses-stream-translation" +import { + translateAnthropicMessagesToResponsesPayload, + translateResponsesResultToAnthropic, +} from "~/routes/messages/responses-translation" +import { + applyResponsesApiContextManagement, + compactInputByLatestCompaction, + getResponsesRequestOptions, +} from "~/routes/responses/utils" import { createChatCompletions, type ChatCompletionChunk, type ChatCompletionResponse, } from "~/services/copilot/create-chat-completions" +import { createMessages } from "~/services/copilot/create-messages" +import { + createResponses, + type ResponsesResult, + type ResponseStreamEvent, +} from "~/services/copilot/create-responses" + +import type { SubagentMarker } from "./subagent-marker" import { type AnthropicMessagesPayload, type AnthropicStreamState, 
+ type AnthropicTextBlock, + type AnthropicToolResultBlock, } from "./anthropic-types" import { translateToAnthropic, translateToOpenAI, } from "./non-stream-translation" import { translateChunkToAnthropicEvents } from "./stream-translation" +import { parseSubagentMarkerFromFirstUser } from "./subagent-marker" + +const logger = createHandlerLogger("messages-handler") + +const compactSystemPromptStart = + "You are a helpful AI assistant tasked with summarizing conversations" export async function handleCompletion(c: Context) { await checkRateLimit(state) const anthropicPayload = await c.req.json() - consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) + logger.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) - const openAIPayload = translateToOpenAI(anthropicPayload) - consola.debug( - "Translated OpenAI request payload:", - JSON.stringify(openAIPayload), - ) + const subagentMarker = parseSubagentMarkerFromFirstUser(anthropicPayload) + if (subagentMarker) { + logger.debug("Detected Subagent marker:", JSON.stringify(subagentMarker)) + } + + const sessionId = getRootSessionId(anthropicPayload, c) + logger.debug("Extracted session ID:", sessionId) + + // claude code and opencode compact request detection + const isCompact = isCompactRequest(anthropicPayload) + + // fix claude code 2.0.28+ warmup request consume premium request, forcing small model if no tools are used + const anthropicBeta = c.req.header("anthropic-beta") + logger.debug("Anthropic Beta header:", anthropicBeta) + const noTools = !anthropicPayload.tools || anthropicPayload.tools.length === 0 + if (anthropicBeta && noTools && !isCompact) { + anthropicPayload.model = getSmallModel() + } + + if (isCompact) { + logger.debug("Is compact request:", isCompact) + } else { + mergeToolResultForClaude(anthropicPayload) + } + + const requestId = generateRequestIdFromPayload(anthropicPayload, sessionId) + logger.debug("Generated request ID:", requestId) if 
(state.manualApprove) { await awaitApproval() } - const response = await createChatCompletions(openAIPayload) + const selectedModel = findEndpointModel(anthropicPayload.model) + anthropicPayload.model = selectedModel?.id ?? anthropicPayload.model + + if (shouldUseMessagesApi(selectedModel)) { + return await handleWithMessagesApi(c, anthropicPayload, { + anthropicBetaHeader: anthropicBeta, + subagentMarker, + selectedModel, + requestId, + sessionId, + isCompact, + }) + } + + if (shouldUseResponsesApi(selectedModel)) { + return await handleWithResponsesApi(c, anthropicPayload, { + subagentMarker, + selectedModel, + requestId, + sessionId, + isCompact, + }) + } + + return await handleWithChatCompletions(c, anthropicPayload, { + subagentMarker, + requestId, + sessionId, + isCompact, + }) +} + +const RESPONSES_ENDPOINT = "/responses" +const MESSAGES_ENDPOINT = "/v1/messages" + +const handleWithChatCompletions = async ( + c: Context, + anthropicPayload: AnthropicMessagesPayload, + options: { + subagentMarker?: SubagentMarker | null + requestId: string + sessionId?: string + isCompact?: boolean + }, +) => { + const { subagentMarker, requestId, sessionId, isCompact } = options + const openAIPayload = translateToOpenAI(anthropicPayload) + logger.debug( + "Translated OpenAI request payload:", + JSON.stringify(openAIPayload), + ) + + const response = await createChatCompletions(openAIPayload, { + subagentMarker, + requestId, + sessionId, + isCompact, + }) if (isNonStreaming(response)) { - consola.debug( + logger.debug( "Non-streaming response from Copilot:", - JSON.stringify(response).slice(-400), + JSON.stringify(response), ) const anthropicResponse = translateToAnthropic(response) - consola.debug( + logger.debug( "Translated Anthropic response:", JSON.stringify(anthropicResponse), ) return c.json(anthropicResponse) } - consola.debug("Streaming response from Copilot") + logger.debug("Streaming response from Copilot") return streamSSE(c, async (stream) => { const streamState: 
AnthropicStreamState = { messageStartSent: false, contentBlockIndex: 0, contentBlockOpen: false, toolCalls: {}, + thinkingBlockOpen: false, } for await (const rawEvent of response) { - consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent)) + logger.debug("Copilot raw stream event:", JSON.stringify(rawEvent)) if (rawEvent.data === "[DONE]") { break } @@ -76,7 +195,7 @@ export async function handleCompletion(c: Context) { const events = translateChunkToAnthropicEvents(chunk, streamState) for (const event of events) { - consola.debug("Translated Anthropic event:", JSON.stringify(event)) + logger.debug("Translated Anthropic event:", JSON.stringify(event)) await stream.writeSSE({ event: event.type, data: JSON.stringify(event), @@ -86,6 +205,303 @@ export async function handleCompletion(c: Context) { }) } +const handleWithResponsesApi = async ( + c: Context, + anthropicPayload: AnthropicMessagesPayload, + options: { + subagentMarker?: SubagentMarker | null + selectedModel?: Model + requestId: string + sessionId?: string + isCompact?: boolean + }, +) => { + const { subagentMarker, selectedModel, requestId, sessionId, isCompact } = + options + + const responsesPayload = + translateAnthropicMessagesToResponsesPayload(anthropicPayload) + + applyResponsesApiContextManagement( + responsesPayload, + selectedModel?.capabilities.limits.max_prompt_tokens, + ) + + compactInputByLatestCompaction(responsesPayload) + + logger.debug( + "Translated Responses payload:", + JSON.stringify(responsesPayload), + ) + + const { vision, initiator } = getResponsesRequestOptions(responsesPayload) + const response = await createResponses(responsesPayload, { + vision, + initiator: initiator, + subagentMarker, + requestId, + sessionId, + isCompact, + }) + + if (responsesPayload.stream && isAsyncIterable(response)) { + logger.debug("Streaming response from Copilot (Responses API)") + return streamSSE(c, async (stream) => { + const streamState = createResponsesStreamState() + + for await 
(const chunk of response) { + const eventName = chunk.event + if (eventName === "ping") { + await stream.writeSSE({ event: "ping", data: '{"type":"ping"}' }) + continue + } + + const data = chunk.data + if (!data) { + continue + } + + logger.debug("Responses raw stream event:", data) + + const events = translateResponsesStreamEvent( + JSON.parse(data) as ResponseStreamEvent, + streamState, + ) + for (const event of events) { + const eventData = JSON.stringify(event) + logger.debug("Translated Anthropic event:", eventData) + await stream.writeSSE({ + event: event.type, + data: eventData, + }) + } + + if (streamState.messageCompleted) { + logger.debug("Message completed, ending stream") + break + } + } + + if (!streamState.messageCompleted) { + logger.warn( + "Responses stream ended without completion; sending error event", + ) + const errorEvent = buildErrorEvent( + "Responses stream ended without completion", + ) + await stream.writeSSE({ + event: errorEvent.type, + data: JSON.stringify(errorEvent), + }) + } + }) + } + + logger.debug( + "Non-streaming Responses result:", + JSON.stringify(response).slice(-400), + ) + const anthropicResponse = translateResponsesResultToAnthropic( + response as ResponsesResult, + ) + logger.debug( + "Translated Anthropic response:", + JSON.stringify(anthropicResponse), + ) + return c.json(anthropicResponse) +} + +const handleWithMessagesApi = async ( + c: Context, + anthropicPayload: AnthropicMessagesPayload, + options: { + anthropicBetaHeader?: string + subagentMarker?: SubagentMarker | null + selectedModel?: Model + requestId: string + sessionId?: string + isCompact?: boolean + }, +) => { + const { + anthropicBetaHeader, + subagentMarker, + selectedModel, + requestId, + sessionId, + isCompact, + } = options + for (const msg of anthropicPayload.messages) { + if (msg.role === "assistant" && Array.isArray(msg.content)) { + msg.content = msg.content.filter((block) => { + if (block.type !== "thinking") return true + return ( + 
block.thinking + && block.thinking !== "Thinking..." + && block.signature + && !block.signature.includes("@") + ) + }) + } + } + + const toolChoice = anthropicPayload.tool_choice + const disableThink = toolChoice?.type === "any" || toolChoice?.type === "tool" + + if (selectedModel?.capabilities.supports.adaptive_thinking && !disableThink) { + anthropicPayload.thinking = { + type: "adaptive", + } + anthropicPayload.output_config = { + effort: getAnthropicEffortForModel(anthropicPayload.model), + } + } + + logger.debug("Translated Messages payload:", JSON.stringify(anthropicPayload)) + + const response = await createMessages(anthropicPayload, anthropicBetaHeader, { + subagentMarker, + requestId, + sessionId, + isCompact, + }) + + if (isAsyncIterable(response)) { + logger.debug("Streaming response from Copilot (Messages API)") + return streamSSE(c, async (stream) => { + for await (const event of response) { + const eventName = event.event + const data = event.data ?? "" + logger.debug("Messages raw stream event:", data) + await stream.writeSSE({ + event: eventName, + data, + }) + } + }) + } + + logger.debug( + "Non-streaming Messages result:", + JSON.stringify(response).slice(-400), + ) + return c.json(response) +} + +const shouldUseResponsesApi = (selectedModel: Model | undefined): boolean => { + return ( + selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT) ?? false + ) +} + +const shouldUseMessagesApi = (selectedModel: Model | undefined): boolean => { + const useMessagesApi = isMessagesApiEnabled() + if (!useMessagesApi) { + return false + } + return ( + selectedModel?.supported_endpoints?.includes(MESSAGES_ENDPOINT) ?? 
false + ) +} + const isNonStreaming = ( response: Awaited>, ): response is ChatCompletionResponse => Object.hasOwn(response, "choices") + +const isAsyncIterable = (value: unknown): value is AsyncIterable => + Boolean(value) + && typeof (value as AsyncIterable)[Symbol.asyncIterator] === "function" + +const getAnthropicEffortForModel = ( + model: string, +): "low" | "medium" | "high" | "max" => { + const reasoningEffort = getReasoningEffortForModel(model) + + if (reasoningEffort === "xhigh") return "max" + if (reasoningEffort === "none" || reasoningEffort === "minimal") return "low" + + return reasoningEffort +} + +const isCompactRequest = ( + anthropicPayload: AnthropicMessagesPayload, +): boolean => { + const system = anthropicPayload.system + if (typeof system === "string") { + return system.startsWith(compactSystemPromptStart) + } + if (!Array.isArray(system)) return false + + return system.some( + (msg) => + typeof msg.text === "string" + && msg.text.startsWith(compactSystemPromptStart), + ) +} + +const mergeContentWithText = ( + tr: AnthropicToolResultBlock, + textBlock: AnthropicTextBlock, +): AnthropicToolResultBlock => { + if (typeof tr.content === "string") { + return { ...tr, content: `${tr.content}\n\n${textBlock.text}` } + } + return { + ...tr, + content: [...tr.content, textBlock], + } +} + +const mergeContentWithTexts = ( + tr: AnthropicToolResultBlock, + textBlocks: Array, +): AnthropicToolResultBlock => { + if (typeof tr.content === "string") { + const appendedTexts = textBlocks.map((tb) => tb.text).join("\n\n") + return { ...tr, content: `${tr.content}\n\n${appendedTexts}` } + } + return { ...tr, content: [...tr.content, ...textBlocks] } +} + +const mergeToolResultForClaude = ( + anthropicPayload: AnthropicMessagesPayload, +): void => { + for (const msg of anthropicPayload.messages) { + if (msg.role !== "user" || !Array.isArray(msg.content)) continue + + const toolResults: Array = [] + const textBlocks: Array = [] + let valid = true + + for (const 
block of msg.content) { + if (block.type === "tool_result") { + toolResults.push(block) + } else if (block.type === "text") { + textBlocks.push(block) + } else { + valid = false + break + } + } + + if (!valid || toolResults.length === 0 || textBlocks.length === 0) continue + + msg.content = mergeToolResult(toolResults, textBlocks) + } +} + +const mergeToolResult = ( + toolResults: Array, + textBlocks: Array, +): Array => { + // equal lengths -> pairwise merge + if (toolResults.length === textBlocks.length) { + return toolResults.map((tr, i) => mergeContentWithText(tr, textBlocks[i])) + } + + // lengths differ -> append all textBlocks to the last tool_result + const lastIndex = toolResults.length - 1 + return toolResults.map((tr, i) => + i === lastIndex ? mergeContentWithTexts(tr, textBlocks) : tr, + ) +} diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index dc41e6382..fdc066c05 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -1,3 +1,6 @@ +import type { Model } from "~/services/copilot/get-models" + +import { state } from "~/lib/state" import { type ChatCompletionResponse, type ChatCompletionsPayload, @@ -11,7 +14,6 @@ import { import { type AnthropicAssistantContentBlock, type AnthropicAssistantMessage, - type AnthropicMessage, type AnthropicMessagesPayload, type AnthropicResponse, type AnthropicTextBlock, @@ -24,16 +26,22 @@ import { } from "./anthropic-types" import { mapOpenAIStopReasonToAnthropic } from "./utils" -// Payload translation +// Compatible with opencode, it will filter out blocks where the thinking text is empty, so we need add a default thinking text +export const THINKING_TEXT = "Thinking..." 
+// Payload translation export function translateToOpenAI( payload: AnthropicMessagesPayload, ): ChatCompletionsPayload { + const modelId = payload.model + const model = state.models?.data.find((m) => m.id === modelId) + const thinkingBudget = getThinkingBudget(payload, model) return { - model: translateModelName(payload.model), + model: modelId, messages: translateAnthropicMessagesToOpenAI( - payload.messages, - payload.system, + payload, + modelId, + thinkingBudget, ), max_tokens: payload.max_tokens, stop: payload.stop_sequences, @@ -43,31 +51,43 @@ export function translateToOpenAI( user: payload.metadata?.user_id, tools: translateAnthropicToolsToOpenAI(payload.tools), tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice), + thinking_budget: thinkingBudget, } } -function translateModelName(model: string): string { - // Subagent requests use a specific model number which Copilot doesn't support - if (model.startsWith("claude-sonnet-4-")) { - return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4") - } else if (model.startsWith("claude-opus-")) { - return model.replace(/^claude-opus-4-.*/, "claude-opus-4") +function getThinkingBudget( + payload: AnthropicMessagesPayload, + model: Model | undefined, +): number | undefined { + const thinking = payload.thinking + if (model && thinking) { + const maxThinkingBudget = Math.min( + model.capabilities.supports.max_thinking_budget ?? 0, + (model.capabilities.limits.max_output_tokens ?? 0) - 1, + ) + thinking.budget_tokens ??= maxThinkingBudget + if (maxThinkingBudget > 0) { + const budgetTokens = Math.min(thinking.budget_tokens, maxThinkingBudget) + return Math.max( + budgetTokens, + model.capabilities.supports.min_thinking_budget ?? 
1024, + ) + } } - return model + return undefined } function translateAnthropicMessagesToOpenAI( - anthropicMessages: Array, - system: string | Array | undefined, + payload: AnthropicMessagesPayload, + modelId: string, + _thinkingBudget: number | undefined, ): Array { - const systemMessages = handleSystemPrompt(system) - - const otherMessages = anthropicMessages.flatMap((message) => + const systemMessages = handleSystemPrompt(payload.system) + const otherMessages = payload.messages.flatMap((message) => message.role === "user" ? handleUserMessage(message) - : handleAssistantMessage(message), + : handleAssistantMessage(message, modelId), ) - return [...systemMessages, ...otherMessages] } @@ -81,7 +101,11 @@ function handleSystemPrompt( if (typeof system === "string") { return [{ role: "system", content: system }] } else { - const systemText = system.map((block) => block.text).join("\n\n") + const systemText = system + .map((block) => { + return block.text + }) + .join("\n\n") return [{ role: "system", content: systemText }] } } @@ -125,6 +149,7 @@ function handleUserMessage(message: AnthropicUserMessage): Array { function handleAssistantMessage( message: AnthropicAssistantMessage, + modelId: string, ): Array { if (!Array.isArray(message.content)) { return [ @@ -139,25 +164,37 @@ function handleAssistantMessage( (block): block is AnthropicToolUseBlock => block.type === "tool_use", ) - const textBlocks = message.content.filter( - (block): block is AnthropicTextBlock => block.type === "text", - ) - - const thinkingBlocks = message.content.filter( + let thinkingBlocks = message.content.filter( (block): block is AnthropicThinkingBlock => block.type === "thinking", ) - // Combine text and thinking blocks, as OpenAI doesn't have separate thinking blocks - const allTextContent = [ - ...textBlocks.map((b) => b.text), - ...thinkingBlocks.map((b) => b.thinking), - ].join("\n\n") + if (modelId.startsWith("claude")) { + thinkingBlocks = thinkingBlocks.filter( + (b) => + 
b.thinking + && b.thinking !== THINKING_TEXT + && b.signature + // gpt signature has @ in it, so filter those out for claude models + && !b.signature.includes("@"), + ) + } + + const thinkingContents = thinkingBlocks + .filter((b) => b.thinking && b.thinking !== THINKING_TEXT) + .map((b) => b.thinking) + + const allThinkingContent = + thinkingContents.length > 0 ? thinkingContents.join("\n\n") : undefined + + const signature = thinkingBlocks.find((b) => b.signature)?.signature return toolUseBlocks.length > 0 ? [ { role: "assistant", - content: allTextContent || null, + content: mapContent(message.content), + reasoning_text: allThinkingContent, + reasoning_opaque: signature, tool_calls: toolUseBlocks.map((toolUse) => ({ id: toolUse.id, type: "function", @@ -172,6 +209,8 @@ function handleAssistantMessage( { role: "assistant", content: mapContent(message.content), + reasoning_text: allThinkingContent, + reasoning_opaque: signature, }, ] } @@ -191,11 +230,8 @@ function mapContent( const hasImage = content.some((block) => block.type === "image") if (!hasImage) { return content - .filter( - (block): block is AnthropicTextBlock | AnthropicThinkingBlock => - block.type === "text" || block.type === "thinking", - ) - .map((block) => (block.type === "text" ? 
block.text : block.thinking)) + .filter((block): block is AnthropicTextBlock => block.type === "text") + .map((block) => block.text) .join("\n\n") } @@ -204,12 +240,6 @@ function mapContent( switch (block.type) { case "text": { contentParts.push({ type: "text", text: block.text }) - - break - } - case "thinking": { - contentParts.push({ type: "text", text: block.thinking }) - break } case "image": { @@ -219,7 +249,6 @@ function mapContent( url: `data:${block.source.media_type};base64,${block.source.data}`, }, }) - break } // No default @@ -239,11 +268,24 @@ function translateAnthropicToolsToOpenAI( function: { name: tool.name, description: tool.description, - parameters: tool.input_schema, + parameters: normalizeToolSchema(tool.input_schema), }, })) } +/** + * Ensures `type: "object"` schema has a `properties` field. + * OpenAI's API rejects object schemas without it. + */ +export const normalizeToolSchema = ( + schema: Record, +): Record => { + if (schema.type === "object" && !schema.properties) { + return { ...schema, properties: {} } + } + return schema +} + function translateAnthropicToolChoiceToOpenAI( anthropicToolChoice: AnthropicMessagesPayload["tool_choice"], ): ChatCompletionsPayload["tool_choice"] { @@ -282,19 +324,19 @@ export function translateToAnthropic( response: ChatCompletionResponse, ): AnthropicResponse { // Merge content from all choices - const allTextBlocks: Array = [] - const allToolUseBlocks: Array = [] - let stopReason: "stop" | "length" | "tool_calls" | "content_filter" | null = - null // default - stopReason = response.choices[0]?.finish_reason ?? stopReason + const assistantContentBlocks: Array = [] + let stopReason = response.choices[0]?.finish_reason ?? 
null // Process all choices to extract text and tool use blocks for (const choice of response.choices) { const textBlocks = getAnthropicTextBlocks(choice.message.content) + const thinkBlocks = getAnthropicThinkBlocks( + choice.message.reasoning_text, + choice.message.reasoning_opaque, + ) const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls) - allTextBlocks.push(...textBlocks) - allToolUseBlocks.push(...toolUseBlocks) + assistantContentBlocks.push(...thinkBlocks, ...textBlocks, ...toolUseBlocks) // Use the finish_reason from the first choice, or prioritize tool_calls if (choice.finish_reason === "tool_calls" || stopReason === "stop") { @@ -302,14 +344,12 @@ export function translateToAnthropic( } } - // Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses - return { id: response.id, type: "message", role: "assistant", model: response.model, - content: [...allTextBlocks, ...allToolUseBlocks], + content: assistantContentBlocks, stop_reason: mapOpenAIStopReasonToAnthropic(stopReason), stop_sequence: null, usage: { @@ -329,7 +369,7 @@ export function translateToAnthropic( function getAnthropicTextBlocks( messageContent: Message["content"], ): Array { - if (typeof messageContent === "string") { + if (typeof messageContent === "string" && messageContent.length > 0) { return [{ type: "text", text: messageContent }] } @@ -342,6 +382,31 @@ function getAnthropicTextBlocks( return [] } +function getAnthropicThinkBlocks( + reasoningText: string | null | undefined, + reasoningOpaque: string | null | undefined, +): Array { + if (reasoningText && reasoningText.length > 0) { + return [ + { + type: "thinking", + thinking: reasoningText, + signature: reasoningOpaque || "", + }, + ] + } + if (reasoningOpaque && reasoningOpaque.length > 0) { + return [ + { + type: "thinking", + thinking: THINKING_TEXT, // Compatible with opencode, it will filter out blocks where the thinking text is empty, so we add a default thinking text 
here + signature: reasoningOpaque, + }, + ] + } + return [] +} + function getAnthropicToolUseBlocks( toolCalls: Array | undefined, ): Array { diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts new file mode 100644 index 000000000..a8c673c9c --- /dev/null +++ b/src/routes/messages/responses-stream-translation.ts @@ -0,0 +1,742 @@ +import { + type ResponseCompletedEvent, + type ResponseCreatedEvent, + type ResponseErrorEvent, + type ResponseFailedEvent, + type ResponseFunctionCallArgumentsDeltaEvent, + type ResponseFunctionCallArgumentsDoneEvent, + type ResponseIncompleteEvent, + type ResponseOutputItemAddedEvent, + type ResponseOutputItemDoneEvent, + type ResponseReasoningSummaryTextDeltaEvent, + type ResponseReasoningSummaryTextDoneEvent, + type ResponsesResult, + type ResponseStreamEvent, + type ResponseTextDeltaEvent, + type ResponseTextDoneEvent, +} from "~/services/copilot/create-responses" + +import { type AnthropicStreamEventData } from "./anthropic-types" +import { + THINKING_TEXT, + encodeCompactionCarrierSignature, + translateResponsesResultToAnthropic, +} from "./responses-translation" + +const MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE = 20 + +class FunctionCallArgumentsValidationError extends Error { + constructor(message: string) { + super(message) + this.name = "FunctionCallArgumentsValidationError" + } +} + +const updateWhitespaceRunState = ( + previousCount: number, + chunk: string, +): { + nextCount: number + exceeded: boolean +} => { + let count = previousCount + + for (const char of chunk) { + if (char === "\r" || char === "\n" || char === "\t") { + count += 1 + if (count > MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE) { + return { nextCount: count, exceeded: true } + } + continue + } + + if (char !== " ") { + count = 0 + } + } + + return { nextCount: count, exceeded: false } +} + +export interface ResponsesStreamState { + messageStartSent: boolean + messageCompleted: boolean + 
nextContentBlockIndex: number + blockIndexByKey: Map + openBlocks: Set + blockHasDelta: Set + functionCallStateByOutputIndex: Map +} + +type FunctionCallStreamState = { + blockIndex: number + toolCallId: string + name: string + consecutiveWhitespaceCount: number +} + +export const createResponsesStreamState = (): ResponsesStreamState => ({ + messageStartSent: false, + messageCompleted: false, + nextContentBlockIndex: 0, + blockIndexByKey: new Map(), + openBlocks: new Set(), + blockHasDelta: new Set(), + functionCallStateByOutputIndex: new Map(), +}) + +export const translateResponsesStreamEvent = ( + rawEvent: ResponseStreamEvent, + state: ResponsesStreamState, +): Array => { + const eventType = rawEvent.type + switch (eventType) { + case "response.created": { + return handleResponseCreated(rawEvent, state) + } + + case "response.output_item.added": { + return handleOutputItemAdded(rawEvent, state) + } + + case "response.reasoning_summary_text.delta": { + return handleReasoningSummaryTextDelta(rawEvent, state) + } + + case "response.output_text.delta": { + return handleOutputTextDelta(rawEvent, state) + } + + case "response.reasoning_summary_text.done": { + return handleReasoningSummaryTextDone(rawEvent, state) + } + + case "response.output_text.done": { + return handleOutputTextDone(rawEvent, state) + } + case "response.output_item.done": { + return handleOutputItemDone(rawEvent, state) + } + + case "response.function_call_arguments.delta": { + return handleFunctionCallArgumentsDelta(rawEvent, state) + } + + case "response.function_call_arguments.done": { + return handleFunctionCallArgumentsDone(rawEvent, state) + } + + case "response.completed": + case "response.incomplete": { + return handleResponseCompleted(rawEvent, state) + } + + case "response.failed": { + return handleResponseFailed(rawEvent, state) + } + + case "error": { + return handleErrorEvent(rawEvent, state) + } + + default: { + return [] + } + } +} + +// Helper handlers to keep 
translateResponsesStreamEvent concise +const handleResponseCreated = ( + rawEvent: ResponseCreatedEvent, + state: ResponsesStreamState, +): Array => { + return messageStart(state, rawEvent.response) +} + +const handleOutputItemAdded = ( + rawEvent: ResponseOutputItemAddedEvent, + state: ResponsesStreamState, +): Array => { + const events = new Array() + const functionCallDetails = extractFunctionCallDetails(rawEvent) + if (!functionCallDetails) { + return events + } + + const { outputIndex, toolCallId, name, initialArguments } = + functionCallDetails + const blockIndex = openFunctionCallBlock(state, { + outputIndex, + toolCallId, + name, + events, + }) + + if (initialArguments !== undefined && initialArguments.length > 0) { + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "input_json_delta", + partial_json: initialArguments, + }, + }) + state.blockHasDelta.add(blockIndex) + } + + return events +} + +const handleOutputItemDone = ( + rawEvent: ResponseOutputItemDoneEvent, + state: ResponsesStreamState, +): Array => { + const events = new Array() + const item = rawEvent.item + const itemType = item.type + const outputIndex = rawEvent.output_index + + if (itemType === "compaction") { + if (!item.id || !item.encrypted_content) { + return events + } + + const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events) + + if (!state.blockHasDelta.has(blockIndex)) { + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "thinking_delta", + thinking: THINKING_TEXT, + }, + }) + } + + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "signature_delta", + signature: encodeCompactionCarrierSignature({ + id: item.id, + encrypted_content: item.encrypted_content, + }), + }, + }) + state.blockHasDelta.add(blockIndex) + return events + } + + if (itemType !== "reasoning") { + return events + } + + const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events) + 
const signature = (item.encrypted_content ?? "") + "@" + item.id + if (signature) { + // Compatible with opencode, it will filter out blocks where the thinking text is empty, so we add a default thinking text here + if (!item.summary || item.summary.length === 0) { + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "thinking_delta", + thinking: THINKING_TEXT, + }, + }) + } + + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "signature_delta", + signature, + }, + }) + state.blockHasDelta.add(blockIndex) + } + + return events +} + +const handleFunctionCallArgumentsDelta = ( + rawEvent: ResponseFunctionCallArgumentsDeltaEvent, + state: ResponsesStreamState, +): Array => { + const events = new Array() + const outputIndex = rawEvent.output_index + const deltaText = rawEvent.delta + + if (!deltaText) { + return events + } + + const blockIndex = openFunctionCallBlock(state, { + outputIndex, + events, + }) + + const functionCallState = + state.functionCallStateByOutputIndex.get(outputIndex) + if (!functionCallState) { + return handleFunctionCallArgumentsValidationError( + new FunctionCallArgumentsValidationError( + "Received function call arguments delta without an open tool call block.", + ), + state, + events, + ) + } + + // fix: copolit function call returning infinite line breaks until max_tokens limit + const { nextCount, exceeded } = updateWhitespaceRunState( + functionCallState.consecutiveWhitespaceCount, + deltaText, + ) + if (exceeded) { + return handleFunctionCallArgumentsValidationError( + new FunctionCallArgumentsValidationError( + "Received function call arguments delta containing more than 20 consecutive whitespace characters.", + ), + state, + events, + ) + } + functionCallState.consecutiveWhitespaceCount = nextCount + + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "input_json_delta", + partial_json: deltaText, + }, + }) + 
state.blockHasDelta.add(blockIndex) + + return events +} + +const handleFunctionCallArgumentsDone = ( + rawEvent: ResponseFunctionCallArgumentsDoneEvent, + state: ResponsesStreamState, +): Array => { + const events = new Array() + const outputIndex = rawEvent.output_index + const blockIndex = openFunctionCallBlock(state, { + outputIndex, + events, + }) + + const finalArguments = + typeof rawEvent.arguments === "string" ? rawEvent.arguments : undefined + + if (!state.blockHasDelta.has(blockIndex) && finalArguments) { + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "input_json_delta", + partial_json: finalArguments, + }, + }) + state.blockHasDelta.add(blockIndex) + } + + state.functionCallStateByOutputIndex.delete(outputIndex) + return events +} + +const handleOutputTextDelta = ( + rawEvent: ResponseTextDeltaEvent, + state: ResponsesStreamState, +): Array => { + const events = new Array() + const outputIndex = rawEvent.output_index + const contentIndex = rawEvent.content_index + const deltaText = rawEvent.delta + + if (!deltaText) { + return events + } + + const blockIndex = openTextBlockIfNeeded(state, { + outputIndex, + contentIndex, + events, + }) + + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "text_delta", + text: deltaText, + }, + }) + state.blockHasDelta.add(blockIndex) + + return events +} + +const handleReasoningSummaryTextDelta = ( + rawEvent: ResponseReasoningSummaryTextDeltaEvent, + state: ResponsesStreamState, +): Array => { + const outputIndex = rawEvent.output_index + const deltaText = rawEvent.delta + const events = new Array() + const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events) + + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "thinking_delta", + thinking: deltaText, + }, + }) + state.blockHasDelta.add(blockIndex) + + return events +} + +const handleReasoningSummaryTextDone = ( + rawEvent: 
ResponseReasoningSummaryTextDoneEvent, + state: ResponsesStreamState, +): Array => { + const outputIndex = rawEvent.output_index + const text = rawEvent.text + const events = new Array() + const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events) + + if (text && !state.blockHasDelta.has(blockIndex)) { + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "thinking_delta", + thinking: text, + }, + }) + } + + return events +} + +const handleOutputTextDone = ( + rawEvent: ResponseTextDoneEvent, + state: ResponsesStreamState, +): Array => { + const events = new Array() + const outputIndex = rawEvent.output_index + const contentIndex = rawEvent.content_index + const text = rawEvent.text + + const blockIndex = openTextBlockIfNeeded(state, { + outputIndex, + contentIndex, + events, + }) + + if (text && !state.blockHasDelta.has(blockIndex)) { + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "text_delta", + text, + }, + }) + } + + return events +} + +const handleResponseCompleted = ( + rawEvent: ResponseCompletedEvent | ResponseIncompleteEvent, + state: ResponsesStreamState, +): Array => { + const response = rawEvent.response + const events = new Array() + + closeAllOpenBlocks(state, events) + const anthropic = translateResponsesResultToAnthropic(response) + events.push( + { + type: "message_delta", + delta: { + stop_reason: anthropic.stop_reason, + stop_sequence: anthropic.stop_sequence, + }, + usage: anthropic.usage, + }, + { type: "message_stop" }, + ) + state.messageCompleted = true + return events +} + +const handleResponseFailed = ( + rawEvent: ResponseFailedEvent, + state: ResponsesStreamState, +): Array => { + const response = rawEvent.response + const events = new Array() + closeAllOpenBlocks(state, events) + + const message = + response.error?.message ?? "The response failed due to an unknown error." 
+ + events.push(buildErrorEvent(message)) + state.messageCompleted = true + + return events +} + +const handleErrorEvent = ( + rawEvent: ResponseErrorEvent, + state: ResponsesStreamState, +): Array => { + const message = + typeof rawEvent.message === "string" ? + rawEvent.message + : "An unexpected error occurred during streaming." + + state.messageCompleted = true + return [buildErrorEvent(message)] +} + +const handleFunctionCallArgumentsValidationError = ( + error: FunctionCallArgumentsValidationError, + state: ResponsesStreamState, + events: Array = [], +): Array => { + const reason = error.message + + closeAllOpenBlocks(state, events) + state.messageCompleted = true + + events.push(buildErrorEvent(reason)) + + return events +} + +const messageStart = ( + state: ResponsesStreamState, + response: ResponsesResult, +): Array => { + state.messageStartSent = true + const inputCachedTokens = response.usage?.input_tokens_details?.cached_tokens + const inputTokens = + (response.usage?.input_tokens ?? 0) - (inputCachedTokens ?? 0) + return [ + { + type: "message_start", + message: { + id: response.id, + type: "message", + role: "assistant", + content: [], + model: response.model, + stop_reason: null, + stop_sequence: null, + usage: { + input_tokens: inputTokens, + output_tokens: 0, + cache_read_input_tokens: inputCachedTokens ?? 
0, + }, + }, + }, + ] +} + +const openTextBlockIfNeeded = ( + state: ResponsesStreamState, + params: { + outputIndex: number + contentIndex: number + events: Array + }, +): number => { + const { outputIndex, contentIndex, events } = params + const key = getBlockKey(outputIndex, contentIndex) + let blockIndex = state.blockIndexByKey.get(key) + + if (blockIndex === undefined) { + blockIndex = state.nextContentBlockIndex + state.nextContentBlockIndex += 1 + state.blockIndexByKey.set(key, blockIndex) + } + + if (!state.openBlocks.has(blockIndex)) { + closeOpenBlocks(state, events) + events.push({ + type: "content_block_start", + index: blockIndex, + content_block: { + type: "text", + text: "", + }, + }) + state.openBlocks.add(blockIndex) + } + + return blockIndex +} + +const openThinkingBlockIfNeeded = ( + state: ResponsesStreamState, + outputIndex: number, + events: Array, +): number => { + //thinking blocks has multiple summary_index, should combine into one block + const summaryIndex = 0 + const key = getBlockKey(outputIndex, summaryIndex) + let blockIndex = state.blockIndexByKey.get(key) + + if (blockIndex === undefined) { + blockIndex = state.nextContentBlockIndex + state.nextContentBlockIndex += 1 + state.blockIndexByKey.set(key, blockIndex) + } + + if (!state.openBlocks.has(blockIndex)) { + closeOpenBlocks(state, events) + events.push({ + type: "content_block_start", + index: blockIndex, + content_block: { + type: "thinking", + thinking: "", + }, + }) + state.openBlocks.add(blockIndex) + } + + return blockIndex +} + +const closeBlockIfOpen = ( + state: ResponsesStreamState, + blockIndex: number, + events: Array, +) => { + if (!state.openBlocks.has(blockIndex)) { + return + } + + events.push({ type: "content_block_stop", index: blockIndex }) + state.openBlocks.delete(blockIndex) + state.blockHasDelta.delete(blockIndex) +} + +const closeOpenBlocks = ( + state: ResponsesStreamState, + events: Array, +) => { + for (const blockIndex of state.openBlocks) { + 
closeBlockIfOpen(state, blockIndex, events) + } +} + +const closeAllOpenBlocks = ( + state: ResponsesStreamState, + events: Array, +) => { + closeOpenBlocks(state, events) + + state.functionCallStateByOutputIndex.clear() +} + +export const buildErrorEvent = (message: string): AnthropicStreamEventData => ({ + type: "error", + error: { + type: "api_error", + message, + }, +}) + +const getBlockKey = (outputIndex: number, contentIndex: number): string => + `${outputIndex}:${contentIndex}` + +const openFunctionCallBlock = ( + state: ResponsesStreamState, + params: { + outputIndex: number + toolCallId?: string + name?: string + events: Array + }, +): number => { + const { outputIndex, toolCallId, name, events } = params + + let functionCallState = state.functionCallStateByOutputIndex.get(outputIndex) + + if (!functionCallState) { + const blockIndex = state.nextContentBlockIndex + state.nextContentBlockIndex += 1 + + const resolvedToolCallId = toolCallId ?? `tool_call_${blockIndex}` + const resolvedName = name ?? 
"function" + + functionCallState = { + blockIndex, + toolCallId: resolvedToolCallId, + name: resolvedName, + consecutiveWhitespaceCount: 0, + } + + state.functionCallStateByOutputIndex.set(outputIndex, functionCallState) + } + + const { blockIndex } = functionCallState + + if (!state.openBlocks.has(blockIndex)) { + closeOpenBlocks(state, events) + events.push({ + type: "content_block_start", + index: blockIndex, + content_block: { + type: "tool_use", + id: functionCallState.toolCallId, + name: functionCallState.name, + input: {}, + }, + }) + state.openBlocks.add(blockIndex) + } + + return blockIndex +} + +type FunctionCallDetails = { + outputIndex: number + toolCallId: string + name: string + initialArguments?: string +} + +const extractFunctionCallDetails = ( + rawEvent: ResponseOutputItemAddedEvent, +): FunctionCallDetails | undefined => { + const item = rawEvent.item + const itemType = item.type + if (itemType !== "function_call") { + return undefined + } + + const outputIndex = rawEvent.output_index + const toolCallId = item.call_id + const name = item.name + const initialArguments = item.arguments + return { + outputIndex, + toolCallId, + name, + initialArguments, + } +} diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts new file mode 100644 index 000000000..d53f8b94c --- /dev/null +++ b/src/routes/messages/responses-translation.ts @@ -0,0 +1,781 @@ +import consola from "consola" + +import { + getExtraPromptForModel, + getReasoningEffortForModel, +} from "~/lib/config" +import { parseUserIdMetadata } from "~/lib/utils" +import { + type ResponsesPayload, + type ResponseInputCompaction, + type ResponseInputContent, + type ResponseInputImage, + type ResponseInputItem, + type ResponseInputMessage, + type ResponseInputReasoning, + type ResponseInputText, + type ResponsesResult, + type ResponseOutputContentBlock, + type ResponseOutputCompaction, + type ResponseOutputFunctionCall, + type ResponseOutputItem, + type 
ResponseOutputReasoning, + type ResponseReasoningBlock, + type ResponseOutputRefusal, + type ResponseOutputText, + type ResponseFunctionToolCallItem, + type ResponseFunctionCallOutputItem, + type Tool, + type ToolChoiceFunction, + type ToolChoiceOptions, +} from "~/services/copilot/create-responses" + +import { + type AnthropicAssistantContentBlock, + type AnthropicAssistantMessage, + type AnthropicResponse, + type AnthropicImageBlock, + type AnthropicMessage, + type AnthropicMessagesPayload, + type AnthropicTextBlock, + type AnthropicThinkingBlock, + type AnthropicTool, + type AnthropicToolResultBlock, + type AnthropicToolUseBlock, + type AnthropicUserContentBlock, + type AnthropicUserMessage, +} from "./anthropic-types" +import { normalizeToolSchema } from "./non-stream-translation" + +const MESSAGE_TYPE = "message" +const COMPACTION_SIGNATURE_PREFIX = "cm1#" +const COMPACTION_SIGNATURE_SEPARATOR = "@" + +export const THINKING_TEXT = "Thinking..." + +export const translateAnthropicMessagesToResponsesPayload = ( + payload: AnthropicMessagesPayload, +): ResponsesPayload => { + const input: Array = [] + const applyPhase = shouldApplyPhase(payload.model) + + for (const message of payload.messages) { + input.push(...translateMessage(message, payload.model, applyPhase)) + } + + const translatedTools = convertAnthropicTools(payload.tools) + const toolChoice = convertAnthropicToolChoice(payload.tool_choice) + + const { safetyIdentifier, sessionId: promptCacheKey } = parseUserIdMetadata( + payload.metadata?.user_id, + ) + + const responsesPayload: ResponsesPayload = { + model: payload.model, + input, + instructions: translateSystemPrompt(payload.system, payload.model), + temperature: 1, // reasoning high temperature fixed to 1 + top_p: payload.top_p ?? null, + max_output_tokens: Math.max(payload.max_tokens, 12800), + tools: translatedTools, + tool_choice: toolChoice, + metadata: payload.metadata ? 
{ ...payload.metadata } : null, + safety_identifier: safetyIdentifier, + prompt_cache_key: promptCacheKey, + stream: payload.stream ?? null, + store: false, + parallel_tool_calls: true, + reasoning: { + effort: getReasoningEffortForModel(payload.model), + summary: "detailed", + }, + include: ["reasoning.encrypted_content"], + } + + return responsesPayload +} + +type CompactionCarrier = { + id: string + encrypted_content: string +} + +export const encodeCompactionCarrierSignature = ( + compaction: CompactionCarrier, +): string => { + return `${COMPACTION_SIGNATURE_PREFIX}${compaction.encrypted_content}${COMPACTION_SIGNATURE_SEPARATOR}${compaction.id}` +} + +export const decodeCompactionCarrierSignature = ( + signature: string, +): CompactionCarrier | undefined => { + if (signature.startsWith(COMPACTION_SIGNATURE_PREFIX)) { + const raw = signature.slice(COMPACTION_SIGNATURE_PREFIX.length) + const separatorIndex = raw.indexOf(COMPACTION_SIGNATURE_SEPARATOR) + + if (separatorIndex <= 0 || separatorIndex === raw.length - 1) { + return undefined + } + + const encrypted_content = raw.slice(0, separatorIndex) + const id = raw.slice(separatorIndex + 1) + + if (!encrypted_content) { + return undefined + } + + return { + id, + encrypted_content, + } + } + + return undefined +} + +const translateMessage = ( + message: AnthropicMessage, + model: string, + applyPhase: boolean, +): Array => { + if (message.role === "user") { + return translateUserMessage(message) + } + + return translateAssistantMessage(message, model, applyPhase) +} + +const translateUserMessage = ( + message: AnthropicUserMessage, +): Array => { + if (typeof message.content === "string") { + return [createMessage("user", message.content)] + } + + if (!Array.isArray(message.content)) { + return [] + } + + const items: Array = [] + const pendingContent: Array = [] + + for (const block of message.content) { + if (block.type === "tool_result") { + flushPendingContent(pendingContent, items, { role: "user" }) + 
items.push(createFunctionCallOutput(block)) + continue + } + + const converted = translateUserContentBlock(block) + if (converted) { + pendingContent.push(converted) + } + } + + flushPendingContent(pendingContent, items, { role: "user" }) + + return items +} + +const translateAssistantMessage = ( + message: AnthropicAssistantMessage, + model: string, + applyPhase: boolean, +): Array => { + const assistantPhase = resolveAssistantPhase( + model, + message.content, + applyPhase, + ) + + if (typeof message.content === "string") { + return [createMessage("assistant", message.content, assistantPhase)] + } + + if (!Array.isArray(message.content)) { + return [] + } + + const items: Array = [] + const pendingContent: Array = [] + + for (const block of message.content) { + if (block.type === "tool_use") { + flushPendingContent(pendingContent, items, { + role: "assistant", + phase: assistantPhase, + }) + items.push(createFunctionToolCall(block)) + continue + } + + if (block.type === "thinking" && block.signature) { + const compactionContent = createCompactionContent(block) + if (compactionContent) { + flushPendingContent(pendingContent, items, { + role: "assistant", + phase: assistantPhase, + }) + items.push(compactionContent) + continue + } + + if (block.signature.includes("@")) { + flushPendingContent(pendingContent, items, { + role: "assistant", + phase: assistantPhase, + }) + items.push(createReasoningContent(block)) + continue + } + } + + const converted = translateAssistantContentBlock(block) + if (converted) { + pendingContent.push(converted) + } + } + + flushPendingContent(pendingContent, items, { + role: "assistant", + phase: assistantPhase, + }) + + return items +} + +const translateUserContentBlock = ( + block: AnthropicUserContentBlock, +): ResponseInputContent | undefined => { + switch (block.type) { + case "text": { + return createTextContent(block.text) + } + case "image": { + return createImageContent(block) + } + default: { + return undefined + } + } +} + 
+const translateAssistantContentBlock = ( + block: AnthropicAssistantContentBlock, +): ResponseInputContent | undefined => { + switch (block.type) { + case "text": { + return createOutPutTextContent(block.text) + } + default: { + return undefined + } + } +} + +const flushPendingContent = ( + pendingContent: Array, + target: Array, + message: Pick, +) => { + if (pendingContent.length === 0) { + return + } + + const messageContent = [...pendingContent] + + target.push(createMessage(message.role, messageContent, message.phase)) + pendingContent.length = 0 +} + +const createMessage = ( + role: ResponseInputMessage["role"], + content: string | Array, + phase?: ResponseInputMessage["phase"], +): ResponseInputMessage => ({ + type: MESSAGE_TYPE, + role, + content, + ...(role === "assistant" && phase ? { phase } : {}), +}) + +const resolveAssistantPhase = ( + _model: string, + content: AnthropicAssistantMessage["content"], + applyPhase: boolean, +): ResponseInputMessage["phase"] | undefined => { + if (!applyPhase) { + return undefined + } + + if (typeof content === "string") { + return "final_answer" + } + + if (!Array.isArray(content)) { + return undefined + } + + const hasText = content.some((block) => block.type === "text") + if (!hasText) { + return undefined + } + + const hasToolUse = content.some((block) => block.type === "tool_use") + return hasToolUse ? 
"commentary" : "final_answer" +} + +const shouldApplyPhase = (model: string): boolean => { + const extraPrompt = getExtraPromptForModel(model) + return extraPrompt.includes("## Intermediary updates") +} + +const createTextContent = (text: string): ResponseInputText => ({ + type: "input_text", + text, +}) + +const createOutPutTextContent = (text: string): ResponseInputText => ({ + type: "output_text", + text, +}) + +const createImageContent = ( + block: AnthropicImageBlock, +): ResponseInputImage => ({ + type: "input_image", + image_url: `data:${block.source.media_type};base64,${block.source.data}`, + detail: "auto", +}) + +const createReasoningContent = ( + block: AnthropicThinkingBlock, +): ResponseInputReasoning => { + const { encryptedContent, id } = parseReasoningSignature(block.signature) + const thinking = block.thinking === THINKING_TEXT ? "" : block.thinking + return { + id, + type: "reasoning", + summary: thinking ? [{ type: "summary_text", text: thinking }] : [], + encrypted_content: encryptedContent, + } +} + +const createCompactionContent = ( + block: AnthropicThinkingBlock, +): ResponseInputCompaction | undefined => { + const compaction = decodeCompactionCarrierSignature(block.signature) + if (!compaction) { + return undefined + } + + return { + id: compaction.id, + type: "compaction", + encrypted_content: compaction.encrypted_content, + } +} + +const parseReasoningSignature = ( + signature: string, +): { encryptedContent: string; id: string } => { + const splitIndex = signature.lastIndexOf("@") + + if (splitIndex <= 0 || splitIndex === signature.length - 1) { + return { encryptedContent: signature, id: "" } + } + + return { + encryptedContent: signature.slice(0, splitIndex), + id: signature.slice(splitIndex + 1), + } +} + +const createFunctionToolCall = ( + block: AnthropicToolUseBlock, +): ResponseFunctionToolCallItem => ({ + type: "function_call", + call_id: block.id, + name: block.name, + arguments: JSON.stringify(block.input), + status: 
"completed", +}) + +const createFunctionCallOutput = ( + block: AnthropicToolResultBlock, +): ResponseFunctionCallOutputItem => ({ + type: "function_call_output", + call_id: block.tool_use_id, + output: convertToolResultContent(block.content), + status: block.is_error ? "incomplete" : "completed", +}) + +const translateSystemPrompt = ( + system: string | Array | undefined, + model: string, +): string | null => { + if (!system) { + return null + } + + const extraPrompt = getExtraPromptForModel(model) + + if (typeof system === "string") { + return system + extraPrompt + } + + const text = system + .map((block, index) => { + if (index === 0) { + return block.text + extraPrompt + } + return block.text + }) + .join(" ") + return text.length > 0 ? text : null +} + +const convertAnthropicTools = ( + tools: Array | undefined, +): Array | null => { + if (!tools || tools.length === 0) { + return null + } + + return tools.map((tool) => ({ + type: "function", + name: tool.name, + parameters: normalizeToolSchema(tool.input_schema), + strict: false, + ...(tool.description ? { description: tool.description } : {}), + })) +} + +const convertAnthropicToolChoice = ( + choice: AnthropicMessagesPayload["tool_choice"], +): ToolChoiceOptions | ToolChoiceFunction => { + if (!choice) { + return "auto" + } + + switch (choice.type) { + case "auto": { + return "auto" + } + case "any": { + return "required" + } + case "tool": { + return choice.name ? 
{ type: "function", name: choice.name } : "auto" + } + case "none": { + return "none" + } + default: { + return "auto" + } + } +} + +export const translateResponsesResultToAnthropic = ( + response: ResponsesResult, +): AnthropicResponse => { + const contentBlocks = mapOutputToAnthropicContent(response.output) + const usage = mapResponsesUsage(response) + let anthropicContent = fallbackContentBlocks(response.output_text) + if (contentBlocks.length > 0) { + anthropicContent = contentBlocks + } + + const stopReason = mapResponsesStopReason(response) + + return { + id: response.id, + type: "message", + role: "assistant", + content: anthropicContent, + model: response.model, + stop_reason: stopReason, + stop_sequence: null, + usage, + } +} + +const mapOutputToAnthropicContent = ( + output: Array, +): Array => { + const contentBlocks: Array = [] + + for (const item of output) { + switch (item.type) { + case "reasoning": { + const thinkingText = extractReasoningText(item) + if (thinkingText.length > 0) { + contentBlocks.push({ + type: "thinking", + thinking: thinkingText, + signature: (item.encrypted_content ?? "") + "@" + item.id, + }) + } + break + } + case "function_call": { + const toolUseBlock = createToolUseContentBlock(item) + if (toolUseBlock) { + contentBlocks.push(toolUseBlock) + } + break + } + case "message": { + const combinedText = combineMessageTextContent(item.content) + if (combinedText.length > 0) { + contentBlocks.push({ type: "text", text: combinedText }) + } + break + } + case "compaction": { + const compactionBlock = createCompactionThinkingBlock(item) + if (compactionBlock) { + contentBlocks.push(compactionBlock) + } + break + } + default: { + // Future compatibility for unrecognized output item types. 
+ const combinedText = combineMessageTextContent( + (item as { content?: Array }).content, + ) + if (combinedText.length > 0) { + contentBlocks.push({ type: "text", text: combinedText }) + } + } + } + } + + return contentBlocks +} + +const combineMessageTextContent = ( + content: Array | undefined, +): string => { + if (!Array.isArray(content)) { + return "" + } + + let aggregated = "" + + for (const block of content) { + if (isResponseOutputText(block)) { + aggregated += block.text + continue + } + + if (isResponseOutputRefusal(block)) { + aggregated += block.refusal + continue + } + + if (typeof (block as { text?: unknown }).text === "string") { + aggregated += (block as { text: string }).text + continue + } + + if (typeof (block as { reasoning?: unknown }).reasoning === "string") { + aggregated += (block as { reasoning: string }).reasoning + continue + } + } + + return aggregated +} + +const extractReasoningText = (item: ResponseOutputReasoning): string => { + const segments: Array = [] + + const collectFromBlocks = (blocks?: Array) => { + if (!Array.isArray(blocks)) { + return + } + + for (const block of blocks) { + if (typeof block.text === "string") { + segments.push(block.text) + continue + } + } + } + + // Compatible with opencode, it will filter out blocks where the thinking text is empty, so we add a default thinking text here + if (!item.summary || item.summary.length === 0) { + return THINKING_TEXT + } + + collectFromBlocks(item.summary) + + return segments.join("").trim() +} + +const createToolUseContentBlock = ( + call: ResponseOutputFunctionCall, +): AnthropicToolUseBlock | null => { + const toolId = call.call_id + if (!call.name || !toolId) { + return null + } + + const input = parseFunctionCallArguments(call.arguments) + + return { + type: "tool_use", + id: toolId, + name: call.name, + input, + } +} + +const createCompactionThinkingBlock = ( + item: ResponseOutputCompaction, +): AnthropicAssistantContentBlock | null => { + if (!item.id || 
!item.encrypted_content) { + return null + } + + return { + type: "thinking", + thinking: THINKING_TEXT, + signature: encodeCompactionCarrierSignature({ + id: item.id, + encrypted_content: item.encrypted_content, + }), + } +} + +const parseFunctionCallArguments = ( + rawArguments: string, +): Record => { + if (typeof rawArguments !== "string" || rawArguments.trim().length === 0) { + return {} + } + + try { + const parsed: unknown = JSON.parse(rawArguments) + + if (Array.isArray(parsed)) { + return { arguments: parsed } + } + + if (parsed && typeof parsed === "object") { + return parsed as Record + } + } catch (error) { + consola.warn("Failed to parse function call arguments", { + error, + rawArguments, + }) + } + + return { raw_arguments: rawArguments } +} + +const fallbackContentBlocks = ( + outputText: string, +): Array => { + if (!outputText) { + return [] + } + + return [ + { + type: "text", + text: outputText, + }, + ] +} + +const mapResponsesStopReason = ( + response: ResponsesResult, +): AnthropicResponse["stop_reason"] => { + const { status, incomplete_details: incompleteDetails } = response + + if (status === "completed") { + if (response.output.some((item) => item.type === "function_call")) { + return "tool_use" + } + return "end_turn" + } + + if (status === "incomplete") { + if (incompleteDetails?.reason === "max_output_tokens") { + return "max_tokens" + } + if (incompleteDetails?.reason === "content_filter") { + return "end_turn" + } + } + + return null +} + +const mapResponsesUsage = ( + response: ResponsesResult, +): AnthropicResponse["usage"] => { + const inputTokens = response.usage?.input_tokens ?? 0 + const outputTokens = response.usage?.output_tokens ?? 0 + const inputCachedTokens = response.usage?.input_tokens_details?.cached_tokens + + return { + input_tokens: inputTokens - (inputCachedTokens ?? 
0), + output_tokens: outputTokens, + ...(response.usage?.input_tokens_details?.cached_tokens !== undefined && { + cache_read_input_tokens: + response.usage.input_tokens_details.cached_tokens, + }), + } +} + +const isRecord = (value: unknown): value is Record => + typeof value === "object" && value !== null + +const isResponseOutputText = ( + block: ResponseOutputContentBlock, +): block is ResponseOutputText => + isRecord(block) + && "type" in block + && (block as { type?: unknown }).type === "output_text" + +const isResponseOutputRefusal = ( + block: ResponseOutputContentBlock, +): block is ResponseOutputRefusal => + isRecord(block) + && "type" in block + && (block as { type?: unknown }).type === "refusal" + +const convertToolResultContent = ( + content: string | Array, +): string | Array => { + if (typeof content === "string") { + return content + } + + if (Array.isArray(content)) { + const result: Array = [] + for (const block of content) { + switch (block.type) { + case "text": { + result.push(createTextContent(block.text)) + break + } + case "image": { + result.push(createImageContent(block)) + break + } + default: { + break + } + } + } + return result + } + + return "" +} diff --git a/src/routes/messages/stream-translation.ts b/src/routes/messages/stream-translation.ts index 55094448f..57849828e 100644 --- a/src/routes/messages/stream-translation.ts +++ b/src/routes/messages/stream-translation.ts @@ -1,9 +1,14 @@ -import { type ChatCompletionChunk } from "~/services/copilot/create-chat-completions" +import { + type ChatCompletionChunk, + type Choice, + type Delta, +} from "~/services/copilot/create-chat-completions" import { type AnthropicStreamEventData, type AnthropicStreamState, } from "./anthropic-types" +import { THINKING_TEXT } from "./non-stream-translation" import { mapOpenAIStopReasonToAnthropic } from "./utils" function isToolBlockOpen(state: AnthropicStreamState): boolean { @@ -16,7 +21,6 @@ function isToolBlockOpen(state: AnthropicStreamState): 
boolean { ) } -// eslint-disable-next-line max-lines-per-function, complexity export function translateChunkToAnthropicEvents( chunk: ChatCompletionChunk, state: AnthropicStreamState, @@ -30,22 +34,54 @@ export function translateChunkToAnthropicEvents( const choice = chunk.choices[0] const { delta } = choice - if (!state.messageStartSent) { - events.push({ - type: "message_start", - message: { - id: chunk.id, - type: "message", - role: "assistant", - content: [], - model: chunk.model, - stop_reason: null, - stop_sequence: null, + handleMessageStart(state, events, chunk) + + handleThinkingText(delta, state, events) + + handleContent(delta, state, events) + + handleToolCalls(delta, state, events) + + handleFinish(choice, state, { events, chunk }) + + return events +} + +function handleFinish( + choice: Choice, + state: AnthropicStreamState, + context: { + events: Array + chunk: ChatCompletionChunk + }, +) { + const { events, chunk } = context + if (choice.finish_reason && choice.finish_reason.length > 0) { + if (state.contentBlockOpen) { + const toolBlockOpen = isToolBlockOpen(state) + context.events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockOpen = false + state.contentBlockIndex++ + if (!toolBlockOpen) { + handleReasoningOpaque(choice.delta, events, state) + } + } + + events.push( + { + type: "message_delta", + delta: { + stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason), + stop_sequence: null, + }, usage: { input_tokens: (chunk.usage?.prompt_tokens ?? 0) - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0), - output_tokens: 0, // Will be updated in message_delta when finished + output_tokens: chunk.usage?.completion_tokens ?? 
0, ...(chunk.usage?.prompt_tokens_details?.cached_tokens !== undefined && { cache_read_input_tokens: @@ -53,44 +89,23 @@ export function translateChunkToAnthropicEvents( }), }, }, - }) - state.messageStartSent = true + { + type: "message_stop", + }, + ) } +} - if (delta.content) { - if (isToolBlockOpen(state)) { - // A tool block was open, so close it before starting a text block. - events.push({ - type: "content_block_stop", - index: state.contentBlockIndex, - }) - state.contentBlockIndex++ - state.contentBlockOpen = false - } - - if (!state.contentBlockOpen) { - events.push({ - type: "content_block_start", - index: state.contentBlockIndex, - content_block: { - type: "text", - text: "", - }, - }) - state.contentBlockOpen = true - } +function handleToolCalls( + delta: Delta, + state: AnthropicStreamState, + events: Array, +) { + if (delta.tool_calls && delta.tool_calls.length > 0) { + closeThinkingBlockIfOpen(state, events) - events.push({ - type: "content_block_delta", - index: state.contentBlockIndex, - delta: { - type: "text_delta", - text: delta.content, - }, - }) - } + handleReasoningOpaqueInToolCalls(state, events, delta) - if (delta.tool_calls) { for (const toolCall of delta.tool_calls) { if (toolCall.id && toolCall.function?.name) { // New tool call starting. 
@@ -141,28 +156,111 @@ export function translateChunkToAnthropicEvents( } } } +} - if (choice.finish_reason) { - if (state.contentBlockOpen) { +function handleReasoningOpaqueInToolCalls( + state: AnthropicStreamState, + events: Array, + delta: Delta, +) { + if (state.contentBlockOpen && !isToolBlockOpen(state)) { + events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockIndex++ + state.contentBlockOpen = false + } + handleReasoningOpaque(delta, events, state) +} + +function handleContent( + delta: Delta, + state: AnthropicStreamState, + events: Array, +) { + if (delta.content && delta.content.length > 0) { + closeThinkingBlockIfOpen(state, events) + + if (isToolBlockOpen(state)) { + // A tool block was open, so close it before starting a text block. events.push({ type: "content_block_stop", index: state.contentBlockIndex, }) + state.contentBlockIndex++ state.contentBlockOpen = false } + if (!state.contentBlockOpen) { + events.push({ + type: "content_block_start", + index: state.contentBlockIndex, + content_block: { + type: "text", + text: "", + }, + }) + state.contentBlockOpen = true + } + + events.push({ + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "text_delta", + text: delta.content, + }, + }) + } + + // handle for claude model + if ( + delta.content === "" + && delta.reasoning_opaque + && delta.reasoning_opaque.length > 0 + && state.thinkingBlockOpen + ) { events.push( { - type: "message_delta", + type: "content_block_delta", + index: state.contentBlockIndex, delta: { - stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason), - stop_sequence: null, + type: "signature_delta", + signature: delta.reasoning_opaque, }, + }, + { + type: "content_block_stop", + index: state.contentBlockIndex, + }, + ) + state.contentBlockIndex++ + state.thinkingBlockOpen = false + } +} + +function handleMessageStart( + state: AnthropicStreamState, + events: Array, + chunk: 
ChatCompletionChunk, +) { + if (!state.messageStartSent) { + events.push({ + type: "message_start", + message: { + id: chunk.id, + type: "message", + role: "assistant", + content: [], + model: chunk.model, + stop_reason: null, + stop_sequence: null, usage: { input_tokens: (chunk.usage?.prompt_tokens ?? 0) - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0), - output_tokens: chunk.usage?.completion_tokens ?? 0, + output_tokens: 0, // Will be updated in message_delta when finished ...(chunk.usage?.prompt_tokens_details?.cached_tokens !== undefined && { cache_read_input_tokens: @@ -170,13 +268,111 @@ export function translateChunkToAnthropicEvents( }), }, }, + }) + state.messageStartSent = true + } +} + +function handleReasoningOpaque( + delta: Delta, + events: Array, + state: AnthropicStreamState, +) { + if (delta.reasoning_opaque && delta.reasoning_opaque.length > 0) { + events.push( { - type: "message_stop", + type: "content_block_start", + index: state.contentBlockIndex, + content_block: { + type: "thinking", + thinking: "", + }, + }, + { + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "thinking_delta", + thinking: THINKING_TEXT, // Compatible with opencode, it will filter out blocks where the thinking text is empty, so we add a default thinking text here + }, + }, + { + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "signature_delta", + signature: delta.reasoning_opaque, + }, + }, + { + type: "content_block_stop", + index: state.contentBlockIndex, }, ) + state.contentBlockIndex++ } +} - return events +function handleThinkingText( + delta: Delta, + state: AnthropicStreamState, + events: Array, +) { + if (delta.reasoning_text && delta.reasoning_text.length > 0) { + // compatible with copilot API returning content->reasoning_text->reasoning_opaque in different deltas + // this is an extremely abnormal situation, probably a server-side bug + // only occurs in the claude model, with a very 
low probability of occurrence + if (state.contentBlockOpen) { + delta.content = delta.reasoning_text + delta.reasoning_text = undefined + return + } + + if (!state.thinkingBlockOpen) { + events.push({ + type: "content_block_start", + index: state.contentBlockIndex, + content_block: { + type: "thinking", + thinking: "", + }, + }) + state.thinkingBlockOpen = true + } + + events.push({ + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "thinking_delta", + thinking: delta.reasoning_text, + }, + }) + } +} + +function closeThinkingBlockIfOpen( + state: AnthropicStreamState, + events: Array, +): void { + if (state.thinkingBlockOpen) { + events.push( + { + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "signature_delta", + signature: "", + }, + }, + { + type: "content_block_stop", + index: state.contentBlockIndex, + }, + ) + state.contentBlockIndex++ + state.thinkingBlockOpen = false + } } export function translateErrorToAnthropicErrorEvent(): AnthropicStreamEventData { diff --git a/src/routes/messages/subagent-marker.ts b/src/routes/messages/subagent-marker.ts new file mode 100644 index 000000000..0d93ce507 --- /dev/null +++ b/src/routes/messages/subagent-marker.ts @@ -0,0 +1,78 @@ +import type { AnthropicMessagesPayload } from "./anthropic-types" + +const subagentMarkerPrefix = "__SUBAGENT_MARKER__" + +export interface SubagentMarker { + session_id: string + agent_id: string + agent_type: string +} + +export const parseSubagentMarkerFromFirstUser = ( + payload: AnthropicMessagesPayload, +): SubagentMarker | null => { + const firstUserMessage = payload.messages.find((msg) => msg.role === "user") + if (!firstUserMessage || !Array.isArray(firstUserMessage.content)) { + return null + } + + for (const block of firstUserMessage.content) { + if (block.type !== "text") { + continue + } + + const marker = parseSubagentMarkerFromSystemReminder(block.text) + if (marker) { + return marker + } + } + + return null +} 
+ +const parseSubagentMarkerFromSystemReminder = ( + text: string, +): SubagentMarker | null => { + const startTag = "" + const endTag = "" + let searchFrom = 0 + + while (true) { + const reminderStart = text.indexOf(startTag, searchFrom) + if (reminderStart === -1) { + break + } + + const contentStart = reminderStart + startTag.length + const reminderEnd = text.indexOf(endTag, contentStart) + if (reminderEnd === -1) { + break + } + + const reminderContent = text.slice(contentStart, reminderEnd) + const markerIndex = reminderContent.indexOf(subagentMarkerPrefix) + if (markerIndex === -1) { + searchFrom = reminderEnd + endTag.length + continue + } + + const markerJson = reminderContent + .slice(markerIndex + subagentMarkerPrefix.length) + .trim() + + try { + const parsed = JSON.parse(markerJson) as SubagentMarker + if (!parsed.session_id || !parsed.agent_id || !parsed.agent_type) { + searchFrom = reminderEnd + endTag.length + continue + } + + return parsed + } catch { + searchFrom = reminderEnd + endTag.length + continue + } + } + + return null +} diff --git a/src/routes/models/route.ts b/src/routes/models/route.ts index 5254e2af7..72a2add8f 100644 --- a/src/routes/models/route.ts +++ b/src/routes/models/route.ts @@ -14,6 +14,7 @@ modelRoutes.get("/", async (c) => { } const models = state.models?.data.map((model) => ({ + ...model, id: model.id, object: "model", type: "model", diff --git a/src/routes/provider/messages/count-tokens-handler.ts b/src/routes/provider/messages/count-tokens-handler.ts new file mode 100644 index 000000000..855e32a2e --- /dev/null +++ b/src/routes/provider/messages/count-tokens-handler.ts @@ -0,0 +1,76 @@ +import type { Context } from "hono" + +import type { Model } from "~/services/copilot/get-models" + +import { createHandlerLogger } from "~/lib/logger" +import { state } from "~/lib/state" +import { getTokenCount } from "~/lib/tokenizer" +import { type AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" +import { 
translateToOpenAI } from "~/routes/messages/non-stream-translation" + +const logger = createHandlerLogger("provider-count-tokens-handler") + +const createFallbackModel = (modelId: string): Model => ({ + capabilities: { + family: "provider", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "o200k_base", + type: "chat", + }, + id: modelId, + model_picker_enabled: false, + name: modelId, + object: "model", + preview: false, + vendor: "provider", + version: "unknown", +}) + +export async function handleProviderCountTokens(c: Context): Promise { + const provider = c.req.param("provider") + + try { + const anthropicPayload = await c.req.json() + const openAIPayload = translateToOpenAI(anthropicPayload) + const modelId = anthropicPayload.model.trim() + + let selectedModel = state.models?.data.find((model) => model.id === modelId) + + if (!selectedModel && modelId) { + selectedModel = createFallbackModel(modelId) + } + + if (!selectedModel) { + logger.warn("provider.count_tokens.model_not_found", { + provider, + model: anthropicPayload.model, + }) + return c.json({ + input_tokens: 1, + }) + } + + const tokenCount = await getTokenCount(openAIPayload, selectedModel) + const finalTokenCount = tokenCount.input + tokenCount.output + + logger.debug("provider.count_tokens.success", { + provider, + model: anthropicPayload.model, + input_tokens: finalTokenCount, + }) + + return c.json({ + input_tokens: finalTokenCount, + }) + } catch (error) { + logger.error("provider.count_tokens.error", { + provider, + error, + }) + return c.json({ + input_tokens: 1, + }) + } +} diff --git a/src/routes/provider/messages/handler.ts b/src/routes/provider/messages/handler.ts new file mode 100644 index 000000000..b0c4ffb86 --- /dev/null +++ b/src/routes/provider/messages/handler.ts @@ -0,0 +1,141 @@ +import type { Context } from "hono" + +import { events } from "fetch-event-stream" +import { streamSSE } from "hono/streaming" + +import type { + AnthropicMessagesPayload, + 
AnthropicResponse, + AnthropicStreamEventData, +} from "~/routes/messages/anthropic-types" + +import { getProviderConfig, type ResolvedProviderConfig } from "~/lib/config" +import { HTTPError } from "~/lib/error" +import { createHandlerLogger } from "~/lib/logger" +import { forwardProviderMessages } from "~/services/providers/anthropic-proxy" + +const logger = createHandlerLogger("provider-messages-handler") + +export async function handleProviderMessages(c: Context): Promise { + const provider = c.req.param("provider") + const providerConfig = getProviderConfig(provider) + if (!providerConfig) { + return c.json( + { + error: { + message: `Provider '${provider}' not found or disabled`, + type: "invalid_request_error", + }, + }, + 404, + ) + } + + try { + const payload = await c.req.json() + + const modelConfig = providerConfig.models?.[payload.model] + payload.temperature ??= modelConfig?.temperature + payload.top_p ??= modelConfig?.topP + payload.top_k ??= modelConfig?.topK + + logger.debug( + "provider.messages.request", + JSON.stringify({ payload, provider }), + ) + + const upstreamResponse = await forwardProviderMessages( + providerConfig, + payload, + c.req.raw.headers, + ) + + if (!upstreamResponse.ok) { + logger.error("Failed to create responses", upstreamResponse) + throw new HTTPError("Failed to create responses", upstreamResponse) + } + + const contentType = upstreamResponse.headers.get("content-type") ?? 
"" + const isStreamingResponse = + Boolean(payload.stream) && contentType.includes("text/event-stream") + + if (isStreamingResponse) { + logger.debug("provider.messages.streaming") + return streamSSE(c, async (stream) => { + for await (const chunk of events(upstreamResponse)) { + logger.debug("provider.messages.raw_stream_event:", chunk.data) + const eventName = chunk.event + if (eventName === "ping") { + await stream.writeSSE({ event: "ping", data: '{"type":"ping"}' }) + continue + } + + let data = chunk.data + if (!data) { + continue + } + + if (chunk.data === "[DONE]") { + break + } + + try { + const parsed = JSON.parse(data) as AnthropicStreamEventData + if (parsed.type === "message_start") { + adjustInputTokens(providerConfig, parsed.message.usage) + } else if (parsed.type === "message_delta") { + adjustInputTokens(providerConfig, parsed.usage) + } + data = JSON.stringify(parsed) + } catch (error) { + logger.error("provider.messages.streaming.adjust_tokens_error", { + error, + originalData: data, + }) + } + await stream.writeSSE({ + event: eventName, + data, + }) + } + }) + } + + const jsonBody = (await upstreamResponse.json()) as AnthropicResponse + + adjustInputTokens(providerConfig, jsonBody.usage) + + logger.debug( + "provider.messages.no_stream result:", + JSON.stringify(jsonBody), + ) + return c.json(jsonBody) + } catch (error) { + logger.error("provider.messages.error", { + provider, + error, + }) + throw error + } +} + +const adjustInputTokens = ( + providerConfig: ResolvedProviderConfig, + usage?: { + input_tokens?: number + cache_read_input_tokens?: number + cache_creation_input_tokens?: number + }, +): void => { + if (!providerConfig.adjustInputTokens || !usage) { + return + } + const adjustedInput = Math.max( + 0, + (usage.input_tokens ?? 0) + - (usage.cache_read_input_tokens ?? 0) + - (usage.cache_creation_input_tokens ?? 
0), + ) + usage.input_tokens = adjustedInput + logger.debug("provider.messages.adjusted_usage:", JSON.stringify(usage)) +} diff --git a/src/routes/provider/messages/route.ts b/src/routes/provider/messages/route.ts new file mode 100644 index 000000000..24cd05942 --- /dev/null +++ b/src/routes/provider/messages/route.ts @@ -0,0 +1,24 @@ +import { Hono } from "hono" + +import { forwardError } from "~/lib/error" + +import { handleProviderCountTokens } from "./count-tokens-handler" +import { handleProviderMessages } from "./handler" + +export const providerMessageRoutes = new Hono() + +providerMessageRoutes.post("/", async (c) => { + try { + return await handleProviderMessages(c) + } catch (error) { + return await forwardError(c, error) + } +}) + +providerMessageRoutes.post("/count_tokens", async (c) => { + try { + return await handleProviderCountTokens(c) + } catch (error) { + return await forwardError(c, error) + } +}) diff --git a/src/routes/provider/models/route.ts b/src/routes/provider/models/route.ts new file mode 100644 index 000000000..ec4cec247 --- /dev/null +++ b/src/routes/provider/models/route.ts @@ -0,0 +1,50 @@ +import { Hono } from "hono" + +import { getProviderConfig } from "~/lib/config" +import { forwardError } from "~/lib/error" +import { createHandlerLogger } from "~/lib/logger" +import { + createProviderProxyResponse, + forwardProviderModels, +} from "~/services/providers/anthropic-proxy" + +const logger = createHandlerLogger("provider-models-handler") + +export const providerModelRoutes = new Hono() + +providerModelRoutes.get("/", async (c) => { + const provider = c.req.param("provider") ?? 
"" + + try { + const providerConfig = getProviderConfig(provider) + if (!providerConfig) { + return c.json( + { + error: { + message: `Provider '${provider}' not found or disabled`, + type: "invalid_request_error", + }, + }, + 404, + ) + } + + const upstreamResponse = await forwardProviderModels( + providerConfig, + c.req.raw.headers, + ) + + logger.debug("provider.models.response", { + provider, + statusCode: upstreamResponse.status, + }) + + return createProviderProxyResponse(upstreamResponse) + } catch (error) { + logger.error("provider.models.error", { + provider, + error, + }) + return await forwardError(c, error) + } +}) diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts new file mode 100644 index 000000000..cbcf2e307 --- /dev/null +++ b/src/routes/responses/handler.ts @@ -0,0 +1,163 @@ +import type { Context } from "hono" + +import { streamSSE } from "hono/streaming" + +import { awaitApproval } from "~/lib/approval" +import { getConfig, isResponsesApiWebSearchEnabled } from "~/lib/config" +import { createHandlerLogger } from "~/lib/logger" +import { checkRateLimit } from "~/lib/rate-limit" +import { state } from "~/lib/state" +import { generateRequestIdFromPayload, getUUID } from "~/lib/utils" +import { + createResponses, + type ResponsesPayload, + type ResponsesResult, +} from "~/services/copilot/create-responses" + +import { createStreamIdTracker, fixStreamIds } from "./stream-id-sync" +import { + applyResponsesApiContextManagement, + compactInputByLatestCompaction, + getResponsesRequestOptions, +} from "./utils" + +const logger = createHandlerLogger("responses-handler") + +const RESPONSES_ENDPOINT = "/responses" + +export const handleResponses = async (c: Context) => { + await checkRateLimit(state) + + const payload = await c.req.json() + logger.debug("Responses request payload:", JSON.stringify(payload)) + + // not support subagent marker for now , set sessionId = getUUID(requestId) + const requestId = 
generateRequestIdFromPayload({ messages: payload.input }) + logger.debug("Generated request ID:", requestId) + + const sessionId = getUUID(requestId) + logger.debug("Extracted session ID:", sessionId) + + useFunctionApplyPatch(payload) + + if (!isResponsesApiWebSearchEnabled()) { + removeWebSearchTool(payload) + } + + compactInputByLatestCompaction(payload) + + const selectedModel = state.models?.data.find( + (model) => model.id === payload.model, + ) + const supportsResponses = + selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT) ?? false + + if (!supportsResponses) { + return c.json( + { + error: { + message: + "This model does not support the responses endpoint. Please choose a different model.", + type: "invalid_request_error", + }, + }, + 400, + ) + } + + applyResponsesApiContextManagement( + payload, + selectedModel?.capabilities.limits.max_prompt_tokens, + ) + + logger.debug("Translated Responses payload:", JSON.stringify(payload)) + + const { vision, initiator } = getResponsesRequestOptions(payload) + + if (state.manualApprove) { + await awaitApproval() + } + + const response = await createResponses(payload, { + vision, + initiator, + requestId, + sessionId: sessionId, + }) + + if (isStreamingRequested(payload) && isAsyncIterable(response)) { + logger.debug("Forwarding native Responses stream") + return streamSSE(c, async (stream) => { + const idTracker = createStreamIdTracker() + + for await (const chunk of response) { + logger.debug("Responses stream chunk:", JSON.stringify(chunk)) + + const processedData = fixStreamIds( + (chunk as { data?: string }).data ?? 
"", + (chunk as { event?: string }).event, + idTracker, + ) + + await stream.writeSSE({ + id: (chunk as { id?: string }).id, + event: (chunk as { event?: string }).event, + data: processedData, + }) + } + }) + } + + logger.debug( + "Forwarding native Responses result:", + JSON.stringify(response).slice(-400), + ) + return c.json(response as ResponsesResult) +} + +const isAsyncIterable = (value: unknown): value is AsyncIterable => + Boolean(value) + && typeof (value as AsyncIterable)[Symbol.asyncIterator] === "function" + +const isStreamingRequested = (payload: ResponsesPayload): boolean => + Boolean(payload.stream) + +const useFunctionApplyPatch = (payload: ResponsesPayload): void => { + const config = getConfig() + const useFunctionApplyPatch = config.useFunctionApplyPatch ?? true + if (useFunctionApplyPatch) { + logger.debug("Using function tool apply_patch for responses") + if (Array.isArray(payload.tools)) { + const toolsArr = payload.tools + for (let i = 0; i < toolsArr.length; i++) { + const t = toolsArr[i] + if (t.type === "custom" && t.name === "apply_patch") { + toolsArr[i] = { + type: "function", + name: t.name, + description: "Use the `apply_patch` tool to edit files", + parameters: { + type: "object", + properties: { + input: { + type: "string", + description: "The entire contents of the apply_patch command", + }, + }, + required: ["input"], + }, + strict: false, + } + } + } + } + } +} + +const removeWebSearchTool = (payload: ResponsesPayload): void => { + if (!Array.isArray(payload.tools) || payload.tools.length === 0) return + + payload.tools = payload.tools.filter((t) => { + return t.type !== "web_search" + }) +} diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts new file mode 100644 index 000000000..af2423427 --- /dev/null +++ b/src/routes/responses/route.ts @@ -0,0 +1,15 @@ +import { Hono } from "hono" + +import { forwardError } from "~/lib/error" + +import { handleResponses } from "./handler" + +export const responsesRoutes 
= new Hono() + +responsesRoutes.post("/", async (c) => { + try { + return await handleResponses(c) + } catch (error) { + return await forwardError(c, error) + } +}) diff --git a/src/routes/responses/stream-id-sync.ts b/src/routes/responses/stream-id-sync.ts new file mode 100644 index 000000000..48b3811a1 --- /dev/null +++ b/src/routes/responses/stream-id-sync.ts @@ -0,0 +1,97 @@ +/** + * Stream ID Synchronization for @ai-sdk/openai compatibility + * + * Problem: GitHub Copilot's Responses API returns different IDs for the same + * item in 'added' vs 'done' events. This breaks @ai-sdk/openai which expects + * consistent IDs across the stream lifecycle. + * + * Errors without this fix: + * - "activeReasoningPart.summaryParts" undefined + * - "text part not found" + * + * Use case: OpenCode (AI coding assistant) using Codex models (gpt-5.2-codex) + * via @ai-sdk/openai provider requires the Responses API endpoint. + */ + +import type { + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseStreamEvent, +} from "~/services/copilot/create-responses" + +interface StreamIdTracker { + outputItems: Map +} + +export const createStreamIdTracker = (): StreamIdTracker => ({ + outputItems: new Map(), +}) + +export const fixStreamIds = ( + data: string, + event: string | undefined, + tracker: StreamIdTracker, +): string => { + if (!data) return data + const parsed = JSON.parse(data) as ResponseStreamEvent + switch (event) { + case "response.output_item.added": { + return handleOutputItemAdded( + parsed as ResponseOutputItemAddedEvent, + tracker, + ) + } + case "response.output_item.done": { + return handleOutputItemDone( + parsed as ResponseOutputItemDoneEvent, + tracker, + ) + } + default: { + return handleItemId(parsed, tracker) + } + } +} + +const handleOutputItemAdded = ( + parsed: ResponseOutputItemAddedEvent, + tracker: StreamIdTracker, +): string => { + if (!parsed.item.id) { + let randomSuffix = "" + while (randomSuffix.length < 16) { + randomSuffix += 
Math.random().toString(36).slice(2) + } + parsed.item.id = `oi_${parsed.output_index}_${randomSuffix.slice(0, 16)}` + } + + const outputIndex = parsed.output_index + tracker.outputItems.set(outputIndex, parsed.item.id) + return JSON.stringify(parsed) +} + +const handleOutputItemDone = ( + parsed: ResponseOutputItemDoneEvent, + tracker: StreamIdTracker, +): string => { + const outputIndex = parsed.output_index + const originalId = tracker.outputItems.get(outputIndex) + if (originalId) { + parsed.item.id = originalId + } + return JSON.stringify(parsed) +} + +const handleItemId = ( + parsed: ResponseStreamEvent & { output_index?: number; item_id?: string }, + tracker: StreamIdTracker, +): string => { + const outputIndex = parsed.output_index + if (outputIndex !== undefined) { + const itemId = tracker.outputItems.get(outputIndex) + if (itemId) { + parsed.item_id = itemId + } + } + return JSON.stringify(parsed) +} diff --git a/src/routes/responses/utils.ts b/src/routes/responses/utils.ts new file mode 100644 index 000000000..1dcb78b8a --- /dev/null +++ b/src/routes/responses/utils.ts @@ -0,0 +1,148 @@ +import type { + ResponseContextManagementCompactionItem, + ResponseInputItem, + ResponsesPayload, +} from "~/services/copilot/create-responses" + +import { isResponsesApiContextManagementModel } from "~/lib/config" + +export const getResponsesRequestOptions = ( + payload: ResponsesPayload, +): { vision: boolean; initiator: "agent" | "user" } => { + const vision = hasVisionInput(payload) + const initiator = hasAgentInitiator(payload) ? "agent" : "user" + + return { vision, initiator } +} + +export const hasAgentInitiator = (payload: ResponsesPayload): boolean => { + const lastItem = getPayloadItems(payload).at(-1) + if (!lastItem) { + return false + } + if (!("role" in lastItem) || !lastItem.role) { + return true + } + const role = + typeof lastItem.role === "string" ? 
lastItem.role.toLowerCase() : "" + return role === "assistant" +} + +export const hasVisionInput = (payload: ResponsesPayload): boolean => { + const values = getPayloadItems(payload) + return values.some((item) => containsVisionContent(item)) +} + +export const resolveResponsesCompactThreshold = ( + maxPromptTokens?: number, +): number => { + if (typeof maxPromptTokens === "number" && maxPromptTokens > 0) { + return Math.floor(maxPromptTokens * 0.9) + } + + return 50000 +} + +const createCompactionContextManagement = ( + compactThreshold: number, +): Array => [ + { + type: "compaction", + compact_threshold: compactThreshold, + }, +] + +export const applyResponsesApiContextManagement = ( + payload: ResponsesPayload, + maxPromptTokens?: number, +): void => { + if (payload.context_management !== undefined) { + return + } + + if (!isResponsesApiContextManagementModel(payload.model)) { + return + } + + payload.context_management = createCompactionContextManagement( + resolveResponsesCompactThreshold(maxPromptTokens), + ) +} + +export const compactInputByLatestCompaction = ( + payload: ResponsesPayload, +): void => { + if (!Array.isArray(payload.input) || payload.input.length === 0) { + return + } + + const latestCompactionMessageIndex = getLatestCompactionMessageIndex( + payload.input, + ) + + if (latestCompactionMessageIndex === undefined) { + return + } + + payload.input = payload.input.slice(latestCompactionMessageIndex) +} + +const getLatestCompactionMessageIndex = ( + input: Array, +): number | undefined => { + for (let index = input.length - 1; index >= 0; index -= 1) { + if (isCompactionInputItem(input[index])) { + return index + } + } + + return undefined +} + +const isCompactionInputItem = (value: ResponseInputItem): boolean => { + return ( + "type" in value + && typeof value.type === "string" + && value.type === "compaction" + ) +} + +const getPayloadItems = ( + payload: ResponsesPayload, +): Array => { + const result: Array = [] + + const { input } = payload 
+ + if (Array.isArray(input)) { + result.push(...input) + } + + return result +} + +const containsVisionContent = (value: unknown): boolean => { + if (!value) return false + + if (Array.isArray(value)) { + return value.some((entry) => containsVisionContent(entry)) + } + + if (typeof value !== "object") { + return false + } + + const record = value as Record + const type = + typeof record.type === "string" ? record.type.toLowerCase() : undefined + + if (type === "input_image") { + return true + } + + if (Array.isArray(record.content)) { + return record.content.some((entry) => containsVisionContent(entry)) + } + + return false +} diff --git a/src/server.ts b/src/server.ts index 462a278f3..9b1b3aef8 100644 --- a/src/server.ts +++ b/src/server.ts @@ -1,31 +1,55 @@ import { Hono } from "hono" import { cors } from "hono/cors" import { logger } from "hono/logger" +import { readFileSync } from "node:fs" +import { createAuthMiddleware } from "./lib/request-auth" +import { traceIdMiddleware } from "./lib/trace" import { completionRoutes } from "./routes/chat-completions/route" import { embeddingRoutes } from "./routes/embeddings/route" import { messageRoutes } from "./routes/messages/route" import { modelRoutes } from "./routes/models/route" +import { providerMessageRoutes } from "./routes/provider/messages/route" +import { providerModelRoutes } from "./routes/provider/models/route" +import { responsesRoutes } from "./routes/responses/route" import { tokenRoute } from "./routes/token/route" import { usageRoute } from "./routes/usage/route" export const server = new Hono() +server.use(traceIdMiddleware) server.use(logger()) server.use(cors()) +server.use( + "*", + createAuthMiddleware({ + allowUnauthenticatedPaths: ["/", "/usage-viewer", "/usage-viewer/"], + }), +) server.get("/", (c) => c.text("Server running")) +server.get("/usage-viewer", (c) => { + const usageViewerFileUrl = new URL("../pages/index.html", import.meta.url) + return c.html(readFileSync(usageViewerFileUrl, 
"utf8")) +}) +server.get("/usage-viewer/", (c) => c.redirect("/usage-viewer", 301)) server.route("/chat/completions", completionRoutes) server.route("/models", modelRoutes) server.route("/embeddings", embeddingRoutes) server.route("/usage", usageRoute) server.route("/token", tokenRoute) +server.route("/responses", responsesRoutes) // Compatibility with tools that expect v1/ prefix server.route("/v1/chat/completions", completionRoutes) server.route("/v1/models", modelRoutes) server.route("/v1/embeddings", embeddingRoutes) +server.route("/v1/responses", responsesRoutes) // Anthropic compatible endpoints server.route("/v1/messages", messageRoutes) + +// Provider scoped Anthropic-compatible endpoints +server.route("/:provider/v1/messages", providerMessageRoutes) +server.route("/:provider/v1/models", providerModelRoutes) diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 260fb19ea..0c249ec66 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -1,33 +1,62 @@ import consola from "consola" import { events } from "fetch-event-stream" +import { randomUUID } from "node:crypto" -import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config" +import type { SubagentMarker } from "~/routes/messages/subagent-marker" + +import { + copilotBaseUrl, + copilotHeaders, + prepareForCompact, + prepareInteractionHeaders, +} from "~/lib/api-config" import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export const createChatCompletions = async ( payload: ChatCompletionsPayload, + options: { + subagentMarker?: SubagentMarker | null + requestId?: string + sessionId?: string + isCompact?: boolean + } = {}, ) => { if (!state.copilotToken) throw new Error("Copilot token not found") + const requestId = options.requestId ?? 
randomUUID() + const enableVision = payload.messages.some( (x) => typeof x.content !== "string" && x.content?.some((x) => x.type === "image_url"), ) - // Agent/user check for X-Initiator header - // Determine if any message is from an agent ("assistant" or "tool") - const isAgentCall = - state.forceAgentInitiator - || payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role)) + // Agent/user check for x-initiator header + // Refactor `isAgentCall` logic to check only the last message in the history rather than any message. + // BlueSkyXN: support forceAgentInitiator flag to force agent initiator for all requests + let isAgentCall = state.forceAgentInitiator ?? false + if (!isAgentCall && payload.messages.length > 0) { + const lastMessage = payload.messages.at(-1) + if (lastMessage) { + isAgentCall = ["assistant", "tool"].includes(lastMessage.role) + } + } - // Build headers and add X-Initiator + // Build headers and add x-initiator const headers: Record = { - ...copilotHeaders(state, enableVision), - "X-Initiator": isAgentCall ? "agent" : "user", + ...copilotHeaders(state, requestId, enableVision), + "x-initiator": isAgentCall ? 
"agent" : "user", } + prepareInteractionHeaders( + options.sessionId, + Boolean(options.subagentMarker), + headers, + ) + + prepareForCompact(headers, options.isCompact) + const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, { method: "POST", headers, @@ -69,7 +98,7 @@ export interface ChatCompletionChunk { } } -interface Delta { +export interface Delta { content?: string | null role?: "user" | "assistant" | "system" | "tool" tool_calls?: Array<{ @@ -81,9 +110,11 @@ interface Delta { arguments?: string } }> + reasoning_text?: string | null + reasoning_opaque?: string | null } -interface Choice { +export interface Choice { index: number delta: Delta finish_reason: "stop" | "length" | "tool_calls" | "content_filter" | null @@ -112,6 +143,8 @@ export interface ChatCompletionResponse { interface ResponseMessage { role: "assistant" content: string | null + reasoning_text?: string | null + reasoning_opaque?: string | null tool_calls?: Array } @@ -148,6 +181,7 @@ export interface ChatCompletionsPayload { | { type: "function"; function: { name: string } } | null user?: string | null + thinking_budget?: number } export interface Tool { @@ -166,6 +200,8 @@ export interface Message { name?: string tool_calls?: Array tool_call_id?: string + reasoning_text?: string | null + reasoning_opaque?: string | null } export interface ToolCall { diff --git a/src/services/copilot/create-messages.ts b/src/services/copilot/create-messages.ts new file mode 100644 index 000000000..c32cf52a1 --- /dev/null +++ b/src/services/copilot/create-messages.ts @@ -0,0 +1,126 @@ +import consola from "consola" +import { events } from "fetch-event-stream" + +import type { + AnthropicMessagesPayload, + AnthropicResponse, +} from "~/routes/messages/anthropic-types" +import type { SubagentMarker } from "~/routes/messages/subagent-marker" + +import { + copilotBaseUrl, + copilotHeaders, + prepareForCompact, + prepareInteractionHeaders, +} from "~/lib/api-config" +import { HTTPError } from 
"~/lib/error" +import { state } from "~/lib/state" + +export type MessagesStream = ReturnType +export type CreateMessagesReturn = AnthropicResponse | MessagesStream + +const INTERLEAVED_THINKING_BETA = "interleaved-thinking-2025-05-14" +const allowedAnthropicBetas = new Set([ + INTERLEAVED_THINKING_BETA, + "context-management-2025-06-27", + "advanced-tool-use-2025-11-20", +]) + +const buildAnthropicBetaHeader = ( + anthropicBetaHeader: string | undefined, + thinking: AnthropicMessagesPayload["thinking"], +): string | undefined => { + const isAdaptiveThinking = thinking?.type === "adaptive" + + if (anthropicBetaHeader) { + const filteredBeta = anthropicBetaHeader + .split(",") + .map((item) => item.trim()) + .filter((item) => item.length > 0) + .filter((item) => allowedAnthropicBetas.has(item)) + const uniqueFilteredBetas = [...new Set(filteredBeta)] + const finalFilteredBetas = + isAdaptiveThinking ? + uniqueFilteredBetas.filter((item) => item !== INTERLEAVED_THINKING_BETA) + : uniqueFilteredBetas + + if (finalFilteredBetas.length > 0) { + return finalFilteredBetas.join(",") + } + + return undefined + } + + if (thinking?.budget_tokens && !isAdaptiveThinking) { + return INTERLEAVED_THINKING_BETA + } + + return undefined +} + +export const createMessages = async ( + payload: AnthropicMessagesPayload, + anthropicBetaHeader: string | undefined, + options: { + subagentMarker?: SubagentMarker | null + requestId: string + sessionId?: string + isCompact?: boolean + }, +): Promise => { + if (!state.copilotToken) throw new Error("Copilot token not found") + + const enableVision = payload.messages.some( + (message) => + Array.isArray(message.content) + && message.content.some((block) => block.type === "image"), + ) + + let isInitiateRequest = false + const lastMessage = payload.messages.at(-1) + if (lastMessage?.role === "user") { + isInitiateRequest = + Array.isArray(lastMessage.content) ? 
+ lastMessage.content.some((block) => block.type !== "tool_result") + : true + } + + const headers: Record = { + ...copilotHeaders(state, options.requestId, enableVision), + "x-initiator": isInitiateRequest ? "user" : "agent", + } + + prepareInteractionHeaders( + options.sessionId, + Boolean(options.subagentMarker), + headers, + ) + + prepareForCompact(headers, options.isCompact) + + // align with vscode copilot extension anthropic-beta + const anthropicBeta = buildAnthropicBetaHeader( + anthropicBetaHeader, + payload.thinking, + ) + if (anthropicBeta) { + headers["anthropic-beta"] = anthropicBeta + } + + const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, { + method: "POST", + headers, + body: JSON.stringify(payload), + }) + + if (!response.ok) { + consola.error("Failed to create messages", response) + throw new HTTPError("Failed to create messages", response) + } + + if (payload.stream) { + return events(response) + } + + return (await response.json()) as AnthropicResponse +} diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts new file mode 100644 index 000000000..460839500 --- /dev/null +++ b/src/services/copilot/create-responses.ts @@ -0,0 +1,405 @@ +import consola from "consola" +import { events } from "fetch-event-stream" + +import type { SubagentMarker } from "~/routes/messages/subagent-marker" + +import { + copilotBaseUrl, + copilotHeaders, + prepareForCompact, + prepareInteractionHeaders, +} from "~/lib/api-config" +import { HTTPError } from "~/lib/error" +import { state } from "~/lib/state" + +export interface ResponsesPayload { + model: string + instructions?: string | null + input?: string | Array + tools?: Array | null + tool_choice?: ToolChoiceOptions | ToolChoiceFunction + temperature?: number | null + top_p?: number | null + max_output_tokens?: number | null + metadata?: Metadata | null + stream?: boolean | null + safety_identifier?: string | null + prompt_cache_key?: string | null + 
parallel_tool_calls?: boolean | null + store?: boolean | null + reasoning?: Reasoning | null + context_management?: Array | null + include?: Array + service_tier?: string | null + [key: string]: unknown +} + +export type ToolChoiceOptions = "none" | "auto" | "required" + +export interface ToolChoiceFunction { + name: string + type: "function" +} + +export type Tool = FunctionTool | Record + +export interface FunctionTool { + name: string + parameters: { [key: string]: unknown } | null + strict: boolean | null + type: "function" + description?: string | null +} + +export type ResponseIncludable = + | "file_search_call.results" + | "message.input_image.image_url" + | "computer_call_output.output.image_url" + | "reasoning.encrypted_content" + | "code_interpreter_call.outputs" + +export interface Reasoning { + effort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh" | null + summary?: "auto" | "concise" | "detailed" | null +} + +export interface ResponseContextManagementCompactionItem { + type: "compaction" + compact_threshold: number +} + +export type ResponseContextManagementItem = + ResponseContextManagementCompactionItem + +export interface ResponseInputMessage { + type?: "message" + role: "user" | "assistant" | "system" | "developer" + content?: string | Array + status?: string + phase?: "commentary" | "final_answer" +} + +export interface ResponseFunctionToolCallItem { + type: "function_call" + call_id: string + name: string + arguments: string + status?: "in_progress" | "completed" | "incomplete" +} + +export interface ResponseFunctionCallOutputItem { + type: "function_call_output" + call_id: string + output: string | Array + status?: "in_progress" | "completed" | "incomplete" +} + +export interface ResponseInputReasoning { + id?: string + type: "reasoning" + summary: Array<{ + type: "summary_text" + text: string + }> + encrypted_content: string +} + +export interface ResponseInputCompaction { + id: string + type: "compaction" + encrypted_content: 
string +} + +export type ResponseInputItem = + | ResponseInputMessage + | ResponseFunctionToolCallItem + | ResponseFunctionCallOutputItem + | ResponseInputReasoning + | ResponseInputCompaction + | Record + +export type ResponseInputContent = + | ResponseInputText + | ResponseInputImage + | Record + +export interface ResponseInputText { + type: "input_text" | "output_text" + text: string +} + +export interface ResponseInputImage { + type: "input_image" + image_url?: string | null + file_id?: string | null + detail: "low" | "high" | "auto" +} + +export interface ResponsesResult { + id: string + object: "response" + created_at: number + model: string + output: Array + output_text: string + status: string + usage?: ResponseUsage | null + error: ResponseError | null + incomplete_details: IncompleteDetails | null + instructions: string | null + metadata: Metadata | null + parallel_tool_calls: boolean + temperature: number | null + tool_choice: unknown + tools: Array + top_p: number | null +} + +export type Metadata = { [key: string]: string } + +export interface IncompleteDetails { + reason?: "max_output_tokens" | "content_filter" +} + +export interface ResponseError { + message: string +} + +export type ResponseOutputItem = + | ResponseOutputMessage + | ResponseOutputReasoning + | ResponseOutputFunctionCall + | ResponseOutputCompaction + +export interface ResponseOutputMessage { + id: string + type: "message" + role: "assistant" + status: "completed" | "in_progress" | "incomplete" + content?: Array +} + +export interface ResponseOutputReasoning { + id: string + type: "reasoning" + summary?: Array + encrypted_content?: string + status?: "completed" | "in_progress" | "incomplete" +} + +export interface ResponseReasoningBlock { + type: string + text?: string +} + +export interface ResponseOutputFunctionCall { + id?: string + type: "function_call" + call_id: string + name: string + arguments: string + status?: "in_progress" | "completed" | "incomplete" +} + +export 
interface ResponseOutputCompaction { + id: string + type: "compaction" + encrypted_content: string +} + +export type ResponseOutputContentBlock = + | ResponseOutputText + | ResponseOutputRefusal + | Record + +export interface ResponseOutputText { + type: "output_text" + text: string + annotations: Array +} + +export interface ResponseOutputRefusal { + type: "refusal" + refusal: string +} + +export interface ResponseUsage { + input_tokens: number + output_tokens?: number + total_tokens: number + input_tokens_details?: { + cached_tokens: number + } + output_tokens_details?: { + reasoning_tokens: number + } +} + +export type ResponseStreamEvent = + | ResponseCompletedEvent + | ResponseIncompleteEvent + | ResponseCreatedEvent + | ResponseErrorEvent + | ResponseFunctionCallArgumentsDeltaEvent + | ResponseFunctionCallArgumentsDoneEvent + | ResponseFailedEvent + | ResponseOutputItemAddedEvent + | ResponseOutputItemDoneEvent + | ResponseReasoningSummaryTextDeltaEvent + | ResponseReasoningSummaryTextDoneEvent + | ResponseTextDeltaEvent + | ResponseTextDoneEvent + +export interface ResponseCompletedEvent { + response: ResponsesResult + sequence_number: number + type: "response.completed" +} + +export interface ResponseIncompleteEvent { + response: ResponsesResult + sequence_number: number + type: "response.incomplete" +} + +export interface ResponseCreatedEvent { + response: ResponsesResult + sequence_number: number + type: "response.created" +} + +export interface ResponseErrorEvent { + code: string | null + message: string + param: string | null + sequence_number: number + type: "error" +} + +export interface ResponseFunctionCallArgumentsDeltaEvent { + delta: string + item_id: string + output_index: number + sequence_number: number + type: "response.function_call_arguments.delta" +} + +export interface ResponseFunctionCallArgumentsDoneEvent { + arguments: string + item_id: string + name: string + output_index: number + sequence_number: number + type: 
"response.function_call_arguments.done" +} + +export interface ResponseFailedEvent { + response: ResponsesResult + sequence_number: number + type: "response.failed" +} + +export interface ResponseOutputItemAddedEvent { + item: ResponseOutputItem + output_index: number + sequence_number: number + type: "response.output_item.added" +} + +export interface ResponseOutputItemDoneEvent { + item: ResponseOutputItem + output_index: number + sequence_number: number + type: "response.output_item.done" +} + +export interface ResponseReasoningSummaryTextDeltaEvent { + delta: string + item_id: string + output_index: number + sequence_number: number + summary_index: number + type: "response.reasoning_summary_text.delta" +} + +export interface ResponseReasoningSummaryTextDoneEvent { + item_id: string + output_index: number + sequence_number: number + summary_index: number + text: string + type: "response.reasoning_summary_text.done" +} + +export interface ResponseTextDeltaEvent { + content_index: number + delta: string + item_id: string + output_index: number + sequence_number: number + type: "response.output_text.delta" +} + +export interface ResponseTextDoneEvent { + content_index: number + item_id: string + output_index: number + sequence_number: number + text: string + type: "response.output_text.done" +} + +export type ResponsesStream = ReturnType +export type CreateResponsesReturn = ResponsesResult | ResponsesStream + +interface ResponsesRequestOptions { + vision: boolean + initiator: "agent" | "user" + subagentMarker?: SubagentMarker | null + requestId: string + sessionId?: string + isCompact?: boolean +} + +export const createResponses = async ( + payload: ResponsesPayload, + { + vision, + initiator, + subagentMarker, + requestId, + sessionId, + isCompact, + }: ResponsesRequestOptions, +): Promise => { + if (!state.copilotToken) throw new Error("Copilot token not found") + + const headers: Record = { + ...copilotHeaders(state, requestId, vision), + "x-initiator": 
initiator, + } + + prepareInteractionHeaders(sessionId, Boolean(subagentMarker), headers) + + prepareForCompact(headers, isCompact) + + // service_tier is not supported by github copilot + payload.service_tier = null + + const response = await fetch(`${copilotBaseUrl(state)}/responses`, { + method: "POST", + headers, + body: JSON.stringify(payload), + }) + + if (!response.ok) { + consola.error("Failed to create responses", response) + throw new HTTPError("Failed to create responses", response) + } + + if (payload.stream) { + return events(response) + } + + return (await response.json()) as ResponsesResult +} diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts index 3cfa30af0..cf3f184b5 100644 --- a/src/services/copilot/get-models.ts +++ b/src/services/copilot/get-models.ts @@ -25,9 +25,15 @@ interface ModelLimits { } interface ModelSupports { + max_thinking_budget?: number + min_thinking_budget?: number tool_calls?: boolean parallel_tool_calls?: boolean dimensions?: boolean + streaming?: boolean + structured_outputs?: boolean + vision?: boolean + adaptive_thinking?: boolean } interface ModelCapabilities { @@ -52,4 +58,5 @@ export interface Model { state: string terms: string } + supported_endpoints?: Array } diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts index 6078f09b5..746a16126 100644 --- a/src/services/get-vscode-version.ts +++ b/src/services/get-vscode-version.ts @@ -1,33 +1,6 @@ -const FALLBACK = "1.104.3" +const FALLBACK = "1.110.1" export async function getVSCodeVersion() { - const controller = new AbortController() - const timeout = setTimeout(() => { - controller.abort() - }, 5000) - - try { - const response = await fetch( - "https://aur.archlinux.org/cgit/aur.git/plain/PKGBUILD?h=visual-studio-code-bin", - { - signal: controller.signal, - }, - ) - - const pkgbuild = await response.text() - const pkgverRegex = /pkgver=([0-9.]+)/ - const match = pkgbuild.match(pkgverRegex) - - if 
(match) { - return match[1] - } - - return FALLBACK - } catch { - return FALLBACK - } finally { - clearTimeout(timeout) - } + await Promise.resolve() + return FALLBACK } - -await getVSCodeVersion() diff --git a/src/services/github/get-copilot-token.ts b/src/services/github/get-copilot-token.ts index 98744bab1..9c33c038c 100644 --- a/src/services/github/get-copilot-token.ts +++ b/src/services/github/get-copilot-token.ts @@ -1,10 +1,10 @@ -import { GITHUB_API_BASE_URL, githubHeaders } from "~/lib/api-config" +import { getGitHubApiBaseUrl, githubHeaders } from "~/lib/api-config" import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export const getCopilotToken = async () => { const response = await fetch( - `${GITHUB_API_BASE_URL}/copilot_internal/v2/token`, + `${getGitHubApiBaseUrl()}/copilot_internal/v2/token`, { headers: githubHeaders(state), }, diff --git a/src/services/github/get-copilot-usage.ts b/src/services/github/get-copilot-usage.ts index 6cdd8bc10..1af6632e4 100644 --- a/src/services/github/get-copilot-usage.ts +++ b/src/services/github/get-copilot-usage.ts @@ -1,11 +1,14 @@ -import { GITHUB_API_BASE_URL, githubHeaders } from "~/lib/api-config" +import { getGitHubApiBaseUrl, githubHeaders } from "~/lib/api-config" import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export const getCopilotUsage = async (): Promise => { - const response = await fetch(`${GITHUB_API_BASE_URL}/copilot_internal/user`, { - headers: githubHeaders(state), - }) + const response = await fetch( + `${getGitHubApiBaseUrl()}/copilot_internal/user`, + { + headers: githubHeaders(state), + }, + ) if (!response.ok) { throw new HTTPError("Failed to get Copilot usage", response) diff --git a/src/services/github/get-device-code.ts b/src/services/github/get-device-code.ts index cf35f4ec9..79d26ba70 100644 --- a/src/services/github/get-device-code.ts +++ b/src/services/github/get-device-code.ts @@ -1,18 +1,16 @@ -import { - GITHUB_APP_SCOPES, - 
GITHUB_BASE_URL, - GITHUB_CLIENT_ID, - standardHeaders, -} from "~/lib/api-config" +import { getOauthAppConfig, getOauthUrls } from "~/lib/api-config" import { HTTPError } from "~/lib/error" export async function getDeviceCode(): Promise { - const response = await fetch(`${GITHUB_BASE_URL}/login/device/code`, { + const { clientId, headers, scope } = getOauthAppConfig() + const { deviceCodeUrl } = getOauthUrls() + + const response = await fetch(deviceCodeUrl, { method: "POST", - headers: standardHeaders(), + headers, body: JSON.stringify({ - client_id: GITHUB_CLIENT_ID, - scope: GITHUB_APP_SCOPES, + client_id: clientId, + scope, }), }) diff --git a/src/services/github/get-user.ts b/src/services/github/get-user.ts index 23e1b1c1c..6774c4492 100644 --- a/src/services/github/get-user.ts +++ b/src/services/github/get-user.ts @@ -1,9 +1,9 @@ -import { GITHUB_API_BASE_URL, standardHeaders } from "~/lib/api-config" +import { getGitHubApiBaseUrl, standardHeaders } from "~/lib/api-config" import { HTTPError } from "~/lib/error" import { state } from "~/lib/state" export async function getGitHubUser() { - const response = await fetch(`${GITHUB_API_BASE_URL}/user`, { + const response = await fetch(`${getGitHubApiBaseUrl()}/user`, { headers: { authorization: `token ${state.githubToken}`, ...standardHeaders(), diff --git a/src/services/github/poll-access-token.ts b/src/services/github/poll-access-token.ts index 4639ee0dc..44c4a07b8 100644 --- a/src/services/github/poll-access-token.ts +++ b/src/services/github/poll-access-token.ts @@ -1,10 +1,6 @@ import consola from "consola" -import { - GITHUB_BASE_URL, - GITHUB_CLIENT_ID, - standardHeaders, -} from "~/lib/api-config" +import { getOauthAppConfig, getOauthUrls } from "~/lib/api-config" import { sleep } from "~/lib/utils" import type { DeviceCodeResponse } from "./get-device-code" @@ -12,24 +8,24 @@ import type { DeviceCodeResponse } from "./get-device-code" export async function pollAccessToken( deviceCode: DeviceCodeResponse, 
): Promise { + const { clientId, headers } = getOauthAppConfig() + const { accessTokenUrl } = getOauthUrls() + // Interval is in seconds, we need to multiply by 1000 to get milliseconds // I'm also adding another second, just to be safe const sleepDuration = (deviceCode.interval + 1) * 1000 consola.debug(`Polling access token with interval of ${sleepDuration}ms`) while (true) { - const response = await fetch( - `${GITHUB_BASE_URL}/login/oauth/access_token`, - { - method: "POST", - headers: standardHeaders(), - body: JSON.stringify({ - client_id: GITHUB_CLIENT_ID, - device_code: deviceCode.device_code, - grant_type: "urn:ietf:params:oauth:grant-type:device_code", - }), - }, - ) + const response = await fetch(accessTokenUrl, { + method: "POST", + headers, + body: JSON.stringify({ + client_id: clientId, + device_code: deviceCode.device_code, + grant_type: "urn:ietf:params:oauth:grant-type:device_code", + }), + }) if (!response.ok) { await sleep(sleepDuration) diff --git a/src/services/providers/anthropic-proxy.ts b/src/services/providers/anthropic-proxy.ts new file mode 100644 index 000000000..45f288802 --- /dev/null +++ b/src/services/providers/anthropic-proxy.ts @@ -0,0 +1,80 @@ +import type { ResolvedProviderConfig } from "~/lib/config" +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" + +const FORWARDABLE_HEADERS = [ + "anthropic-version", + "anthropic-beta", + "accept", + "user-agent", +] as const + +const STRIPPED_RESPONSE_HEADERS = [ + "connection", + "content-encoding", + "content-length", + "keep-alive", + "proxy-authenticate", + "proxy-authorization", + "te", + "trailer", + "transfer-encoding", + "upgrade", +] as const + +export function buildProviderUpstreamHeaders( + providerConfig: ResolvedProviderConfig, + requestHeaders: Headers, +): Record { + const headers: Record = { + "content-type": "application/json", + accept: "application/json", + "x-api-key": providerConfig.apiKey, + } + + for (const headerName of 
FORWARDABLE_HEADERS) { + const headerValue = requestHeaders.get(headerName) + if (headerValue) { + headers[headerName] = headerValue + } + } + + return headers +} + +export function createProviderProxyResponse( + upstreamResponse: Response, +): Response { + const headers = new Headers(upstreamResponse.headers) + + for (const headerName of STRIPPED_RESPONSE_HEADERS) { + headers.delete(headerName) + } + + return new Response(upstreamResponse.body, { + headers, + status: upstreamResponse.status, + statusText: upstreamResponse.statusText, + }) +} + +export async function forwardProviderMessages( + providerConfig: ResolvedProviderConfig, + payload: AnthropicMessagesPayload, + requestHeaders: Headers, +): Promise { + return await fetch(`${providerConfig.baseUrl}/v1/messages`, { + method: "POST", + headers: buildProviderUpstreamHeaders(providerConfig, requestHeaders), + body: JSON.stringify(payload), + }) +} + +export async function forwardProviderModels( + providerConfig: ResolvedProviderConfig, + requestHeaders: Headers, +): Promise { + return await fetch(`${providerConfig.baseUrl}/v1/models`, { + method: "GET", + headers: buildProviderUpstreamHeaders(providerConfig, requestHeaders), + }) +} diff --git a/src/start.ts b/src/start.ts index 922ddaa69..52eaeee14 100644 --- a/src/start.ts +++ b/src/start.ts @@ -7,13 +7,18 @@ import { serve, type ServerHandler } from "srvx" import invariant from "tiny-invariant" import { APP_NAME, USAGE_VIEWER_URL } from "./lib/app-info" +import { mergeConfigWithDefaults } from "./lib/config" import { ensurePaths } from "./lib/paths" import { initProxyFromEnv } from "./lib/proxy" import { generateEnvScript } from "./lib/shell" import { state } from "./lib/state" import { setupCopilotToken, setupGitHubToken } from "./lib/token" -import { cacheModels, cacheVSCodeVersion } from "./lib/utils" -import { server } from "./server" +import { + cacheMacMachineId, + cacheModels, + cacheVSCodeVersion, + cacheVsCodeSessionId, +} from "./lib/utils" 
interface RunServerOptions { port: number @@ -30,10 +35,14 @@ interface RunServerOptions { } export async function runServer(options: RunServerOptions): Promise { + // Ensure config is merged with defaults at startup + mergeConfigWithDefaults() + if (options.proxyEnv) { initProxyFromEnv() } + state.verbose = options.verbose if (options.verbose) { consola.level = 5 consola.info("Verbose logging enabled") @@ -56,6 +65,8 @@ export async function runServer(options: RunServerOptions): Promise { await ensurePaths() await cacheVSCodeVersion() + cacheMacMachineId() + cacheVsCodeSessionId() if (options.githubToken) { state.githubToken = options.githubToken @@ -74,6 +85,11 @@ export async function runServer(options: RunServerOptions): Promise { const serverUrl = `http://localhost:${options.port}` if (options.claudeCode) { + consola.log( + "\nšŸ’” Tip: The --claude-code flag simply generates a clipboard command for launching Claude Code. \n" + + "All models remain fully accessible without this flag, just configure the model ID directly in your settings.json file.", + ) + invariant(state.models, "Models should be loaded by now") const selectedModel = await consola.prompt( @@ -98,10 +114,11 @@ export async function runServer(options: RunServerOptions): Promise { ANTHROPIC_AUTH_TOKEN: "dummy", ANTHROPIC_MODEL: selectedModel, ANTHROPIC_DEFAULT_SONNET_MODEL: selectedModel, - ANTHROPIC_SMALL_FAST_MODEL: selectedSmallModel, ANTHROPIC_DEFAULT_HAIKU_MODEL: selectedSmallModel, DISABLE_NON_ESSENTIAL_MODEL_CALLS: "1", CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1", + CLAUDE_CODE_ATTRIBUTION_HEADER: "0", + CLAUDE_CODE_ENABLE_PROMPT_SUGGESTION: "false", }, "claude", ) @@ -121,9 +138,14 @@ export async function runServer(options: RunServerOptions): Promise { `🌐 ${APP_NAME} Usage Viewer: ${USAGE_VIEWER_URL}?endpoint=${serverUrl}/usage`, ) + const { server } = await import("./server") + serve({ fetch: server.fetch as ServerHandler, port: options.port, + bun: { + idleTimeout: 0, + }, }) } diff 
--git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts index 06c663778..1596a3941 100644 --- a/tests/anthropic-request.test.ts +++ b/tests/anthropic-request.test.ts @@ -136,6 +136,8 @@ describe("Anthropic to OpenAI translation logic", () => { { type: "thinking", thinking: "Let me think about this simple math problem...", + // Note: thinking blocks without signatures are filtered for claude models + signature: "", }, { type: "text", text: "2+2 equals 4." }, ], @@ -146,13 +148,11 @@ describe("Anthropic to OpenAI translation logic", () => { const openAIPayload = translateToOpenAI(anthropicPayload) expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) - // Check that thinking content is combined with text content + // Thinking blocks without valid signatures are filtered for claude models. + // Text content should still be present. const assistantMessage = openAIPayload.messages.find( (m) => m.role === "assistant", ) - expect(assistantMessage?.content).toContain( - "Let me think about this simple math problem...", - ) expect(assistantMessage?.content).toContain("2+2 equals 4.") }) @@ -168,6 +168,8 @@ describe("Anthropic to OpenAI translation logic", () => { type: "thinking", thinking: "I need to call the weather API to get current weather information.", + // Note: thinking blocks without signatures are filtered for claude models + signature: "", }, { type: "text", text: "I'll check the weather for you." }, { @@ -184,13 +186,11 @@ describe("Anthropic to OpenAI translation logic", () => { const openAIPayload = translateToOpenAI(anthropicPayload) expect(isValidChatCompletionRequest(openAIPayload)).toBe(true) - // Check that thinking content is included in the message content + // Thinking blocks without valid signatures are filtered for claude models. + // Text content and tool calls should still be present. 
const assistantMessage = openAIPayload.messages.find( (m) => m.role === "assistant", ) - expect(assistantMessage?.content).toContain( - "I need to call the weather API", - ) expect(assistantMessage?.content).toContain( "I'll check the weather for you.", ) diff --git a/tests/create-chat-completions.test.ts b/tests/create-chat-completions.test.ts index 9f4c989f5..6208f09f6 100644 --- a/tests/create-chat-completions.test.ts +++ b/tests/create-chat-completions.test.ts @@ -37,7 +37,7 @@ test("sets X-Initiator to agent if tool/assistant present", async () => { const headers = ( fetchMock.mock.calls[0][1] as { headers: Record } ).headers - expect(headers["X-Initiator"]).toBe("agent") + expect(headers["x-initiator"]).toBe("agent") }) test("sets X-Initiator to user if only user present", async () => { @@ -53,7 +53,7 @@ test("sets X-Initiator to user if only user present", async () => { const headers = ( fetchMock.mock.calls[1][1] as { headers: Record } ).headers - expect(headers["X-Initiator"]).toBe("user") + expect(headers["x-initiator"]).toBe("user") }) test("forces X-Initiator to agent in all-agent mode", async () => { @@ -69,7 +69,7 @@ test("forces X-Initiator to agent in all-agent mode", async () => { const headers = ( fetchMock.mock.calls[2][1] as { headers: Record } ).headers - expect(headers["X-Initiator"]).toBe("agent") + expect(headers["x-initiator"]).toBe("agent") } finally { state.forceAgentInitiator = false } From 3635f5f799fe0677747f49c0e7ad1fb575bf9e6e Mon Sep 17 00:00:00 2001 From: BlueSkyXN <63384277+BlueSkyXN@users.noreply.github.com> Date: Mon, 30 Mar 2026 20:36:50 +0800 Subject: [PATCH 6/6] fix: add missing thinkingBlockOpen to AnthropicStreamState in tests The AnthropicStreamState type requires thinkingBlockOpen but the test objects at lines 250 and 350 were missing it, causing TS2741 errors. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/anthropic-response.test.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/anthropic-response.test.ts b/tests/anthropic-response.test.ts index ecd71aacc..3bd7e4919 100644 --- a/tests/anthropic-response.test.ts +++ b/tests/anthropic-response.test.ts @@ -251,6 +251,7 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => { messageStartSent: false, contentBlockIndex: 0, contentBlockOpen: false, + thinkingBlockOpen: false, toolCalls: {}, } const translatedStream = openAIStream.flatMap((chunk) => @@ -351,6 +352,7 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => { messageStartSent: false, contentBlockIndex: 0, contentBlockOpen: false, + thinkingBlockOpen: false, toolCalls: {}, } const translatedStream = openAIStream.flatMap((chunk) =>