From 1abf7865720769088e8fcd1e2ee3d3d456372263 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 13:47:04 +0800 Subject: [PATCH 01/34] feat(db): add SQLite module with migrations (Task #1) Introduce bun:sqlite-backed db module with WAL mode, foreign keys, and a meta-table-driven migration runner. Initial schema (001) creates accounts, model_pricing, model_pricing_versions, pricing_sync_log, usage_events, usage_daily and supporting indexes. - src/lib/db.ts: initDb/getDb/withTransaction + test-only reset helper - src/lib/migrations/001_initial.sql: full schema per design doc 02 - src/lib/paths.ts: USAGE_DB_PATH under APP_DIR - src/start.ts: --db-path flag, initDb after ensurePaths - src/types.d.ts: declare *.sql text imports - tests/db.test.ts: 8 tests covering init, idempotency, tx, WAL, indexes Closes #1 Co-Authored-By: Claude Opus 4 --- src/lib/db.ts | 99 +++++++++++++++++ src/lib/migrations/001_initial.sql | 113 +++++++++++++++++++ src/lib/paths.ts | 2 + src/start.ts | 12 +- src/types.d.ts | 4 + tests/db.test.ts | 170 +++++++++++++++++++++++++++++ 6 files changed, 399 insertions(+), 1 deletion(-) create mode 100644 src/lib/db.ts create mode 100644 src/lib/migrations/001_initial.sql create mode 100644 src/types.d.ts create mode 100644 tests/db.test.ts diff --git a/src/lib/db.ts b/src/lib/db.ts new file mode 100644 index 000000000..8df341dea --- /dev/null +++ b/src/lib/db.ts @@ -0,0 +1,99 @@ +import { Database } from "bun:sqlite" +import fs from "node:fs" +import path from "node:path" + +import migration001 from "./migrations/001_initial.sql" with { type: "text" } + +export const CURRENT_SCHEMA_VERSION = 1 + +const MIGRATIONS: Array<{ version: number; sql: string }> = [ + { version: 1, sql: migration001 }, +] + +let dbInstance: Database | undefined + +export function initDb(dbPath: string): Database { + if (dbInstance) return dbInstance + + if (dbPath !== ":memory:") { + fs.mkdirSync(path.dirname(dbPath), { recursive: true }) + } + + const db = new 
Database(dbPath, { create: true }) + + // Pragmas — set before any schema work. + db.run("PRAGMA journal_mode = WAL") + db.run("PRAGMA synchronous = NORMAL") + db.run("PRAGMA foreign_keys = ON") + + runMigrations(db) + + dbInstance = db + return db +} + +export function getDb(): Database { + if (!dbInstance) { + throw new Error( + "Database not initialized. Call initDb(path) before getDb().", + ) + } + return dbInstance +} + +export function withTransaction<T>(fn: (db: Database) => T): T { + const db = getDb() + const tx = db.transaction((arg: () => T) => arg()) + return tx(() => fn(db)) +} + +/** + * Test-only helper. Closes any current instance and clears the singleton so + * the next initDb call starts from scratch. Production code must never call + * this — it exists to keep tests isolated. + */ +export function __resetDbForTests(): void { + if (dbInstance) { + try { + dbInstance.close() + } catch { + // ignore + } + dbInstance = undefined + } +} + +function runMigrations(db: Database): void { + // Bootstrap meta table so we can read schema_version. + db.run( + "CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT NOT NULL)", + ) + + const row = db + .query< + { value: string }, + [] + >("SELECT value FROM meta WHERE key='schema_version'") + .get() + const currentVersion = row ? Number.parseInt(row.value, 10) : 0 + + const pending = MIGRATIONS.filter((m) => m.version > currentVersion).sort( + (a, b) => a.version - b.version, + ) + + if (pending.length === 0) return + + const apply = db.transaction(() => { + for (const m of pending) { + // Migration SQL contains multiple statements; only exec() handles that. + // eslint-disable-next-line @typescript-eslint/no-deprecated + db.exec(m.sql) + } + db.run( + "INSERT INTO meta (key, value) VALUES ('schema_version', ?) 
" + + "ON CONFLICT(key) DO UPDATE SET value=excluded.value", + [String(CURRENT_SCHEMA_VERSION)], + ) + }) + apply() +} diff --git a/src/lib/migrations/001_initial.sql b/src/lib/migrations/001_initial.sql new file mode 100644 index 000000000..3f91480e5 --- /dev/null +++ b/src/lib/migrations/001_initial.sql @@ -0,0 +1,113 @@ +-- Initial schema for copilot-api multi-account & usage billing. +-- Owned by tasks #1 (this migration), #2 (accounts loader), #6 (usage recorder), +-- #13 (pricing version writer). See docs/design/. + +CREATE TABLE IF NOT EXISTS accounts ( + name TEXT PRIMARY KEY, + account_type TEXT NOT NULL, + created_at INTEGER NOT NULL +); + +CREATE TABLE IF NOT EXISTS model_pricing ( + model_id TEXT PRIMARY KEY, + input_per_mtok REAL, + cached_input_per_mtok REAL, + output_per_mtok REAL, + reasoning_per_mtok REAL, + premium_multiplier REAL, + premium_unit_price REAL, + currency TEXT NOT NULL DEFAULT 'USD', + source TEXT, + source_skus TEXT, + updated_at INTEGER NOT NULL +); + +CREATE TABLE IF NOT EXISTS pricing_sync_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ts INTEGER NOT NULL, + status TEXT NOT NULL, + source_count INTEGER, + llm_model TEXT, + models_updated INTEGER, + models_rejected INTEGER, + error TEXT, + raw_request_json TEXT, + raw_response_json TEXT, + diff_json TEXT +); + +CREATE TABLE IF NOT EXISTS model_pricing_versions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + model_id TEXT NOT NULL, + effective_from INTEGER NOT NULL, + effective_to INTEGER, + input_per_mtok REAL, + cached_input_per_mtok REAL, + output_per_mtok REAL, + reasoning_per_mtok REAL, + premium_multiplier REAL, + premium_unit_price REAL, + currency TEXT NOT NULL DEFAULT 'USD', + source TEXT, + source_skus TEXT, + sync_log_id INTEGER, + created_at INTEGER NOT NULL, + FOREIGN KEY (sync_log_id) REFERENCES pricing_sync_log(id) +); + +CREATE INDEX IF NOT EXISTS idx_pricing_versions_model_time + ON model_pricing_versions(model_id, effective_from); + +CREATE INDEX IF NOT EXISTS 
idx_pricing_versions_current + ON model_pricing_versions(model_id) WHERE effective_to IS NULL; + +CREATE TABLE IF NOT EXISTS usage_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ts INTEGER NOT NULL, + account_name TEXT NOT NULL, + model_id TEXT NOT NULL, + endpoint TEXT NOT NULL, + upstream_format TEXT NOT NULL, + is_streaming INTEGER NOT NULL, + input_tokens INTEGER DEFAULT 0, + cached_input_tokens INTEGER DEFAULT 0, + output_tokens INTEGER DEFAULT 0, + reasoning_tokens INTEGER DEFAULT 0, + total_tokens INTEGER DEFAULT 0, + premium_request_count REAL DEFAULT 0, + input_price_snapshot REAL, + cached_input_price_snapshot REAL, + output_price_snapshot REAL, + reasoning_price_snapshot REAL, + premium_unit_price_snapshot REAL, + premium_multiplier_snapshot REAL, + request_id TEXT, + status TEXT NOT NULL, + duration_ms INTEGER, + FOREIGN KEY (account_name) REFERENCES accounts(name) +); + +CREATE INDEX IF NOT EXISTS idx_usage_account_model_ts + ON usage_events(account_name, model_id, ts); + +CREATE INDEX IF NOT EXISTS idx_usage_ts ON usage_events(ts); + +CREATE TABLE IF NOT EXISTS usage_daily ( + day TEXT NOT NULL, + account_name TEXT NOT NULL, + model_id TEXT NOT NULL, + endpoint TEXT NOT NULL, + req_count INTEGER NOT NULL DEFAULT 0, + input_tokens INTEGER NOT NULL DEFAULT 0, + cached_input_tokens INTEGER NOT NULL DEFAULT 0, + output_tokens INTEGER NOT NULL DEFAULT 0, + reasoning_tokens INTEGER NOT NULL DEFAULT 0, + total_tokens INTEGER NOT NULL DEFAULT 0, + premium_requests REAL NOT NULL DEFAULT 0, + PRIMARY KEY (day, account_name, model_id, endpoint) +); + +CREATE TABLE IF NOT EXISTS meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +); diff --git a/src/lib/paths.ts b/src/lib/paths.ts index 8d0a9f02b..231560d12 100644 --- a/src/lib/paths.ts +++ b/src/lib/paths.ts @@ -5,10 +5,12 @@ import path from "node:path" const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api") const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token") +const 
USAGE_DB_PATH = path.join(APP_DIR, "usage.sqlite") export const PATHS = { APP_DIR, GITHUB_TOKEN_PATH, + USAGE_DB_PATH, } export async function ensurePaths(): Promise<void> { diff --git a/src/start.ts b/src/start.ts index 14abbbdff..d3539362a 100644 --- a/src/start.ts +++ b/src/start.ts @@ -6,7 +6,8 @@ import consola from "consola" import { serve, type ServerHandler } from "srvx" import invariant from "tiny-invariant" -import { ensurePaths } from "./lib/paths" +import { initDb } from "./lib/db" +import { ensurePaths, PATHS } from "./lib/paths" import { initProxyFromEnv } from "./lib/proxy" import { generateEnvScript } from "./lib/shell" import { state } from "./lib/state" @@ -25,6 +26,7 @@ interface RunServerOptions { claudeCode: boolean showToken: boolean proxyEnv: boolean + dbPath: string } export async function runServer(options: RunServerOptions): Promise<void> { @@ -48,6 +50,7 @@ state.showToken = options.showToken await ensurePaths() + initDb(options.dbPath) await cacheVSCodeVersion() if (options.githubToken) { @@ -184,6 +187,12 @@ default: false, description: "Initialize proxy from environment variables", }, + "db-path": { + type: "string", + default: PATHS.USAGE_DB_PATH, + description: + "Path to the usage SQLite database (defaults to ~/.local/share/copilot-api/usage.sqlite)", + }, }, run({ args }) { const rateLimitRaw = args["rate-limit"] @@ -202,6 +211,7 @@ claudeCode: args["claude-code"], showToken: args["show-token"], proxyEnv: args["proxy-env"], + dbPath: args["db-path"], }) }, }) diff --git a/src/types.d.ts b/src/types.d.ts new file mode 100644 index 000000000..28cb1e264 --- /dev/null +++ b/src/types.d.ts @@ -0,0 +1,4 @@ +declare module "*.sql" { + const content: string + export default content +} diff --git a/tests/db.test.ts b/tests/db.test.ts new file mode 100644 index 000000000..b61512c41 --- /dev/null +++ 
b/tests/db.test.ts @@ -0,0 +1,170 @@ +import { test, expect, describe, beforeEach } from "bun:test" +import fs from "node:fs" +import os from "node:os" +import path from "node:path" + +import { + initDb, + getDb, + withTransaction, + CURRENT_SCHEMA_VERSION, + __resetDbForTests, +} from "../src/lib/db" + +const tmpDbPath = () => + path.join( + os.tmpdir(), + `copilot-api-test-${Date.now()}-${Math.random().toString(36).slice(2)}.sqlite`, + ) + +describe("db module", () => { + beforeEach(() => { + __resetDbForTests() + }) + + test("initDb on a fresh path creates all tables and sets schema_version", () => { + const p = tmpDbPath() + const db = initDb(p) + + const tables = db + .query<{ name: string }, []>( + "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name", + ) + .all() + .map((r) => r.name) + + for (const t of [ + "accounts", + "model_pricing", + "model_pricing_versions", + "pricing_sync_log", + "usage_daily", + "usage_events", + "meta", + ]) { + expect(tables).toContain(t) + } + + const ver = db + .query< + { value: string }, + [] + >("SELECT value FROM meta WHERE key='schema_version'") + .get() + expect(ver?.value).toBe(String(CURRENT_SCHEMA_VERSION)) + + db.close() + fs.unlinkSync(p) + }) + + test("initDb is idempotent: running twice leaves schema_version unchanged and does not duplicate rows", () => { + const p = tmpDbPath() + const db1 = initDb(p) + db1.run( + "INSERT INTO meta (key, value) VALUES ('marker', 'persisted') " + + "ON CONFLICT(key) DO UPDATE SET value=excluded.value", + ) + db1.close() + + __resetDbForTests() + const db2 = initDb(p) + const marker = db2 + .query<{ value: string }, []>("SELECT value FROM meta WHERE key='marker'") + .get() + expect(marker?.value).toBe("persisted") + + const ver = db2 + .query< + { value: string }, + [] + >("SELECT value FROM meta WHERE key='schema_version'") + .get() + expect(ver?.value).toBe(String(CURRENT_SCHEMA_VERSION)) + + db2.close() + fs.unlinkSync(p) + }) + + test("getDb throws before initDb is 
called", () => { + expect(() => getDb()).toThrow() + }) + + test("getDb returns the initialized instance", () => { + const p = tmpDbPath() + const db = initDb(p) + expect(getDb()).toBe(db) + db.close() + fs.unlinkSync(p) + }) + + test("withTransaction commits on success", () => { + const p = tmpDbPath() + const db = initDb(p) + + withTransaction((d) => { + d.run( + "INSERT INTO accounts (name, account_type, created_at) " + + "VALUES ('a', 'individual', 1)", + ) + }) + + const row = db + .query<{ name: string }, []>("SELECT name FROM accounts WHERE name='a'") + .get() + expect(row?.name).toBe("a") + + db.close() + fs.unlinkSync(p) + }) + + test("withTransaction rolls back on throw", () => { + const p = tmpDbPath() + const db = initDb(p) + + expect(() => + withTransaction((d) => { + d.run( + "INSERT INTO accounts (name, account_type, created_at) " + + "VALUES ('b', 'individual', 1)", + ) + throw new Error("boom") + }), + ).toThrow("boom") + + const row = db + .query<{ name: string }, []>("SELECT name FROM accounts WHERE name='b'") + .get() + expect(row).toBeNull() + + db.close() + fs.unlinkSync(p) + }) + + test("WAL mode is enabled", () => { + const p = tmpDbPath() + const db = initDb(p) + const mode = db + .query<{ journal_mode: string }, []>("PRAGMA journal_mode") + .get() + expect(mode?.journal_mode.toLowerCase()).toBe("wal") + db.close() + fs.unlinkSync(p) + }) + + test("schema includes expected indexes", () => { + const p = tmpDbPath() + const db = initDb(p) + const idxs = db + .query<{ name: string }, []>( + "SELECT name FROM sqlite_master WHERE type='index'", + ) + .all() + .map((r) => r.name) + expect(idxs).toContain("idx_usage_account_model_ts") + expect(idxs).toContain("idx_usage_ts") + expect(idxs).toContain("idx_pricing_versions_model_time") + expect(idxs).toContain("idx_pricing_versions_current") + db.close() + fs.unlinkSync(p) + }) +}) From 5b9a0f019928c0886d81518b300ac85f95348820 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 14:00:03 
+0800 Subject: [PATCH 02/34] feat(accounts): multi-account pool skeleton (Task #2) - src/lib/account-pool.ts: AccountPool with round-robin / least-busy / least-recent strategies, acquire/release, cooldown and failure tracking - src/lib/accounts-loader.ts: load accounts.json or fall back to legacy single token; persistAccounts() upserts into accounts table - src/lib/state.ts: pool + strategy on State (legacy githubToken/copilotToken kept as shims for not-yet-migrated callers; removed in #3) - src/lib/token.ts: setupCopilotTokenFor(account) sets up per-account refresh interval - src/start.ts: --accounts-file, --strategy flags; load + persist + parallel token init Tests: AccountPool picker + cooldown + acquire/release; loader file/legacy/empty paths; persistAccounts idempotency. Refs #2 Co-Authored-By: Claude Opus 4 --- src/lib/account-pool.ts | 86 +++++++++++++++++++++++++++++++ src/lib/accounts-loader.ts | 80 +++++++++++++++++++++++++++++ src/lib/state.ts | 15 ++++++ src/lib/token.ts | 55 ++++++++++++++++---- src/start.ts | 58 ++++++++++++++++++--- tests/account-pool.test.ts | 96 +++++++++++++++++++++++++++++++++++ tests/accounts-loader.test.ts | 89 ++++++++++++++++++++++++++++++++ 7 files changed, 462 insertions(+), 17 deletions(-) create mode 100644 src/lib/account-pool.ts create mode 100644 src/lib/accounts-loader.ts create mode 100644 tests/account-pool.test.ts create mode 100644 tests/accounts-loader.test.ts diff --git a/src/lib/account-pool.ts b/src/lib/account-pool.ts new file mode 100644 index 000000000..98c6c655a --- /dev/null +++ b/src/lib/account-pool.ts @@ -0,0 +1,86 @@ +export interface Account { + name: string + accountType: string + githubToken: string + copilotToken?: string + copilotTokenRefreshAt: number + inFlight: number + lastUsedAt: number + cooldownUntil?: number + failureCount: number + refreshTimer?: ReturnType<typeof setInterval> +} + +export type Strategy = "round-robin" | "least-busy" | "least-recent" + +export class AccountPool { + private cursor = 0 
public readonly accounts: Array<Account> + public strategy: Strategy + + constructor(accounts: Array<Account>, strategy: Strategy) { + this.accounts = accounts + this.strategy = strategy + } + + /** Returns usable accounts: have copilot token AND not on cooldown. */ + private usable(): Array<Account> { + const now = Date.now() + return this.accounts.filter( + (a) => a.copilotToken && (a.cooldownUntil ?? 0) <= now, + ) + } + + pick(): Account { + const candidates = this.usable() + if (candidates.length === 0) { + throw new Error( + "No usable Copilot accounts (all on cooldown or unauthenticated)", + ) + } + // eslint-disable-next-line default-case + switch (this.strategy) { + case "round-robin": { + const a = candidates[this.cursor % candidates.length] + this.cursor = (this.cursor + 1) % Math.max(candidates.length, 1) + return a + } + case "least-busy": { + return candidates.reduce((best, cur) => { + if (cur.inFlight !== best.inFlight) + return cur.inFlight < best.inFlight ? cur : best + return cur.lastUsedAt < best.lastUsedAt ? cur : best + }) + } + case "least-recent": { + return candidates.reduce((best, cur) => + cur.lastUsedAt < best.lastUsedAt ? cur : best, + ) + } + // No default — Strategy union is exhaustively handled. 
+ } + } + + acquire(): Account { + const a = this.pick() + a.inFlight += 1 + return a + } + + release(a: Account): void { + a.inFlight = Math.max(0, a.inFlight - 1) + a.lastUsedAt = Date.now() + } + + markCooldown(a: Account, ms: number): void { + a.cooldownUntil = Date.now() + ms + } + + markFailure(a: Account): void { + a.failureCount += 1 + } + + byName(name: string): Account | undefined { + return this.accounts.find((a) => a.name === name) + } +} diff --git a/src/lib/accounts-loader.ts b/src/lib/accounts-loader.ts new file mode 100644 index 000000000..5addf7447 --- /dev/null +++ b/src/lib/accounts-loader.ts @@ -0,0 +1,80 @@ +import fs from "node:fs/promises" +import path from "node:path" + +import type { Account } from "./account-pool" + +import { getDb } from "./db" + +export interface AccountsFileEntry { + name: string + github_token: string + account_type?: string +} + +export interface AccountsFile { + accounts: Array<AccountsFileEntry> +} + +export interface LoadAccountsOptions { + accountsFile?: string + legacyToken?: string + defaultAccountType: string +} + +const FRESH = (): Pick< + Account, + | "copilotToken" + | "copilotTokenRefreshAt" + | "inFlight" + | "lastUsedAt" + | "failureCount" +> => ({ + copilotToken: undefined, + copilotTokenRefreshAt: 0, + inFlight: 0, + lastUsedAt: 0, + failureCount: 0, +}) + +export async function loadAccounts( + options: LoadAccountsOptions, +): Promise<Array<Account>> { + const accounts: Array<Account> = [] + + if (options.accountsFile) { + const buf = await fs.readFile(path.resolve(options.accountsFile)) + const parsed = JSON.parse(buf.toString("utf8")) as AccountsFile + for (const e of parsed.accounts) { + accounts.push({ + name: e.name, + accountType: e.account_type ?? 
options.defaultAccountType, + githubToken: e.github_token, + ...FRESH(), + }) + } + } else if (options.legacyToken && options.legacyToken.length > 0) { + accounts.push({ + name: "default", + accountType: options.defaultAccountType, + githubToken: options.legacyToken, + ...FRESH(), + }) + } + + return accounts +} + +/** Insert any new accounts into the `accounts` table (idempotent). */ +export function persistAccounts(accounts: Array<Account>): void { + const db = getDb() + const stmt = db.prepare( + "INSERT OR IGNORE INTO accounts (name, account_type, created_at) VALUES (?, ?, ?)", + ) + const now = Date.now() + const tx = db.transaction((rows: Array<Account>) => { + for (const a of rows) { + stmt.run(a.name, a.accountType, now) + } + }) + tx(accounts) +} diff --git a/src/lib/state.ts b/src/lib/state.ts index 5ba4dc1d1..7c9c45537 100644 --- a/src/lib/state.ts +++ b/src/lib/state.ts @@ -1,6 +1,15 @@ import type { ModelsResponse } from "~/services/copilot/get-models" +import type { Account, Strategy } from "./account-pool" +import type { AccountPool } from "./account-pool" + export interface State { + // Multi-account pool. Until task 03 wires service code through it, + // legacy fields below mirror the "default" account. + pool?: AccountPool + strategy: Strategy + + // Legacy fields (deprecated; will be removed in task 03): githubToken?: string copilotToken?: string @@ -19,7 +28,13 @@ export interface State { export const state: State = { accountType: "individual", + strategy: "round-robin", manualApprove: false, rateLimitWait: false, showToken: false, } + +/** Convenience: the first usable account, used by legacy single-account paths. 
*/ +export function defaultAccount(): Account | undefined { + return state.pool?.accounts[0] +} diff --git a/src/lib/token.ts b/src/lib/token.ts index fc8d2785f..cbebdc5df 100644 --- a/src/lib/token.ts +++ b/src/lib/token.ts @@ -1,6 +1,8 @@ import consola from "consola" import fs from "node:fs/promises" +import type { Account } from "~/lib/account-pool" + import { PATHS } from "~/lib/paths" import { getCopilotToken } from "~/services/github/get-copilot-token" import { getDeviceCode } from "~/services/github/get-device-code" @@ -15,28 +17,50 @@ const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8") const writeGithubToken = (token: string) => fs.writeFile(PATHS.GITHUB_TOKEN_PATH, token) -export const setupCopilotToken = async () => { +/** + * Set up the Copilot token for a single account, including auto-refresh. + * The previous global helper `setupCopilotToken` is replaced by per-account + * setup; legacy `state.copilotToken` is mirrored for not-yet-migrated callers. + */ +export const setupCopilotTokenFor = async (account: Account) => { + // Temporarily expose this account's GitHub token for the legacy + // api-config helper which still reads `state.githubToken`. + state.githubToken = account.githubToken const { token, refresh_in } = await getCopilotToken() + /* eslint-disable require-atomic-updates */ + account.copilotToken = token + account.copilotTokenRefreshAt = Date.now() + refresh_in * 1000 + /* eslint-enable require-atomic-updates */ + + // Mirror the first account's token into legacy state for callers + // not yet migrated to the pool (removed in task 03). 
state.copilotToken = token - // Display the Copilot token to the screen - consola.debug("GitHub Copilot Token fetched successfully!") + consola.debug(`[${account.name}] Copilot token fetched successfully`) if (state.showToken) { - consola.info("Copilot token:", token) + consola.info(`[${account.name}] Copilot token:`, token) } const refreshInterval = (refresh_in - 60) * 1000 - setInterval(async () => { - consola.debug("Refreshing Copilot token") + account.refreshTimer = setInterval(async () => { + consola.debug(`[${account.name}] Refreshing Copilot token`) try { - const { token } = await getCopilotToken() - state.copilotToken = token - consola.debug("Copilot token refreshed") + state.githubToken = account.githubToken + const refreshed = await getCopilotToken() + /* eslint-disable require-atomic-updates */ + account.copilotToken = refreshed.token + account.copilotTokenRefreshAt = Date.now() + refreshed.refresh_in * 1000 + /* eslint-enable require-atomic-updates */ + state.copilotToken = refreshed.token + consola.debug(`[${account.name}] Copilot token refreshed`) if (state.showToken) { - consola.info("Refreshed Copilot token:", token) + consola.info( + `[${account.name}] Refreshed Copilot token:`, + refreshed.token, + ) } } catch (error) { - consola.error("Failed to refresh Copilot token:", error) + consola.error(`[${account.name}] Failed to refresh Copilot token:`, error) throw error } }, refreshInterval) @@ -89,6 +113,15 @@ export async function setupGitHubToken( } } +/** Backwards-compat wrapper: sets up Copilot token for the default account. */ +export const setupCopilotToken = async () => { + if (state.pool && state.pool.accounts.length > 0) { + await setupCopilotTokenFor(state.pool.accounts[0]) + return + } + // No pool yet (very early callers) — do nothing. 
+} + +async function logUser() { const user = await getGitHubUser() consola.info(`Logged in as ${user.login}`) diff --git a/src/start.ts b/src/start.ts index d3539362a..ead59acd4 100644 --- a/src/start.ts +++ b/src/start.ts @@ -6,12 +6,14 @@ import consola from "consola" import { serve, type ServerHandler } from "srvx" import invariant from "tiny-invariant" +import { AccountPool, type Strategy } from "./lib/account-pool" +import { loadAccounts, persistAccounts } from "./lib/accounts-loader" import { initDb } from "./lib/db" import { ensurePaths, PATHS } from "./lib/paths" import { initProxyFromEnv } from "./lib/proxy" import { generateEnvScript } from "./lib/shell" import { state } from "./lib/state" -import { setupCopilotToken, setupGitHubToken } from "./lib/token" +import { setupCopilotTokenFor, setupGitHubToken } from "./lib/token" import { cacheModels, cacheVSCodeVersion } from "./lib/utils" import { server } from "./server" @@ -27,6 +29,8 @@ showToken: boolean proxyEnv: boolean dbPath: string + accountsFile?: string + strategy: Strategy } export async function runServer(options: RunServerOptions): Promise<void> { @@ -40,6 +44,7 @@ } state.accountType = options.accountType + state.strategy = options.strategy if (options.accountType !== "individual") { consola.info(`Using ${options.accountType} plan GitHub account`) } @@ -53,14 +58,42 @@ initDb(options.dbPath) await cacheVSCodeVersion() - if (options.githubToken) { - state.githubToken = options.githubToken - consola.info("Using provided GitHub token") - } else { + // Resolve legacy single token if no accounts file is provided. 
+ let legacyToken = options.githubToken + if (!options.accountsFile && !legacyToken) { await setupGitHubToken() + legacyToken = state.githubToken + } else if (legacyToken) { + consola.info("Using provided GitHub token") + } + + const loaded = await loadAccounts({ + accountsFile: options.accountsFile, + legacyToken, + defaultAccountType: options.accountType, + }) + + if (loaded.length === 0) { + throw new Error( + "No accounts available. Provide --accounts-file or --github-token, or run `auth`.", + ) + } + + const pool = new AccountPool(loaded, options.strategy) + // eslint-disable-next-line require-atomic-updates + state.pool = pool + persistAccounts(loaded) + + consola.info( + `Loaded ${loaded.length} account${loaded.length === 1 ? "" : "s"} (strategy: ${options.strategy})`, + ) + + // Fetch Copilot token for each account in parallel. + await Promise.all(loaded.map((a) => setupCopilotTokenFor(a))) + for (const a of loaded) { + consola.info(`[${a.name}] ready`) } - await setupCopilotToken() await cacheModels() consola.info( @@ -193,6 +226,17 @@ export const start = defineCommand({ description: "Path to the usage SQLite database (defaults to ~/.local/share/copilot-api/usage.sqlite)", }, + "accounts-file": { + type: "string", + description: + "Path to a JSON file containing multiple GitHub Copilot accounts", + }, + strategy: { + type: "string", + default: "round-robin", + description: + "Account selection strategy: round-robin | least-busy | least-recent", + }, }, run({ args }) { const rateLimitRaw = args["rate-limit"] @@ -212,6 +256,8 @@ export const start = defineCommand({ showToken: args["show-token"], proxyEnv: args["proxy-env"], dbPath: args["db-path"], + accountsFile: args["accounts-file"], + strategy: args.strategy as Strategy, }) }, }) diff --git a/tests/account-pool.test.ts b/tests/account-pool.test.ts new file mode 100644 index 000000000..e55c3f050 --- /dev/null +++ b/tests/account-pool.test.ts @@ -0,0 +1,96 @@ +import { test, expect, describe } from 
"bun:test" + +import { AccountPool, type Account } from "../src/lib/account-pool" + +const makeAccount = (overrides: Partial<Account> = {}): Account => ({ + name: "a", + accountType: "individual", + githubToken: "ghu_a", + copilotToken: "tok_a", + copilotTokenRefreshAt: 0, + inFlight: 0, + lastUsedAt: 0, + failureCount: 0, + ...overrides, +}) + +describe("AccountPool", () => { + test("pick throws when no usable accounts", () => { + const pool = new AccountPool([], "round-robin") + expect(() => pool.pick()).toThrow() + }) + + test("pick returns account with copilot token", () => { + const a1 = makeAccount({ name: "a1", copilotToken: undefined }) + const a2 = makeAccount({ name: "a2", copilotToken: "tok2" }) + const pool = new AccountPool([a1, a2], "round-robin") + expect(pool.pick().name).toBe("a2") + }) + + test("round-robin rotates", () => { + const a1 = makeAccount({ name: "a1" }) + const a2 = makeAccount({ name: "a2" }) + const a3 = makeAccount({ name: "a3" }) + const pool = new AccountPool([a1, a2, a3], "round-robin") + const order = [ + pool.pick().name, + pool.pick().name, + pool.pick().name, + pool.pick().name, + ] + expect(order).toEqual(["a1", "a2", "a3", "a1"]) + }) + + test("least-busy prefers lowest inFlight then oldest lastUsedAt", () => { + const a1 = makeAccount({ name: "a1", inFlight: 2, lastUsedAt: 100 }) + const a2 = makeAccount({ name: "a2", inFlight: 1, lastUsedAt: 200 }) + const a3 = makeAccount({ name: "a3", inFlight: 1, lastUsedAt: 50 }) + const pool = new AccountPool([a1, a2, a3], "least-busy") + expect(pool.pick().name).toBe("a3") + }) + + test("least-recent picks oldest lastUsedAt", () => { + const a1 = makeAccount({ name: "a1", lastUsedAt: 200 }) + const a2 = makeAccount({ name: "a2", lastUsedAt: 50 }) + const pool = new AccountPool([a1, a2], "least-recent") + expect(pool.pick().name).toBe("a2") + }) + + test("cooldown account is excluded; comes back on expiry", () => { + const now = Date.now() + const a1 = makeAccount({ name: "a1", cooldownUntil: 
now + 60_000 }) + const a2 = makeAccount({ name: "a2" }) + const pool = new AccountPool([a1, a2], "round-robin") + expect(pool.pick().name).toBe("a2") + expect(pool.pick().name).toBe("a2") + // expire cooldown + a1.cooldownUntil = now - 1 + const seen = new Set([pool.pick().name, pool.pick().name]) + expect(seen.has("a1")).toBe(true) + }) + + test("acquire/release tracks inFlight and lastUsedAt", () => { + const a = makeAccount({ name: "a" }) + const pool = new AccountPool([a], "round-robin") + const acquired = pool.acquire() + expect(acquired.inFlight).toBe(1) + pool.release(acquired) + expect(acquired.inFlight).toBe(0) + expect(acquired.lastUsedAt).toBeGreaterThan(0) + }) + + test("markCooldown sets cooldownUntil", () => { + const a = makeAccount({ name: "a" }) + const pool = new AccountPool([a], "round-robin") + pool.markCooldown(a, 5000) + expect(a.cooldownUntil).toBeGreaterThan(Date.now()) + }) + + test("markFailure increments failureCount", () => { + const a = makeAccount({ name: "a" }) + const pool = new AccountPool([a], "round-robin") + pool.markFailure(a) + pool.markFailure(a) + expect(a.failureCount).toBe(2) + }) +}) diff --git a/tests/accounts-loader.test.ts b/tests/accounts-loader.test.ts new file mode 100644 index 000000000..135c1be95 --- /dev/null +++ b/tests/accounts-loader.test.ts @@ -0,0 +1,89 @@ +import { test, expect, describe, beforeEach } from "bun:test" +import fs from "node:fs" +import os from "node:os" +import path from "node:path" + +import { loadAccounts, persistAccounts } from "../src/lib/accounts-loader" +import { initDb, __resetDbForTests } from "../src/lib/db" + +const tmp = (suffix = "") => + path.join( + os.tmpdir(), + `copilot-api-test-${Date.now()}-${Math.random().toString(36).slice(2)}${suffix}`, + ) + +describe("accounts-loader", () => { + beforeEach(() => { + __resetDbForTests() + }) + + test("loads from accounts.json", async () => { + const file = tmp(".json") + fs.writeFileSync( + file, + JSON.stringify({ + accounts: [ + { 
name: "alice", github_token: "ghu_a", account_type: "business" }, + { name: "bob", github_token: "ghu_b" }, + ], + }), + ) + const accounts = await loadAccounts({ + accountsFile: file, + defaultAccountType: "individual", + }) + expect(accounts).toHaveLength(2) + expect(accounts[0]).toMatchObject({ + name: "alice", + accountType: "business", + }) + expect(accounts[1]).toMatchObject({ + name: "bob", + accountType: "individual", + }) + fs.unlinkSync(file) + }) + + test("falls back to legacy single token when no file", async () => { + const accounts = await loadAccounts({ + legacyToken: "ghu_legacy", + defaultAccountType: "individual", + }) + expect(accounts).toHaveLength(1) + expect(accounts[0]).toMatchObject({ + name: "default", + githubToken: "ghu_legacy", + accountType: "individual", + }) + }) + + test("returns empty array if neither file nor token provided", async () => { + const accounts = await loadAccounts({ defaultAccountType: "individual" }) + expect(accounts).toEqual([]) + }) + + test("persistAccounts inserts into accounts table and is idempotent", async () => { + const dbPath = tmp(".sqlite") + const db = initDb(dbPath) + const accounts = await loadAccounts({ + legacyToken: "ghu_legacy", + defaultAccountType: "individual", + }) + persistAccounts(accounts) + persistAccounts(accounts) // again — should not error or duplicate + const rows = db + .query< + { name: string; account_type: string }, + [] + >("SELECT name, account_type FROM accounts") + .all() + expect(rows).toHaveLength(1) + expect(rows[0].name).toBe("default") + db.close() + try { + fs.unlinkSync(dbPath) + } catch { + // Windows may keep WAL/SHM file locks briefly; ignore. 
+ } + }) +}) From 9c46b588a32f35dd10417006d2655bb4553df7a3 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 14:21:38 +0800 Subject: [PATCH 03/34] feat: add robust model mapping for messages and chat completions --- src/lib/utils.ts | 165 +++++++++++++++++- src/routes/chat-completions/handler.ts | 19 +- src/routes/messages/count-tokens-handler.ts | 13 +- src/routes/messages/handler.ts | 20 ++- src/routes/messages/non-stream-translation.ts | 16 +- tests/model-mapping.test.ts | 122 +++++++++++++ 6 files changed, 337 insertions(+), 18 deletions(-) create mode 100644 tests/model-mapping.test.ts diff --git a/src/lib/utils.ts b/src/lib/utils.ts index cc80be667..1ce123380 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -1,7 +1,12 @@ import consola from "consola" +import type { Context } from "hono" + +import type { Account } from "~/lib/account-pool" +import type { ApiContext } from "~/lib/api-config" import { getModels } from "~/services/copilot/get-models" import { getVSCodeVersion } from "~/services/get-vscode-version" +import type { Model } from "~/services/copilot/get-models" import { state } from "./state" @@ -13,8 +18,166 @@ export const sleep = (ms: number) => export const isNullish = (value: unknown): value is null | undefined => value === null || value === undefined +export function normalizeClaudeModelVersion(model: string): string { + if (!model.startsWith("claude-")) { + return model + } + + // Convert numeric segments from hyphen to dot, e.g. claude-opus-4-6 -> claude-opus-4.6. + // Only replace when the next numeric token ends at '-' or end, so suffixes like '-1m' stay unchanged. + return model.replace(/(\d)-(?=\d(?:-|$))/g, "$1.") +} + +/** + * Resolve model ID by checking the anthropic-beta header for context window variants. 
+ */ +export function resolveModelId(model: string, c?: Context): string { + const normalized = normalizeClaudeModelVersion(model) + + if (!c) { + return normalized + } + + const betaHeader = c.req.header("anthropic-beta") + if ( + normalized.startsWith("claude-") + && betaHeader + && /\bcontext-1m\b/.test(betaHeader) + ) { + if (normalized.endsWith("-1m")) { + return normalized + } + return `${normalized}-1m` + } + + return normalized +} + +/** + * Calculate Jaccard similarity between two strings based on character bigrams. + */ +export function jaccardSimilarity(str1: string, str2: string): number { + const getBigrams = (str: string): Set => { + const bigrams = new Set() + const normalized = str.toLowerCase().replace(/[^a-z0-9]/g, "") + for (let i = 0; i < normalized.length - 1; i++) { + bigrams.add(normalized.substring(i, i + 2)) + } + return bigrams + } + + const bigrams1 = getBigrams(str1) + const bigrams2 = getBigrams(str2) + + if (bigrams1.size === 0 && bigrams2.size === 0) { + return 1 + } + + let intersection = 0 + for (const bigram of bigrams1) { + if (bigrams2.has(bigram)) { + intersection++ + } + } + + const union = bigrams1.size + bigrams2.size - intersection + return union === 0 ? 0 : intersection / union +} + +function findBestModelMatch( + modelId: string, + models: Array, + minSimilarity = 0.3, +): Model | null { + if (models.length === 0) { + return null + } + + let bestMatch: Model | null = null + let bestScore = 0 + + for (const model of models) { + const score = jaccardSimilarity(modelId, model.id) + if (score > bestScore) { + bestScore = score + bestMatch = model + } + } + + if (bestScore >= minSimilarity && bestMatch) { + consola.info( + `Fuzzy matched model "${modelId}" to "${bestMatch.id}" (similarity: ${bestScore.toFixed(2)})`, + ) + return bestMatch + } + + return null +} + +/** + * Resolve a requested model ID against available Copilot models. + * Order: exact -> fuzzy -> auto-version fallback -> first available. 
+ */ +export function mapModelIdToAvailableModels( + requestedModelId: string, + models: Array, +): string { + if (models.length === 0) { + return requestedModelId + } + + const exact = models.find((m) => m.id === requestedModelId) + if (exact) { + return exact.id + } + + const fuzzy = findBestModelMatch(requestedModelId, models) + if (fuzzy) { + return fuzzy.id + } + + const autoModel = models.find((m) => m.id === "auto") + const autoVersionModel = models.find((m) => m.version === autoModel?.version) + if (autoVersionModel) { + consola.info( + `Model "${requestedModelId}" not found, using ${autoVersionModel.id} model`, + ) + return autoVersionModel.id + } + + const fallback = models[0] + consola.info( + `Model "${requestedModelId}" not found, using first available model: ${fallback.id}`, + ) + return fallback.id +} + +/** + * Resolve model ID from request metadata, then map to an available server model. + */ +export function resolveAndMapModelId( + model: string, + c?: Context, + models: Array = state.models?.data ?? [], +): string { + const resolved = resolveModelId(model, c) + return mapModelIdToAvailableModels(resolved, models) +} + +export function makeApiContext(account: Account): ApiContext { + return { account, vsCodeVersion: state.vsCodeVersion } +} + +/** Returns an ApiContext for the first available pool account. 
*/ +export function defaultApiContext(): ApiContext { + if (!state.pool || state.pool.accounts.length === 0) { + throw new Error("Account pool is empty; cannot build ApiContext") + } + return makeApiContext(state.pool.accounts[0]) +} + export async function cacheModels(): Promise { - const models = await getModels() + const models = await getModels(defaultApiContext()) state.models = models } diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 04a5ae9ed..e9efa686f 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -7,7 +7,11 @@ import { awaitApproval } from "~/lib/approval" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" -import { isNullish } from "~/lib/utils" +import { + isNullish, + makeApiContext, + resolveAndMapModelId, +} from "~/lib/utils" import { createChatCompletions, type ChatCompletionResponse, @@ -18,6 +22,10 @@ export async function handleCompletion(c: Context) { await checkRateLimit(state) let payload = await c.req.json() + payload = { + ...payload, + model: resolveAndMapModelId(payload.model, c, state.models?.data ?? 
[]), + } consola.debug("Request payload:", JSON.stringify(payload).slice(-400)) // Find the selected model @@ -47,7 +55,14 @@ export async function handleCompletion(c: Context) { consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens)) } - const response = await createChatCompletions(payload) + if (!state.pool) throw new Error("Account pool not initialized") + const account = state.pool.acquire() + let response: Awaited> + try { + response = await createChatCompletions(makeApiContext(account), payload) + } finally { + state.pool.release(account) + } if (isNonStreaming(response)) { consola.debug("Non-streaming response:", JSON.stringify(response)) diff --git a/src/routes/messages/count-tokens-handler.ts b/src/routes/messages/count-tokens-handler.ts index 2ec849cb8..34588782f 100644 --- a/src/routes/messages/count-tokens-handler.ts +++ b/src/routes/messages/count-tokens-handler.ts @@ -4,6 +4,7 @@ import consola from "consola" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" +import { resolveAndMapModelId } from "~/lib/utils" import { type AnthropicMessagesPayload } from "./anthropic-types" import { translateToOpenAI } from "./non-stream-translation" @@ -17,10 +18,18 @@ export async function handleCountTokens(c: Context) { const anthropicPayload = await c.req.json() - const openAIPayload = translateToOpenAI(anthropicPayload) + let openAIPayload = translateToOpenAI(anthropicPayload, c) + openAIPayload = { + ...openAIPayload, + model: resolveAndMapModelId( + openAIPayload.model, + undefined, + state.models?.data ?? 
[], + ), + } const selectedModel = state.models?.data.find( - (model) => model.id === anthropicPayload.model, + (model) => model.id === openAIPayload.model, ) if (!selectedModel) { diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf6243..8ddf1955e 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -6,6 +6,7 @@ import { streamSSE } from "hono/streaming" import { awaitApproval } from "~/lib/approval" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" +import { makeApiContext, resolveAndMapModelId } from "~/lib/utils" import { createChatCompletions, type ChatCompletionChunk, @@ -28,7 +29,15 @@ export async function handleCompletion(c: Context) { const anthropicPayload = await c.req.json() consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) - const openAIPayload = translateToOpenAI(anthropicPayload) + let openAIPayload = translateToOpenAI(anthropicPayload, c) + openAIPayload = { + ...openAIPayload, + model: resolveAndMapModelId( + openAIPayload.model, + undefined, + state.models?.data ?? 
[], + ), + } consola.debug( "Translated OpenAI request payload:", JSON.stringify(openAIPayload), @@ -38,7 +47,14 @@ export async function handleCompletion(c: Context) { await awaitApproval() } - const response = await createChatCompletions(openAIPayload) + if (!state.pool) throw new Error("Account pool not initialized") + const account = state.pool.acquire() + let response: Awaited> + try { + response = await createChatCompletions(makeApiContext(account), openAIPayload) + } finally { + state.pool.release(account) + } if (isNonStreaming(response)) { consola.debug( diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index dc41e6382..0c64b7c96 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -1,3 +1,6 @@ +import type { Context } from "hono" + +import { resolveModelId } from "~/lib/utils" import { type ChatCompletionResponse, type ChatCompletionsPayload, @@ -28,9 +31,10 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils" export function translateToOpenAI( payload: AnthropicMessagesPayload, + c?: Context, ): ChatCompletionsPayload { return { - model: translateModelName(payload.model), + model: resolveModelId(payload.model, c), messages: translateAnthropicMessagesToOpenAI( payload.messages, payload.system, @@ -46,16 +50,6 @@ export function translateToOpenAI( } } -function translateModelName(model: string): string { - // Subagent requests use a specific model number which Copilot doesn't support - if (model.startsWith("claude-sonnet-4-")) { - return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4") - } else if (model.startsWith("claude-opus-")) { - return model.replace(/^claude-opus-4-.*/, "claude-opus-4") - } - return model -} - function translateAnthropicMessagesToOpenAI( anthropicMessages: Array, system: string | Array | undefined, diff --git a/tests/model-mapping.test.ts b/tests/model-mapping.test.ts new file mode 100644 index 
000000000..94bc9993c --- /dev/null +++ b/tests/model-mapping.test.ts @@ -0,0 +1,122 @@ +import { describe, expect, test } from "bun:test" +import type { Context } from "hono" +import type { Model } from "~/services/copilot/get-models" + +import { + jaccardSimilarity, + mapModelIdToAvailableModels, + normalizeClaudeModelVersion, + resolveModelId, +} from "../src/lib/utils" + +function makeContext(anthropicBeta?: string): Context { + return { + req: { + header: (name: string) => + name.toLowerCase() === "anthropic-beta" ? anthropicBeta : undefined, + }, + } as unknown as Context +} + +describe("model mapping", () => { + test("normalizes Claude numeric segments from hyphen to dot", () => { + expect(normalizeClaudeModelVersion("claude-opus-4-6")).toBe( + "claude-opus-4.6", + ) + expect(normalizeClaudeModelVersion("claude-3-5-sonnet-20241022")).toBe( + "claude-3.5-sonnet-20241022", + ) + }) + + test("does not change non-Claude models", () => { + expect(normalizeClaudeModelVersion("gpt-4.1")).toBe("gpt-4.1") + }) + + test("appends -1m when anthropic-beta has context-1m", () => { + const c = makeContext("foo,context-1m-2025-08-07,bar") + expect(resolveModelId("claude-opus-4-6", c)).toBe("claude-opus-4.6-1m") + }) + + test("does not append -1m twice", () => { + const c = makeContext("context-1m-2025-08-07") + expect(resolveModelId("claude-opus-4.6-1m", c)).toBe("claude-opus-4.6-1m") + }) + + test("keeps normalized model when context-1m is absent", () => { + const c = makeContext("claude-code-2025-02-19") + expect(resolveModelId("claude-opus-4-6", c)).toBe("claude-opus-4.6") + }) + + test("calculates Jaccard similarity for fuzzy matching", () => { + expect(jaccardSimilarity("claude-opus-4.6", "claude-opus-4.6")).toBe(1) + expect(jaccardSimilarity("claude-opus-4.6", "gpt-4o")).toBeLessThan(0.3) + }) + + test("uses exact match before fuzzy matching", () => { + const models = makeModels([ + "claude-opus-4.6", + "claude-sonnet-4.5", + "auto", + ]) + 
expect(mapModelIdToAvailableModels("claude-opus-4.6", models)).toBe( + "claude-opus-4.6", + ) + }) + + test("uses fuzzy match when exact model is missing", () => { + const models = makeModels([ + "claude-opus-4.6", + "claude-sonnet-4.5", + "auto", + ]) + expect(mapModelIdToAvailableModels("claude-opus-4-6", models)).toBe( + "claude-opus-4.6", + ) + }) + + test("falls back to auto-version model when no fuzzy match", () => { + const models = makeModels([ + "claude-opus-4.6", + "auto", + "gpt-4o", + ]) + expect(mapModelIdToAvailableModels("nonexistent-model", models)).toBe( + "auto", + ) + }) + + test("falls back to first model when auto is unavailable", () => { + const models = makeModels(["claude-opus-4.6", "gpt-4o"]) + expect(mapModelIdToAvailableModels("unknown-model", models)).toBe( + "claude-opus-4.6", + ) + }) +}) + +function makeModel(id: string, version = "v1"): Model { + return { + id, + version, + name: id, + vendor: "copilot", + object: "model", + preview: false, + model_picker_enabled: true, + capabilities: { + family: id.includes("claude") ? "claude" : "other", + limits: {}, + object: "model_capabilities", + supports: {}, + tokenizer: "o200k_base", + type: "chat", + }, + } +} + +function makeModels(ids: Array): Array { + const versions: Record = { + auto: "v-auto", + "gpt-4o": "v-auto", + } + return ids.map((id) => makeModel(id, versions[id] ?? "v1")) +} From 6ec05a65999ddd9fcc251513950391595fde1579 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 14:23:31 +0800 Subject: [PATCH 04/34] feat(services): thread Account through API helpers (Task #3) All upstream-calling helpers now take an ApiContext (Account + VSCode version) instead of reading from the global state. Handlers acquire an account from the pool inline (real load-balancing comes in Task #4). 
- src/lib/api-config.ts: copilotBaseUrl/copilotHeaders/githubHeaders take ApiContext - src/services/{copilot,github}/*.ts: all signatures accept ctx as first arg - src/lib/state.ts: drop legacy githubToken/copilotToken fields - src/lib/token.ts: setupCopilotTokenFor uses makeApiContext; setupGitHubToken returns the token instead of mutating state - src/lib/utils.ts: makeApiContext / defaultApiContext helpers; cacheModels uses pool's first account - src/routes/{chat-completions,messages,embeddings}/*.ts: pool.acquire/release around upstream call - src/routes/usage,token/route.ts: use defaultApiContext / defaultAccount - src/check-usage.ts: build a temporary ApiContext from the loaded GitHub token Refs #3 Co-Authored-By: Claude Opus 4 --- src/check-usage.ts | 16 ++++- src/lib/api-config.ts | 26 +++++--- src/lib/state.ts | 11 +--- src/lib/token.ts | 63 +++++++++---------- src/routes/embeddings/route.ts | 13 +++- src/routes/token/route.ts | 5 +- src/routes/usage/route.ts | 3 +- .../copilot/create-chat-completions.ts | 12 ++-- src/services/copilot/create-embeddings.ts | 14 +++-- src/services/copilot/get-models.ts | 9 +-- src/services/github/get-copilot-token.ts | 8 +-- src/services/github/get-copilot-usage.ts | 9 ++- src/services/github/get-user.ts | 8 +-- src/start.ts | 5 +- tests/create-chat-completions.test.ts | 22 ++++--- 15 files changed, 127 insertions(+), 97 deletions(-) diff --git a/src/check-usage.ts b/src/check-usage.ts index 1236ebc69..04142fe20 100644 --- a/src/check-usage.ts +++ b/src/check-usage.ts @@ -2,6 +2,7 @@ import { defineCommand } from "citty" import consola from "consola" import { ensurePaths } from "./lib/paths" +import { state } from "./lib/state" import { setupGitHubToken } from "./lib/token" import { getCopilotUsage, @@ -15,9 +16,20 @@ export const checkUsage = defineCommand({ }, async run() { await ensurePaths() - await setupGitHubToken() + const githubToken = await setupGitHubToken() try { - const usage = await getCopilotUsage() + const 
usage = await getCopilotUsage({ + account: { + name: "_check-usage", + accountType: state.accountType, + githubToken, + copilotTokenRefreshAt: 0, + inFlight: 0, + lastUsedAt: 0, + failureCount: 0, + }, + vsCodeVersion: state.vsCodeVersion, + }) const premium = usage.quota_snapshots.premium_interactions const premiumTotal = premium.entitlement const premiumUsed = premiumTotal - premium.remaining diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts index 83bce92ad..24895dbaa 100644 --- a/src/lib/api-config.ts +++ b/src/lib/api-config.ts @@ -1,6 +1,6 @@ import { randomUUID } from "node:crypto" -import type { State } from "./state" +import type { Account } from "./account-pool" export const standardHeaders = () => ({ "content-type": "application/json", @@ -13,16 +13,22 @@ const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}` const API_VERSION = "2025-04-01" -export const copilotBaseUrl = (state: State) => - state.accountType === "individual" ? +export interface ApiContext { + account: Account + vsCodeVersion?: string +} + +export const copilotBaseUrl = (ctx: ApiContext) => + ctx.account.accountType === "individual" ? 
"https://api.githubcopilot.com" - : `https://api.${state.accountType}.githubcopilot.com` -export const copilotHeaders = (state: State, vision: boolean = false) => { + : `https://api.${ctx.account.accountType}.githubcopilot.com` + +export const copilotHeaders = (ctx: ApiContext, vision: boolean = false) => { const headers: Record = { - Authorization: `Bearer ${state.copilotToken}`, + Authorization: `Bearer ${ctx.account.copilotToken}`, "content-type": standardHeaders()["content-type"], "copilot-integration-id": "vscode-chat", - "editor-version": `vscode/${state.vsCodeVersion}`, + "editor-version": `vscode/${ctx.vsCodeVersion}`, "editor-plugin-version": EDITOR_PLUGIN_VERSION, "user-agent": USER_AGENT, "openai-intent": "conversation-panel", @@ -37,10 +43,10 @@ export const copilotHeaders = (state: State, vision: boolean = false) => { } export const GITHUB_API_BASE_URL = "https://api.github.com" -export const githubHeaders = (state: State) => ({ +export const githubHeaders = (ctx: ApiContext) => ({ ...standardHeaders(), - authorization: `token ${state.githubToken}`, - "editor-version": `vscode/${state.vsCodeVersion}`, + authorization: `token ${ctx.account.githubToken}`, + "editor-version": `vscode/${ctx.vsCodeVersion}`, "editor-plugin-version": EDITOR_PLUGIN_VERSION, "user-agent": USER_AGENT, "x-github-api-version": API_VERSION, diff --git a/src/lib/state.ts b/src/lib/state.ts index 7c9c45537..72bb388de 100644 --- a/src/lib/state.ts +++ b/src/lib/state.ts @@ -1,18 +1,11 @@ import type { ModelsResponse } from "~/services/copilot/get-models" -import type { Account, Strategy } from "./account-pool" -import type { AccountPool } from "./account-pool" +import type { Account, AccountPool, Strategy } from "./account-pool" export interface State { - // Multi-account pool. Until task 03 wires service code through it, - // legacy fields below mirror the "default" account. 
pool?: AccountPool strategy: Strategy - // Legacy fields (deprecated; will be removed in task 03): - githubToken?: string - copilotToken?: string - accountType: string models?: ModelsResponse vsCodeVersion?: string @@ -34,7 +27,7 @@ export const state: State = { showToken: false, } -/** Convenience: the first usable account, used by legacy single-account paths. */ +/** Convenience: the first usable account. */ export function defaultAccount(): Account | undefined { return state.pool?.accounts[0] } diff --git a/src/lib/token.ts b/src/lib/token.ts index cbebdc5df..4e3d8e9f2 100644 --- a/src/lib/token.ts +++ b/src/lib/token.ts @@ -11,31 +11,22 @@ import { pollAccessToken } from "~/services/github/poll-access-token" import { HTTPError } from "./error" import { state } from "./state" +import { makeApiContext } from "./utils" const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8") const writeGithubToken = (token: string) => fs.writeFile(PATHS.GITHUB_TOKEN_PATH, token) -/** - * Set up the Copilot token for a single account, including auto-refresh. - * The previous global helper `setupCopilotToken` is replaced by per-account - * setup; legacy `state.copilotToken` is mirrored for not-yet-migrated callers. - */ +/** Per-account Copilot token setup with auto-refresh. */ export const setupCopilotTokenFor = async (account: Account) => { - // Temporarily expose this account's GitHub token for the legacy - // api-config helper which still reads `state.githubToken`. - state.githubToken = account.githubToken - const { token, refresh_in } = await getCopilotToken() + const ctx = makeApiContext(account) + const { token, refresh_in } = await getCopilotToken(ctx) /* eslint-disable require-atomic-updates */ account.copilotToken = token account.copilotTokenRefreshAt = Date.now() + refresh_in * 1000 /* eslint-enable require-atomic-updates */ - // Mirror the first account's token into legacy state for callers - // not yet migrated to the pool (removed in task 03). 
- state.copilotToken = token - consola.debug(`[${account.name}] Copilot token fetched successfully`) if (state.showToken) { consola.info(`[${account.name}] Copilot token:`, token) @@ -45,13 +36,11 @@ export const setupCopilotTokenFor = async (account: Account) => { account.refreshTimer = setInterval(async () => { consola.debug(`[${account.name}] Refreshing Copilot token`) try { - state.githubToken = account.githubToken - const refreshed = await getCopilotToken() + const refreshed = await getCopilotToken(makeApiContext(account)) /* eslint-disable require-atomic-updates */ account.copilotToken = refreshed.token account.copilotTokenRefreshAt = Date.now() + refreshed.refresh_in * 1000 /* eslint-enable require-atomic-updates */ - state.copilotToken = refreshed.token consola.debug(`[${account.name}] Copilot token refreshed`) if (state.showToken) { consola.info( @@ -70,20 +59,23 @@ interface SetupGitHubTokenOptions { force?: boolean } +/** + * Reads or fetches a single GitHub token file at PATHS.GITHUB_TOKEN_PATH. + * Returns the token; the caller is responsible for putting it into the + * account pool. 
+ */ export async function setupGitHubToken( options?: SetupGitHubTokenOptions, -): Promise { +): Promise { try { const githubToken = await readGithubToken() if (githubToken && !options?.force) { - state.githubToken = githubToken if (state.showToken) { consola.info("GitHub token:", githubToken) } - await logUser() - - return + await logUser(githubToken) + return githubToken } consola.info("Not logged in, getting new access token") @@ -96,12 +88,12 @@ export async function setupGitHubToken( const token = await pollAccessToken(response) await writeGithubToken(token) - state.githubToken = token if (state.showToken) { consola.info("GitHub token:", token) } - await logUser() + await logUser(token) + return token } catch (error) { if (error instanceof HTTPError) { consola.error("Failed to get GitHub token:", await error.response.json()) @@ -113,16 +105,21 @@ export async function setupGitHubToken( } } -/** Backwards-compat wrapper: sets up Copilot token for the default account. */ -export const setupCopilotToken = async () => { - if (state.pool && state.pool.accounts.length > 0) { - await setupCopilotTokenFor(state.pool.accounts[0]) - return +async function logUser(githubToken: string) { + // Build a temporary "anonymous" account with just the GitHub token, + // so we can call /user without going through the pool. + const tempAccount: Account = { + name: "_setup", + accountType: state.accountType, + githubToken, + copilotTokenRefreshAt: 0, + inFlight: 0, + lastUsedAt: 0, + failureCount: 0, } - // No pool yet (very early callers) — do nothing. 
-} - -async function logUser() { - const user = await getGitHubUser() + const user = await getGitHubUser({ + account: tempAccount, + vsCodeVersion: state.vsCodeVersion, + }) consola.info(`Logged in as ${user.login}`) } diff --git a/src/routes/embeddings/route.ts b/src/routes/embeddings/route.ts index 4c4fc7b8a..478bea493 100644 --- a/src/routes/embeddings/route.ts +++ b/src/routes/embeddings/route.ts @@ -1,6 +1,8 @@ import { Hono } from "hono" import { forwardError } from "~/lib/error" +import { state } from "~/lib/state" +import { makeApiContext } from "~/lib/utils" import { createEmbeddings, type EmbeddingRequest, @@ -11,9 +13,14 @@ export const embeddingRoutes = new Hono() embeddingRoutes.post("/", async (c) => { try { const paylod = await c.req.json() - const response = await createEmbeddings(paylod) - - return c.json(response) + if (!state.pool) throw new Error("Account pool not initialized") + const account = state.pool.acquire() + try { + const response = await createEmbeddings(makeApiContext(account), paylod) + return c.json(response) + } finally { + state.pool.release(account) + } } catch (error) { return await forwardError(c, error) } diff --git a/src/routes/token/route.ts b/src/routes/token/route.ts index dd0456d9a..5e1acfd8f 100644 --- a/src/routes/token/route.ts +++ b/src/routes/token/route.ts @@ -1,13 +1,14 @@ import { Hono } from "hono" -import { state } from "~/lib/state" +import { defaultAccount } from "~/lib/state" export const tokenRoute = new Hono() tokenRoute.get("/", (c) => { try { + const account = defaultAccount() return c.json({ - token: state.copilotToken, + token: account?.copilotToken ?? 
null, }) } catch (error) { console.error("Error fetching token:", error) diff --git a/src/routes/usage/route.ts b/src/routes/usage/route.ts index 3e9473236..847a2f94e 100644 --- a/src/routes/usage/route.ts +++ b/src/routes/usage/route.ts @@ -1,12 +1,13 @@ import { Hono } from "hono" +import { defaultApiContext } from "~/lib/utils" import { getCopilotUsage } from "~/services/github/get-copilot-usage" export const usageRoute = new Hono() usageRoute.get("/", async (c) => { try { - const usage = await getCopilotUsage() + const usage = await getCopilotUsage(defaultApiContext()) return c.json(usage) } catch (error) { console.error("Error fetching Copilot usage:", error) diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 8534151da..c3e031b9c 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -1,14 +1,16 @@ import consola from "consola" import { events } from "fetch-event-stream" +import type { ApiContext } from "~/lib/api-config" + import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config" import { HTTPError } from "~/lib/error" -import { state } from "~/lib/state" export const createChatCompletions = async ( + ctx: ApiContext, payload: ChatCompletionsPayload, ) => { - if (!state.copilotToken) throw new Error("Copilot token not found") + if (!ctx.account.copilotToken) throw new Error("Copilot token not found") const enableVision = payload.messages.some( (x) => @@ -17,18 +19,16 @@ export const createChatCompletions = async ( ) // Agent/user check for X-Initiator header - // Determine if any message is from an agent ("assistant" or "tool") const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role), ) - // Build headers and add X-Initiator const headers: Record = { - ...copilotHeaders(state, enableVision), + ...copilotHeaders(ctx, enableVision), "X-Initiator": isAgentCall ? 
"agent" : "user", } - const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, { + const response = await fetch(`${copilotBaseUrl(ctx)}/chat/completions`, { method: "POST", headers, body: JSON.stringify(payload), diff --git a/src/services/copilot/create-embeddings.ts b/src/services/copilot/create-embeddings.ts index f2ad5c233..5c29a804f 100644 --- a/src/services/copilot/create-embeddings.ts +++ b/src/services/copilot/create-embeddings.ts @@ -1,13 +1,17 @@ +import type { ApiContext } from "~/lib/api-config" + import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config" import { HTTPError } from "~/lib/error" -import { state } from "~/lib/state" -export const createEmbeddings = async (payload: EmbeddingRequest) => { - if (!state.copilotToken) throw new Error("Copilot token not found") +export const createEmbeddings = async ( + ctx: ApiContext, + payload: EmbeddingRequest, +) => { + if (!ctx.account.copilotToken) throw new Error("Copilot token not found") - const response = await fetch(`${copilotBaseUrl(state)}/embeddings`, { + const response = await fetch(`${copilotBaseUrl(ctx)}/embeddings`, { method: "POST", - headers: copilotHeaders(state), + headers: copilotHeaders(ctx), body: JSON.stringify(payload), }) diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts index 3cfa30af0..450e3ac35 100644 --- a/src/services/copilot/get-models.ts +++ b/src/services/copilot/get-models.ts @@ -1,10 +1,11 @@ +import type { ApiContext } from "~/lib/api-config" + import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config" import { HTTPError } from "~/lib/error" -import { state } from "~/lib/state" -export const getModels = async () => { - const response = await fetch(`${copilotBaseUrl(state)}/models`, { - headers: copilotHeaders(state), +export const getModels = async (ctx: ApiContext) => { + const response = await fetch(`${copilotBaseUrl(ctx)}/models`, { + headers: copilotHeaders(ctx), }) if (!response.ok) throw new 
HTTPError("Failed to get models", response) diff --git a/src/services/github/get-copilot-token.ts b/src/services/github/get-copilot-token.ts index 98744bab1..423e4827b 100644 --- a/src/services/github/get-copilot-token.ts +++ b/src/services/github/get-copilot-token.ts @@ -1,12 +1,13 @@ +import type { ApiContext } from "~/lib/api-config" + import { GITHUB_API_BASE_URL, githubHeaders } from "~/lib/api-config" import { HTTPError } from "~/lib/error" -import { state } from "~/lib/state" -export const getCopilotToken = async () => { +export const getCopilotToken = async (ctx: ApiContext) => { const response = await fetch( `${GITHUB_API_BASE_URL}/copilot_internal/v2/token`, { - headers: githubHeaders(state), + headers: githubHeaders(ctx), }, ) @@ -15,7 +16,6 @@ export const getCopilotToken = async () => { return (await response.json()) as GetCopilotTokenResponse } -// Trimmed for the sake of simplicity interface GetCopilotTokenResponse { expires_at: number refresh_in: number diff --git a/src/services/github/get-copilot-usage.ts b/src/services/github/get-copilot-usage.ts index 6cdd8bc10..5c8e0bc30 100644 --- a/src/services/github/get-copilot-usage.ts +++ b/src/services/github/get-copilot-usage.ts @@ -1,10 +1,13 @@ +import type { ApiContext } from "~/lib/api-config" + import { GITHUB_API_BASE_URL, githubHeaders } from "~/lib/api-config" import { HTTPError } from "~/lib/error" -import { state } from "~/lib/state" -export const getCopilotUsage = async (): Promise => { +export const getCopilotUsage = async ( + ctx: ApiContext, +): Promise => { const response = await fetch(`${GITHUB_API_BASE_URL}/copilot_internal/user`, { - headers: githubHeaders(state), + headers: githubHeaders(ctx), }) if (!response.ok) { diff --git a/src/services/github/get-user.ts b/src/services/github/get-user.ts index 23e1b1c1c..534e8e325 100644 --- a/src/services/github/get-user.ts +++ b/src/services/github/get-user.ts @@ -1,11 +1,12 @@ +import type { ApiContext } from "~/lib/api-config" + import { 
GITHUB_API_BASE_URL, standardHeaders } from "~/lib/api-config" import { HTTPError } from "~/lib/error" -import { state } from "~/lib/state" -export async function getGitHubUser() { +export async function getGitHubUser(ctx: ApiContext) { const response = await fetch(`${GITHUB_API_BASE_URL}/user`, { headers: { - authorization: `token ${state.githubToken}`, + authorization: `token ${ctx.account.githubToken}`, ...standardHeaders(), }, }) @@ -15,7 +16,6 @@ export async function getGitHubUser() { return (await response.json()) as GithubUserResponse } -// Trimmed for the sake of simplicity interface GithubUserResponse { login: string } diff --git a/src/start.ts b/src/start.ts index ead59acd4..423f51337 100644 --- a/src/start.ts +++ b/src/start.ts @@ -61,8 +61,7 @@ export async function runServer(options: RunServerOptions): Promise { // Resolve legacy single token if no accounts file is provided. let legacyToken = options.githubToken if (!options.accountsFile && !legacyToken) { - await setupGitHubToken() - legacyToken = state.githubToken + legacyToken = await setupGitHubToken() } else if (legacyToken) { consola.info("Using provided GitHub token") } @@ -80,7 +79,7 @@ export async function runServer(options: RunServerOptions): Promise { } const pool = new AccountPool(loaded, options.strategy) - // eslint-disable-next-line require-atomic-updates + state.pool = pool persistAccounts(loaded) diff --git a/tests/create-chat-completions.test.ts b/tests/create-chat-completions.test.ts index d18e741aa..59d62b976 100644 --- a/tests/create-chat-completions.test.ts +++ b/tests/create-chat-completions.test.ts @@ -1,16 +1,22 @@ import { test, expect, mock } from "bun:test" +import type { Account } from "../src/lib/account-pool" import type { ChatCompletionsPayload } from "../src/services/copilot/create-chat-completions" -import { state } from "../src/lib/state" import { createChatCompletions } from "../src/services/copilot/create-chat-completions" -// Mock state -state.copilotToken = 
"test-token" -state.vsCodeVersion = "1.0.0" -state.accountType = "individual" +const account: Account = { + name: "test", + accountType: "individual", + githubToken: "ghu_test", + copilotToken: "test-token", + copilotTokenRefreshAt: 0, + inFlight: 0, + lastUsedAt: 0, + failureCount: 0, +} +const ctx = { account, vsCodeVersion: "1.0.0" } -// Helper to mock fetch const fetchMock = mock( (_url: string, opts: { headers: Record }) => { return { @@ -31,7 +37,7 @@ test("sets X-Initiator to agent if tool/assistant present", async () => { ], model: "gpt-test", } - await createChatCompletions(payload) + await createChatCompletions(ctx, payload) expect(fetchMock).toHaveBeenCalled() const headers = ( fetchMock.mock.calls[0][1] as { headers: Record } @@ -47,7 +53,7 @@ test("sets X-Initiator to user if only user present", async () => { ], model: "gpt-test", } - await createChatCompletions(payload) + await createChatCompletions(ctx, payload) expect(fetchMock).toHaveBeenCalled() const headers = ( fetchMock.mock.calls[1][1] as { headers: Record } From 5a20f3269f98034539d4388c47568e88abb34570 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 14:32:51 +0800 Subject: [PATCH 05/34] feat(accounts): withAccount wrapper for retry/cooldown (Task #4) - src/lib/with-account.ts: 401 -> refresh token + retry; 5xx/network -> cooldown + retry; 4xx propagates. Cap min(pool size, 3). - Handlers in chat-completions, messages, embeddings now go through withAccount. Tests: success, 5xx retry/cooldown, 4xx no retry, 401 refresh + retry, retries cap. 
Refs #4 Co-Authored-By: Claude Opus 4 --- docs/tasks/04-with-account-wrapper.md | 68 +++++++++++++++ src/lib/with-account.ts | 109 +++++++++++++++++++++++++ src/routes/chat-completions/handler.ts | 18 ++-- src/routes/embeddings/route.ts | 14 ++-- src/routes/messages/handler.ts | 12 +-- tests/with-account.test.ts | 108 ++++++++++++++++++++++++ 6 files changed, 299 insertions(+), 30 deletions(-) create mode 100644 docs/tasks/04-with-account-wrapper.md create mode 100644 src/lib/with-account.ts create mode 100644 tests/with-account.test.ts diff --git a/docs/tasks/04-with-account-wrapper.md b/docs/tasks/04-with-account-wrapper.md new file mode 100644 index 000000000..30dc2bafe --- /dev/null +++ b/docs/tasks/04-with-account-wrapper.md @@ -0,0 +1,68 @@ +# Task 04 — Handler `withAccount` wrapper + +**Depends on:** 03 +**Unblocks:** 07, 08, 09, 10 + +## Goal + +Replace the inline `acquire/release` placeholder from task 03 with a single +`withAccount` helper that handles retry, cooldown, abort, and the +`x-internal-pricing-sync` exemption. + +## Scope + +New file `src/lib/with-account.ts`: + +```ts +export async function withAccount( + c: Context, + fn: (account: Account) => Promise, +): Promise { + const isInternal = c.req.header('x-internal-pricing-sync') === '1' + const maxRetries = Math.min(state.pool.size(), 3) + let lastErr: unknown + for (let attempt = 0; attempt < maxRetries; attempt++) { + const account = await state.pool.acquire() + try { + const out = await fn(account) + account.consecutiveFailures = 0 + return out + } catch (e) { + lastErr = e + if (isClientError(e)) throw e // 4xx (non-401) — no retry + if (isAuthError(e)) triggerRefresh(account) // 401 — refresh, then retry + else state.pool.markCooldown(account, 30_000) // 5xx / network + } finally { + state.pool.release(account) + } + } + throw lastErr +} +``` + +Update each handler to: + +```ts +return withAccount(c, async (account) => { + // ...existing logic with `account` threaded into service call... 
+}) +``` + +Streaming handlers must not retry once the SSE response has begun flushing. +Either: + +- Detect "headers already sent" and rethrow without rotating, OR +- Wrap retry only around the `fetch()` upstream call, and once events start + flowing, abort retry. + +## Definition of Done + +- [ ] `withAccount` is the only place that calls `pool.acquire/release` + outside startup code. +- [ ] Unit test: forcing a 401 once causes one retry against a different + account (use a dummy pool of two accounts). +- [ ] Unit test: forcing a 4xx never retries. +- [ ] Manual smoke: kill one account's token mid-flight; new requests succeed + on the other account; the dead account enters cooldown. +- [ ] Internal `x-internal-pricing-sync: 1` requests bypass nothing in this + task (the exemption only matters for the recorder in task 06). diff --git a/src/lib/with-account.ts b/src/lib/with-account.ts new file mode 100644 index 000000000..799097a7f --- /dev/null +++ b/src/lib/with-account.ts @@ -0,0 +1,109 @@ +import type { Context } from "hono" + +import consola from "consola" + +import type { AccountPool, Account } from "./account-pool" + +import { HTTPError } from "./error" +import { state } from "./state" +import { setupCopilotTokenFor } from "./token" + +const COOLDOWN_MS = 30_000 +const MAX_RETRIES_CAP = 3 + +export interface WithAccountOptions { + /** Override max retries (still capped by pool size). */ + maxRetries?: number +} + +async function safeRefresh(account: Account): Promise { + try { + await setupCopilotTokenFor(account) + return true + } catch (refreshErr) { + consola.error(`[${account.name}] token refresh failed:`, refreshErr) + return false + } +} + +/** + * Handle one error from `fn(account)`. + * Throws to bubble up immediately, returns nothing to continue the retry loop. 
+ */ +async function handleAttemptError( + pool: AccountPool, + account: Account, + err: unknown, +): Promise { + if (err instanceof HTTPError) { + const { status } = err.response + if (status === 401) { + consola.warn( + `[${account.name}] 401 from upstream; refreshing token and retrying`, + ) + const ok = await safeRefresh(account) + if (!ok) pool.markCooldown(account, COOLDOWN_MS) + return + } + if (status >= 400 && status < 500) { + // Client error — propagate without retry. + throw err + } + // 5xx + consola.warn( + `[${account.name}] ${status} from upstream; cooling down ${COOLDOWN_MS}ms`, + ) + pool.markCooldown(account, COOLDOWN_MS) + return + } + + // Non-HTTP error (network, timeout, etc.) + consola.warn(`[${account.name}] non-HTTP error; cooling down`, err) + pool.markCooldown(account, COOLDOWN_MS) +} + +/** + * Acquire an account from the pool, run `fn`, and on failure retry against + * a different account up to `min(pool size, MAX_RETRIES_CAP)` times. + * + * Retry policy: + * - 4xx (non-401): no retry; client error rethrows immediately. + * - 401: refresh the account's Copilot token and retry. + * - 5xx / network: cooldown the account for 30s and retry. + * + * Streaming handlers should call `withAccount` ONLY around the upstream + * fetch — once SSE has started flushing, retry is unsafe. + */ +export async function withAccount( + c: Context | undefined, + fn: (account: Account) => Promise, + options: WithAccountOptions = {}, +): Promise { + if (!state.pool) throw new Error("Account pool not initialized") + const pool = state.pool + void c // currently unused; kept for parity with design (e.g. internal-call header) + + const usableCount = pool.accounts.length + const requested = options.maxRetries ?? 
MAX_RETRIES_CAP + const maxAttempts = Math.max( + 1, + Math.min(requested, usableCount, MAX_RETRIES_CAP), + ) + + let lastErr: unknown + for (let attempt = 0; attempt < maxAttempts; attempt++) { + const account = pool.acquire() + try { + const out = await fn(account) + account.failureCount = 0 + pool.release(account) + return out + } catch (err) { + lastErr = err + pool.release(account) + pool.markFailure(account) + await handleAttemptError(pool, account, err) + } + } + throw lastErr +} diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index e9efa686f..21b5bab55 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -7,11 +7,8 @@ import { awaitApproval } from "~/lib/approval" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" -import { - isNullish, - makeApiContext, - resolveAndMapModelId, -} from "~/lib/utils" +import { isNullish, makeApiContext, resolveAndMapModelId } from "~/lib/utils" +import { withAccount } from "~/lib/with-account" import { createChatCompletions, type ChatCompletionResponse, @@ -55,14 +52,9 @@ export async function handleCompletion(c: Context) { consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens)) } - if (!state.pool) throw new Error("Account pool not initialized") - const account = state.pool.acquire() - let response: Awaited> - try { - response = await createChatCompletions(makeApiContext(account), payload) - } finally { - state.pool.release(account) - } + const response = await withAccount(c, (account) => + createChatCompletions(makeApiContext(account), payload), + ) if (isNonStreaming(response)) { consola.debug("Non-streaming response:", JSON.stringify(response)) diff --git a/src/routes/embeddings/route.ts b/src/routes/embeddings/route.ts index 478bea493..2bff9a3a2 100644 --- a/src/routes/embeddings/route.ts +++ b/src/routes/embeddings/route.ts @@ -1,8 
+1,8 @@ import { Hono } from "hono" import { forwardError } from "~/lib/error" -import { state } from "~/lib/state" import { makeApiContext } from "~/lib/utils" +import { withAccount } from "~/lib/with-account" import { createEmbeddings, type EmbeddingRequest, @@ -13,14 +13,10 @@ export const embeddingRoutes = new Hono() embeddingRoutes.post("/", async (c) => { try { const paylod = await c.req.json() - if (!state.pool) throw new Error("Account pool not initialized") - const account = state.pool.acquire() - try { - const response = await createEmbeddings(makeApiContext(account), paylod) - return c.json(response) - } finally { - state.pool.release(account) - } + const response = await withAccount(c, (account) => + createEmbeddings(makeApiContext(account), paylod), + ) + return c.json(response) } catch (error) { return await forwardError(c, error) } diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 8ddf1955e..345c4252b 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -7,6 +7,7 @@ import { awaitApproval } from "~/lib/approval" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { makeApiContext, resolveAndMapModelId } from "~/lib/utils" +import { withAccount } from "~/lib/with-account" import { createChatCompletions, type ChatCompletionChunk, @@ -47,14 +48,9 @@ export async function handleCompletion(c: Context) { await awaitApproval() } - if (!state.pool) throw new Error("Account pool not initialized") - const account = state.pool.acquire() - let response: Awaited> - try { - response = await createChatCompletions(makeApiContext(account), openAIPayload) - } finally { - state.pool.release(account) - } + const response = await withAccount(c, (account) => + createChatCompletions(makeApiContext(account), openAIPayload), + ) if (isNonStreaming(response)) { consola.debug( diff --git a/tests/with-account.test.ts b/tests/with-account.test.ts new file mode 100644 index 
000000000..42bf2e2f6 --- /dev/null +++ b/tests/with-account.test.ts @@ -0,0 +1,108 @@ +import { test, expect, describe, beforeEach, mock } from "bun:test" + +import { AccountPool, type Account } from "../src/lib/account-pool" +import { HTTPError } from "../src/lib/error" +import { state } from "../src/lib/state" +import { withAccount } from "../src/lib/with-account" + +// Stub out token refresh so 401 retries don't hit the network. +void mock.module("../src/lib/token", () => ({ + setupCopilotTokenFor: async (_a: Account) => { + /* no-op */ + }, +})) + +const makeAccount = (name: string): Account => ({ + name, + accountType: "individual", + githubToken: `ghu_${name}`, + copilotToken: `tok_${name}`, + copilotTokenRefreshAt: 0, + inFlight: 0, + lastUsedAt: 0, + failureCount: 0, +}) + +const fakeResp = (status: number) => new Response("err", { status }) + +describe("withAccount", () => { + beforeEach(() => { + state.pool = new AccountPool( + [makeAccount("a"), makeAccount("b")], + "round-robin", + ) + }) + + test("returns value on success without retry", async () => { + const seen: Array = [] + const out = await withAccount(undefined, (account) => { + seen.push(account.name) + return Promise.resolve(42) + }) + expect(out).toBe(42) + expect(seen).toHaveLength(1) + }) + + test("retries on 5xx with a different account, then succeeds", async () => { + const seen: Array = [] + const out = await withAccount(undefined, (account) => { + seen.push(account.name) + if (seen.length === 1) { + return Promise.reject(new HTTPError("upstream 503", fakeResp(503))) + } + return Promise.resolve("ok") + }) + expect(out).toBe("ok") + expect(seen).toHaveLength(2) + expect(seen[0]).not.toBe(seen[1]) + // First account should be on cooldown + const first = state.pool?.accounts[0] + expect(first?.cooldownUntil ?? 
0).toBeGreaterThan(Date.now()) + }) + + test("4xx client error (non-401) does not retry", async () => { + const seen: Array = [] + const promise = withAccount(undefined, (account) => { + seen.push(account.name) + return Promise.reject(new HTTPError("bad request", fakeResp(400))) + }) + let thrown: unknown + try { + await promise + } catch (e) { + thrown = e + } + expect(thrown).toBeInstanceOf(HTTPError) + expect(seen).toHaveLength(1) + }) + + test("401 triggers refresh and retries on a different account", async () => { + const seen: Array = [] + const out = await withAccount(undefined, (account) => { + seen.push(account.name) + if (seen.length === 1) { + return Promise.reject(new HTTPError("auth", fakeResp(401))) + } + return Promise.resolve("ok-after-refresh") + }) + expect(out).toBe("ok-after-refresh") + expect(seen).toHaveLength(2) + }) + + test("retries cap at pool size", async () => { + const calls: Array = [] + const promise = withAccount(undefined, (account) => { + calls.push(account.name) + return Promise.reject(new HTTPError("upstream 502", fakeResp(502))) + }) + let thrown: unknown + try { + await promise + } catch (e) { + thrown = e + } + expect(thrown).toBeInstanceOf(HTTPError) + // 2 accounts ⇒ exactly 2 attempts + expect(calls).toHaveLength(2) + }) +}) From be2fd05a50b0cc61889ad24bc06ad013175783c4 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 14:36:12 +0800 Subject: [PATCH 06/34] feat(usage): pure usage normalizer + stream accumulators (Task #5) - src/lib/usage-normalizer.ts: NormalizedUsage shape + helpers for OpenAI / Anthropic / embeddings, plus streaming accumulators that watch for include_usage / message_start+message_delta and finalize to NormalizedUsage. UsageMissingError raised if OpenAI stream finishes without usage. Tests: OpenAI/Anthropic/embeddings normalize, OpenAI accumulator capture + missing-usage error, Anthropic accumulator aggregation + sane zeros. 
Refs #5 Co-Authored-By: Claude Opus 4 --- src/lib/usage-normalizer.ts | 147 +++++++++++++++++++++++++++++++++ tests/usage-normalizer.test.ts | 142 +++++++++++++++++++++++++++++++ 2 files changed, 289 insertions(+) create mode 100644 src/lib/usage-normalizer.ts create mode 100644 tests/usage-normalizer.test.ts diff --git a/src/lib/usage-normalizer.ts b/src/lib/usage-normalizer.ts new file mode 100644 index 000000000..e041878c1 --- /dev/null +++ b/src/lib/usage-normalizer.ts @@ -0,0 +1,147 @@ +/** + * Pure (no DB, no network, no global state) helpers that convert upstream + * usage payloads from OpenAI and Anthropic into a single shape the rest of + * the system stores. + * + * Field rules (see docs/design/02-database-schema.md): + * - Anthropic `cache_creation_input_tokens` is folded into `inputTokens`. + * - OpenAI `completion_tokens` already includes reasoning tokens — never + * add them on top of `outputTokens`. `reasoningTokens` is informational. + */ + +export interface NormalizedUsage { + inputTokens: number + cachedInputTokens: number + outputTokens: number + reasoningTokens: number + totalTokens: number +} + +export class UsageMissingError extends Error { + constructor(message = "Upstream stream never delivered usage information") { + super(message) + this.name = "UsageMissingError" + } +} + +interface OpenAIUsageShape { + prompt_tokens?: number + completion_tokens?: number + total_tokens?: number + prompt_tokens_details?: { cached_tokens?: number } + completion_tokens_details?: { reasoning_tokens?: number } +} + +interface AnthropicUsageShape { + input_tokens?: number + output_tokens?: number + cache_read_input_tokens?: number + cache_creation_input_tokens?: number +} + +interface AnthropicMessageShape { + type?: string + message?: { usage?: AnthropicUsageShape } + usage?: AnthropicUsageShape +} + +const numOr0 = (v: unknown): number => (typeof v === "number" ? 
v : 0) + +export function normalizeOpenAIFinal(usage: unknown): NormalizedUsage { + const u = (usage ?? {}) as OpenAIUsageShape + const inputTokens = numOr0(u.prompt_tokens) + const cachedInputTokens = numOr0(u.prompt_tokens_details?.cached_tokens) + const outputTokens = numOr0(u.completion_tokens) + const reasoningTokens = numOr0(u.completion_tokens_details?.reasoning_tokens) + const totalTokens = numOr0(u.total_tokens) || inputTokens + outputTokens + return { + inputTokens, + cachedInputTokens, + outputTokens, + reasoningTokens, + totalTokens, + } +} + +export function normalizeAnthropicMessage(message: unknown): NormalizedUsage { + const m = (message ?? {}) as { usage?: AnthropicUsageShape } + const u = m.usage ?? {} + const baseInput = numOr0(u.input_tokens) + const cacheCreate = numOr0(u.cache_creation_input_tokens) + const cachedInputTokens = numOr0(u.cache_read_input_tokens) + const inputTokens = baseInput + cacheCreate + const outputTokens = numOr0(u.output_tokens) + return { + inputTokens, + cachedInputTokens, + outputTokens, + reasoningTokens: 0, + totalTokens: inputTokens + outputTokens, + } +} + +export function normalizeEmbeddings(usage: unknown): NormalizedUsage { + const u = (usage ?? 
{}) as { prompt_tokens?: number; total_tokens?: number } + const inputTokens = numOr0(u.prompt_tokens) + return { + inputTokens, + cachedInputTokens: 0, + outputTokens: 0, + reasoningTokens: 0, + totalTokens: numOr0(u.total_tokens) || inputTokens, + } +} + +export interface StreamUsageAccumulator { + feed(chunk: unknown): void + finalize(): NormalizedUsage +} + +export function createOpenAIAccumulator(): StreamUsageAccumulator { + let saved: OpenAIUsageShape | undefined + + return { + feed(chunk) { + const c = chunk as { usage?: OpenAIUsageShape } | null | undefined + if (c && c.usage) { + saved = c.usage + } + }, + finalize() { + if (!saved) throw new UsageMissingError() + return normalizeOpenAIFinal(saved) + }, + } +} + +export function createAnthropicAccumulator(): StreamUsageAccumulator { + let inputTokens = 0 + let cachedInputTokens = 0 + let outputTokens = 0 + + return { + feed(chunk) { + const ev = (chunk ?? {}) as AnthropicMessageShape + if (ev.type === "message_start" && ev.message?.usage) { + const u = ev.message.usage + inputTokens = + numOr0(u.input_tokens) + numOr0(u.cache_creation_input_tokens) + cachedInputTokens = numOr0(u.cache_read_input_tokens) + outputTokens = numOr0(u.output_tokens) + return + } + if (ev.type === "message_delta" && ev.usage) { + outputTokens = Math.max(outputTokens, numOr0(ev.usage.output_tokens)) + } + }, + finalize() { + return { + inputTokens, + cachedInputTokens, + outputTokens, + reasoningTokens: 0, + totalTokens: inputTokens + outputTokens, + } + }, + } +} diff --git a/tests/usage-normalizer.test.ts b/tests/usage-normalizer.test.ts new file mode 100644 index 000000000..671f4e98d --- /dev/null +++ b/tests/usage-normalizer.test.ts @@ -0,0 +1,142 @@ +import { test, expect, describe } from "bun:test" + +import { + createAnthropicAccumulator, + createOpenAIAccumulator, + normalizeAnthropicMessage, + normalizeEmbeddings, + normalizeOpenAIFinal, + UsageMissingError, +} from "../src/lib/usage-normalizer" + 
+describe("normalizeOpenAIFinal", () => { + test("maps prompt/completion/total + cached + reasoning", () => { + const out = normalizeOpenAIFinal({ + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + prompt_tokens_details: { cached_tokens: 20 }, + completion_tokens_details: { reasoning_tokens: 10 }, + }) + expect(out).toEqual({ + inputTokens: 100, + cachedInputTokens: 20, + outputTokens: 50, + reasoningTokens: 10, + totalTokens: 150, + }) + }) + + test("missing fields default to 0", () => { + const out = normalizeOpenAIFinal({ + prompt_tokens: 10, + completion_tokens: 5, + }) + expect(out.inputTokens).toBe(10) + expect(out.outputTokens).toBe(5) + expect(out.cachedInputTokens).toBe(0) + expect(out.reasoningTokens).toBe(0) + expect(out.totalTokens).toBe(15) + }) +}) + +describe("normalizeAnthropicMessage", () => { + test("folds cache_creation_input_tokens into inputTokens", () => { + const out = normalizeAnthropicMessage({ + usage: { + input_tokens: 100, + cache_creation_input_tokens: 25, + cache_read_input_tokens: 75, + output_tokens: 30, + }, + }) + expect(out.inputTokens).toBe(125) + expect(out.cachedInputTokens).toBe(75) + expect(out.outputTokens).toBe(30) + expect(out.totalTokens).toBe(155) + }) +}) + +describe("normalizeEmbeddings", () => { + test("uses prompt_tokens as input", () => { + const out = normalizeEmbeddings({ prompt_tokens: 12, total_tokens: 12 }) + expect(out.inputTokens).toBe(12) + expect(out.outputTokens).toBe(0) + expect(out.totalTokens).toBe(12) + }) +}) + +describe("createOpenAIAccumulator", () => { + test("captures usage from final chunk", () => { + const acc = createOpenAIAccumulator() + acc.feed({ choices: [{ delta: { content: "hi" } }] }) + acc.feed({ + choices: [], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + }) + const out = acc.finalize() + expect(out.inputTokens).toBe(10) + expect(out.outputTokens).toBe(5) + }) + + test("throws when usage chunk never arrives", () => { + const acc = 
createOpenAIAccumulator() + acc.feed({ choices: [{ delta: { content: "hi" } }] }) + expect(() => acc.finalize()).toThrow(UsageMissingError) + }) +}) + +describe("createAnthropicAccumulator", () => { + test("aggregates message_start + message_delta", () => { + const acc = createAnthropicAccumulator() + acc.feed({ + type: "message_start", + message: { + usage: { + input_tokens: 50, + cache_creation_input_tokens: 10, + cache_read_input_tokens: 20, + output_tokens: 1, + }, + }, + }) + acc.feed({ + type: "content_block_delta", + delta: { type: "text_delta", text: "a" }, + }) + acc.feed({ type: "message_delta", usage: { output_tokens: 7 } }) + acc.feed({ type: "message_delta", usage: { output_tokens: 12 } }) + const out = acc.finalize() + expect(out.inputTokens).toBe(60) + expect(out.cachedInputTokens).toBe(20) + expect(out.outputTokens).toBe(12) + expect(out.totalTokens).toBe(72) + }) + + test("returns sane zeros if only message_start arrived", () => { + const acc = createAnthropicAccumulator() + acc.feed({ + type: "message_start", + message: { usage: { input_tokens: 5, output_tokens: 1 } }, + }) + const out = acc.finalize() + expect(out.inputTokens).toBe(5) + expect(out.outputTokens).toBe(1) + }) + + test("returns zeros when nothing arrives", () => { + const acc = createAnthropicAccumulator() + const out = acc.finalize() + expect(out).toEqual({ + inputTokens: 0, + cachedInputTokens: 0, + outputTokens: 0, + reasoningTokens: 0, + totalTokens: 0, + }) + }) +}) From e9d08765acd78e100a8e8db09a9a46342edf06c3 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 14:38:32 +0800 Subject: [PATCH 07/34] feat(usage): usage recorder writes events + daily aggregate (Task #6) - src/lib/usage-recorder.ts: recordUsage(input) inserts one usage_events row and atomically upserts the matching usage_daily row using SQLite's date(?/1000,'unixepoch','localtime') for the day boundary. Pricing snapshots are pulled from model_pricing at write time; missing rows leave snapshots NULL. 
premium_request_count = premium_multiplier (or 0). Errors are caught and logged so the response path is never broken. isInternal=true short-circuits the write entirely (for x-internal-pricing-sync calls). Tests: single insert, increment, missing pricing, isInternal, swallowed errors. Refs #6 Co-Authored-By: Claude Opus 4 --- src/lib/usage-recorder.ts | 139 +++++++++++++++++++++++++++++++++++ tests/usage-recorder.test.ts | 132 +++++++++++++++++++++++++++++++++ 2 files changed, 271 insertions(+) create mode 100644 src/lib/usage-recorder.ts create mode 100644 tests/usage-recorder.test.ts diff --git a/src/lib/usage-recorder.ts b/src/lib/usage-recorder.ts new file mode 100644 index 000000000..2debabca9 --- /dev/null +++ b/src/lib/usage-recorder.ts @@ -0,0 +1,139 @@ +import consola from "consola" + +import type { Account } from "./account-pool" +import type { NormalizedUsage } from "./usage-normalizer" + +import { getDb } from "./db" + +export type UsageEndpoint = "chat.completions" | "messages" | "embeddings" +export type UpstreamFormat = "openai" | "anthropic" +export type UsageStatus = "ok" | "error" | "aborted" + +export interface RecordUsageInput { + account: Account + modelId: string + endpoint: UsageEndpoint + upstreamFormat: UpstreamFormat + isStreaming: boolean + usage: NormalizedUsage + durationMs: number + status: UsageStatus + requestId?: string + isInternal?: boolean +} + +interface PricingRow { + input_per_mtok: number | null + cached_input_per_mtok: number | null + output_per_mtok: number | null + reasoning_per_mtok: number | null + premium_unit_price: number | null + premium_multiplier: number | null +} + +/** + * Record a single upstream usage event and atomically update the daily + * aggregate. Errors are swallowed (logged via consola); recorder failure + * must not break the caller's response path. 
+ */ +export function recordUsage(input: RecordUsageInput): void { + if (input.isInternal) return + + try { + const db = getDb() + const ts = Date.now() + + const pricing = db + .query( + `SELECT input_per_mtok, + cached_input_per_mtok, + output_per_mtok, + reasoning_per_mtok, + premium_unit_price, + premium_multiplier + FROM model_pricing + WHERE model_id = ?`, + ) + .get(input.modelId) + + const inputPrice = pricing?.input_per_mtok ?? null + const cachedInputPrice = pricing?.cached_input_per_mtok ?? null + const outputPrice = pricing?.output_per_mtok ?? null + const reasoningPrice = pricing?.reasoning_per_mtok ?? null + const premiumUnitPrice = pricing?.premium_unit_price ?? null + const premiumMultiplier = pricing?.premium_multiplier ?? null + const premiumRequestCount = premiumMultiplier ?? 0 + + const insertEvent = db.prepare( + `INSERT INTO usage_events ( + ts, account_name, model_id, endpoint, upstream_format, is_streaming, + input_tokens, cached_input_tokens, output_tokens, reasoning_tokens, + total_tokens, premium_request_count, + input_price_snapshot, cached_input_price_snapshot, + output_price_snapshot, reasoning_price_snapshot, + premium_unit_price_snapshot, premium_multiplier_snapshot, + request_id, status, duration_ms + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + ) + + const upsertDaily = db.prepare( + `INSERT INTO usage_daily ( + day, account_name, model_id, endpoint, + req_count, input_tokens, cached_input_tokens, + output_tokens, reasoning_tokens, total_tokens, premium_requests + ) VALUES ( + date(?/1000, 'unixepoch', 'localtime'), + ?, ?, ?, 1, ?, ?, ?, ?, ?, ? 
+ ) + ON CONFLICT(day, account_name, model_id, endpoint) DO UPDATE SET + req_count = req_count + 1, + input_tokens = input_tokens + excluded.input_tokens, + cached_input_tokens = cached_input_tokens + excluded.cached_input_tokens, + output_tokens = output_tokens + excluded.output_tokens, + reasoning_tokens = reasoning_tokens + excluded.reasoning_tokens, + total_tokens = total_tokens + excluded.total_tokens, + premium_requests = premium_requests + excluded.premium_requests`, + ) + + const tx = db.transaction(() => { + insertEvent.run( + ts, + input.account.name, + input.modelId, + input.endpoint, + input.upstreamFormat, + input.isStreaming ? 1 : 0, + input.usage.inputTokens, + input.usage.cachedInputTokens, + input.usage.outputTokens, + input.usage.reasoningTokens, + input.usage.totalTokens, + premiumRequestCount, + inputPrice, + cachedInputPrice, + outputPrice, + reasoningPrice, + premiumUnitPrice, + premiumMultiplier, + input.requestId ?? null, + input.status, + input.durationMs, + ) + upsertDaily.run( + ts, + input.account.name, + input.modelId, + input.endpoint, + input.usage.inputTokens, + input.usage.cachedInputTokens, + input.usage.outputTokens, + input.usage.reasoningTokens, + input.usage.totalTokens, + premiumRequestCount, + ) + }) + tx() + } catch (err) { + consola.error("[usage-recorder] failed to record usage:", err) + } +} diff --git a/tests/usage-recorder.test.ts b/tests/usage-recorder.test.ts new file mode 100644 index 000000000..0bed74257 --- /dev/null +++ b/tests/usage-recorder.test.ts @@ -0,0 +1,132 @@ +import { test, expect, describe, beforeEach } from "bun:test" + +import type { Account } from "../src/lib/account-pool" + +import { __resetDbForTests, initDb } from "../src/lib/db" +import { recordUsage } from "../src/lib/usage-recorder" + +const ACCOUNT: Account = { + name: "alice", + accountType: "individual", + githubToken: "ghu_a", + copilotToken: "tok_a", + copilotTokenRefreshAt: 0, + inFlight: 0, + lastUsedAt: 0, + failureCount: 0, +} + +const 
baseInput = { + account: ACCOUNT, + modelId: "gpt-4o", + endpoint: "chat.completions" as const, + upstreamFormat: "openai" as const, + isStreaming: false, + usage: { + inputTokens: 100, + cachedInputTokens: 20, + outputTokens: 50, + reasoningTokens: 0, + totalTokens: 150, + }, + durationMs: 123, + status: "ok" as const, +} + +function setupDb() { + __resetDbForTests() + const db = initDb(":memory:") + db.run( + "INSERT INTO accounts (name, account_type, created_at) VALUES (?, ?, ?)", + [ACCOUNT.name, ACCOUNT.accountType, Date.now()], + ) + return db +} + +describe("recordUsage", () => { + beforeEach(() => { + __resetDbForTests() + }) + + test("inserts an event and a daily row", () => { + const db = setupDb() + db.run( + `INSERT INTO model_pricing ( + model_id, input_per_mtok, cached_input_per_mtok, output_per_mtok, + reasoning_per_mtok, premium_multiplier, premium_unit_price, + updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + ["gpt-4o", 5, 1, 15, 0, 1.0, 0.04, Date.now()], + ) + recordUsage(baseInput) + + const events = db.query("SELECT * FROM usage_events").all() as Array<{ + account_name: string + model_id: string + input_tokens: number + input_price_snapshot: number + premium_request_count: number + }> + expect(events).toHaveLength(1) + expect(events[0].account_name).toBe("alice") + expect(events[0].input_price_snapshot).toBe(5) + expect(events[0].premium_request_count).toBe(1) + + const daily = db.query("SELECT * FROM usage_daily").all() as Array<{ + req_count: number + input_tokens: number + premium_requests: number + }> + expect(daily).toHaveLength(1) + expect(daily[0].req_count).toBe(1) + expect(daily[0].input_tokens).toBe(100) + expect(daily[0].premium_requests).toBe(1) + }) + + test("second insert into same (day,account,model,endpoint) increments daily", () => { + const db = setupDb() + recordUsage(baseInput) + recordUsage(baseInput) + const daily = db + .query< + { req_count: number; input_tokens: number }, + [] + >("SELECT req_count, input_tokens FROM 
usage_daily") + .all() + expect(daily).toHaveLength(1) + expect(daily[0].req_count).toBe(2) + expect(daily[0].input_tokens).toBe(200) + }) + + test("missing model_pricing row -> snapshots null and no throw", () => { + const db = setupDb() + recordUsage(baseInput) + const ev = db + .query< + { input_price_snapshot: number | null; premium_request_count: number }, + [] + >("SELECT input_price_snapshot, premium_request_count FROM usage_events") + .get() + expect(ev?.input_price_snapshot).toBeNull() + expect(ev?.premium_request_count).toBe(0) + }) + + test("isInternal=true inserts nothing", () => { + const db = setupDb() + recordUsage({ ...baseInput, isInternal: true }) + const events = db.query("SELECT * FROM usage_events").all() + expect(events).toHaveLength(0) + }) + + test("recorder errors are swallowed", () => { + setupDb() + // Force an error by passing an invalid endpoint type via cast. + expect(() => + recordUsage({ + ...baseInput, + // @ts-expect-error intentional bad value to trigger SQL CHECK fail (none here, but recorder must not throw on weird input) + endpoint: undefined, + }), + ).not.toThrow() + }) +}) From dd6ea1c41bde142a6770050fdb1940b9812f54d6 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 14:40:12 +0800 Subject: [PATCH 08/34] feat(usage): record non-streaming chat-completions usage (Task #7) After a successful non-streaming OpenAI response, the handler builds a NormalizedUsage from response.usage and calls recordUsage with status='ok', the chosen account, durationMs, requestId. On thrown errors before the recorder, a status='error' event with zero tokens is written. The x-internal-pricing-sync: 1 header is honored via isInternal. Streaming branch unchanged (Task #8 owns it). 
Refs #7 Co-Authored-By: Claude Opus 4 --- src/routes/chat-completions/handler.ts | 60 +++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 21b5bab55..2b27f95c1 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -3,10 +3,17 @@ import type { Context } from "hono" import consola from "consola" import { streamSSE, type SSEMessage } from "hono/streaming" +import type { Account } from "~/lib/account-pool" + import { awaitApproval } from "~/lib/approval" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" +import { + normalizeOpenAIFinal, + type NormalizedUsage, +} from "~/lib/usage-normalizer" +import { recordUsage } from "~/lib/usage-recorder" import { isNullish, makeApiContext, resolveAndMapModelId } from "~/lib/utils" import { withAccount } from "~/lib/with-account" import { @@ -15,6 +22,14 @@ import { type ChatCompletionsPayload, } from "~/services/copilot/create-chat-completions" +const ZERO_USAGE: NormalizedUsage = { + inputTokens: 0, + cachedInputTokens: 0, + outputTokens: 0, + reasoningTokens: 0, + totalTokens: 0, +} + export async function handleCompletion(c: Context) { await checkRateLimit(state) @@ -25,12 +40,10 @@ export async function handleCompletion(c: Context) { } consola.debug("Request payload:", JSON.stringify(payload).slice(-400)) - // Find the selected model const selectedModel = state.models?.data.find( (model) => model.id === payload.model, ) - // Calculate and display token count try { if (selectedModel) { const tokenCount = await getTokenCount(payload, selectedModel) @@ -52,12 +65,49 @@ export async function handleCompletion(c: Context) { consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens)) } - const response = await withAccount(c, (account) => - 
createChatCompletions(makeApiContext(account), payload), - ) + const isInternal = c.req.header("x-internal-pricing-sync") === "1" + const tStart = Date.now() + let usedAccount: Account | undefined + + let response: Awaited> + try { + response = await withAccount(c, (account) => { + usedAccount = account + return createChatCompletions(makeApiContext(account), payload) + }) + } catch (err) { + if (usedAccount) { + recordUsage({ + account: usedAccount, + modelId: payload.model, + endpoint: "chat.completions", + upstreamFormat: "openai", + isStreaming: Boolean(payload.stream), + usage: ZERO_USAGE, + durationMs: Date.now() - tStart, + status: "error", + isInternal, + }) + } + throw err + } if (isNonStreaming(response)) { consola.debug("Non-streaming response:", JSON.stringify(response)) + if (usedAccount) { + recordUsage({ + account: usedAccount, + modelId: payload.model, + endpoint: "chat.completions", + upstreamFormat: "openai", + isStreaming: false, + usage: normalizeOpenAIFinal(response.usage), + durationMs: Date.now() - tStart, + status: "ok", + requestId: response.id, + isInternal, + }) + } return c.json(response) } From 89991803da296205950c48b3d11eaae8a087aa4e Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 14:43:59 +0800 Subject: [PATCH 09/34] feat(usage): record streaming chat-completions usage (Task #8) - create-chat-completions.ts forces stream_options.include_usage=true on streaming requests so the upstream emits a final usage frame. - Streaming branch uses an OpenAI accumulator: every chunk is parsed, fed, then forwarded untouched. After the stream closes, recordUsage is called with status='ok' (or 'aborted' if c.req.raw.signal.aborted, or 'error' on iteration error). Missing usage frame logs a warn and records zero tokens with status='error'. 
Refs #8 Co-Authored-By: Claude Opus 4 --- src/routes/chat-completions/handler.ts | 107 +++++++++++++++++- .../copilot/create-chat-completions.ts | 26 ++++- 2 files changed, 124 insertions(+), 9 deletions(-) diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 2b27f95c1..2038ffca3 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -10,8 +10,11 @@ import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" import { + createOpenAIAccumulator, normalizeOpenAIFinal, + UsageMissingError, type NormalizedUsage, + type StreamUsageAccumulator, } from "~/lib/usage-normalizer" import { recordUsage } from "~/lib/usage-recorder" import { isNullish, makeApiContext, resolveAndMapModelId } from "~/lib/utils" @@ -30,6 +33,92 @@ const ZERO_USAGE: NormalizedUsage = { totalTokens: 0, } +interface RecordContext { + account: Account + modelId: string + isInternal: boolean + tStart: number +} + +function feedFrame( + rawEvent: { data?: string }, + accumulator: StreamUsageAccumulator, +): string | undefined { + if (!rawEvent.data || rawEvent.data === "[DONE]") return undefined + try { + const parsed = JSON.parse(rawEvent.data) as { + id?: string + usage?: unknown + } + accumulator.feed(parsed) + return parsed.id + } catch { + return undefined + } +} + +function finalizeStreamUsage( + accumulator: StreamUsageAccumulator, + status: "ok" | "error" | "aborted", +): { usage: NormalizedUsage; status: "ok" | "error" | "aborted" } { + try { + return { usage: accumulator.finalize(), status } + } catch (err) { + if (err instanceof UsageMissingError) { + consola.warn( + "Streaming completed without an include_usage frame; recording zero usage", + ) + } else { + consola.error("Failed to finalize stream usage:", err) + } + return { + usage: ZERO_USAGE, + status: status === "ok" ? 
"error" : status, + } + } +} + +function streamAndRecord( + c: Context, + response: AsyncIterable<{ data?: string }>, + ctx: RecordContext, +) { + return streamSSE(c, async (stream) => { + const accumulator = createOpenAIAccumulator() + let status: "ok" | "error" | "aborted" = "ok" + let lastRequestId: string | undefined + try { + for await (const rawEvent of response) { + if (c.req.raw.signal.aborted) { + status = "aborted" + break + } + const id = feedFrame(rawEvent, accumulator) + if (id) lastRequestId = id + consola.debug("Streaming chunk:", JSON.stringify(rawEvent)) + await stream.writeSSE(rawEvent as SSEMessage) + } + } catch (err) { + status = "error" + consola.error("Streaming chat-completions error:", err) + } + + const result = finalizeStreamUsage(accumulator, status) + recordUsage({ + account: ctx.account, + modelId: ctx.modelId, + endpoint: "chat.completions", + upstreamFormat: "openai", + isStreaming: true, + usage: result.usage, + durationMs: Date.now() - ctx.tStart, + status: result.status, + requestId: lastRequestId, + isInternal: ctx.isInternal, + }) + }) +} + export async function handleCompletion(c: Context) { await checkRateLimit(state) @@ -112,11 +201,19 @@ export async function handleCompletion(c: Context) { } consola.debug("Streaming response") - return streamSSE(c, async (stream) => { - for await (const chunk of response) { - consola.debug("Streaming chunk:", JSON.stringify(chunk)) - await stream.writeSSE(chunk as SSEMessage) - } + if (!usedAccount) { + // Should never happen — withAccount always invokes the callback. 
+ return streamSSE(c, async (stream) => { + for await (const chunk of response) { + await stream.writeSSE(chunk as SSEMessage) + } + }) + } + return streamAndRecord(c, response, { + account: usedAccount, + modelId: payload.model, + isInternal, + tStart, }) } diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index c3e031b9c..c245112a4 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -12,14 +12,31 @@ export const createChatCompletions = async ( ) => { if (!ctx.account.copilotToken) throw new Error("Copilot token not found") - const enableVision = payload.messages.some( + // For streaming requests, force `stream_options.include_usage = true` so + // the upstream emits a final usage frame we can record. Some clients send + // `include_usage: false` explicitly — override anyway and log at debug. + let outgoing = payload + if (payload.stream) { + const existing = payload.stream_options ?? 
{} + if (existing.include_usage !== true) { + consola.debug( + "Forcing stream_options.include_usage=true for usage tracking", + ) + } + outgoing = { + ...payload, + stream_options: { ...existing, include_usage: true }, + } + } + + const enableVision = outgoing.messages.some( (x) => typeof x.content !== "string" && x.content?.some((x) => x.type === "image_url"), ) // Agent/user check for X-Initiator header - const isAgentCall = payload.messages.some((msg) => + const isAgentCall = outgoing.messages.some((msg) => ["assistant", "tool"].includes(msg.role), ) @@ -31,7 +48,7 @@ export const createChatCompletions = async ( const response = await fetch(`${copilotBaseUrl(ctx)}/chat/completions`, { method: "POST", headers, - body: JSON.stringify(payload), + body: JSON.stringify(outgoing), }) if (!response.ok) { @@ -39,7 +56,7 @@ export const createChatCompletions = async ( throw new HTTPError("Failed to create chat completions", response) } - if (payload.stream) { + if (outgoing.stream) { return events(response) } @@ -133,6 +150,7 @@ export interface ChatCompletionsPayload { stop?: string | Array | null n?: number | null stream?: boolean | null + stream_options?: { include_usage?: boolean } | null frequency_penalty?: number | null presence_penalty?: number | null From 93bd5348d2deb0221cf87bcb98ada0405af498ab Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 14:45:40 +0800 Subject: [PATCH 10/34] feat(usage): record /v1/embeddings (Task #9) Embeddings handler now wraps the upstream call with withAccount and records a usage_events row with endpoint='embeddings', upstreamFormat='openai'. On error, a status='error' row with zero usage is recorded. 
Refs #9 Co-Authored-By: Claude Opus 4 --- src/routes/embeddings/route.ts | 56 +++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/src/routes/embeddings/route.ts b/src/routes/embeddings/route.ts index 2bff9a3a2..14a8aab3b 100644 --- a/src/routes/embeddings/route.ts +++ b/src/routes/embeddings/route.ts @@ -1,6 +1,13 @@ import { Hono } from "hono" +import type { Account } from "~/lib/account-pool" + import { forwardError } from "~/lib/error" +import { + normalizeEmbeddings, + type NormalizedUsage, +} from "~/lib/usage-normalizer" +import { recordUsage } from "~/lib/usage-recorder" import { makeApiContext } from "~/lib/utils" import { withAccount } from "~/lib/with-account" import { @@ -8,16 +15,57 @@ import { type EmbeddingRequest, } from "~/services/copilot/create-embeddings" +const ZERO_USAGE: NormalizedUsage = { + inputTokens: 0, + cachedInputTokens: 0, + outputTokens: 0, + reasoningTokens: 0, + totalTokens: 0, +} + export const embeddingRoutes = new Hono() embeddingRoutes.post("/", async (c) => { + const isInternal = c.req.header("x-internal-pricing-sync") === "1" + const tStart = Date.now() + let usedAccount: Account | undefined + let modelId = "" + try { - const paylod = await c.req.json() - const response = await withAccount(c, (account) => - createEmbeddings(makeApiContext(account), paylod), - ) + const payload = await c.req.json() + modelId = payload.model + const response = await withAccount(c, (account) => { + usedAccount = account + return createEmbeddings(makeApiContext(account), payload) + }) + if (usedAccount) { + recordUsage({ + account: usedAccount, + modelId, + endpoint: "embeddings", + upstreamFormat: "openai", + isStreaming: false, + usage: normalizeEmbeddings(response.usage), + durationMs: Date.now() - tStart, + status: "ok", + isInternal, + }) + } return c.json(response) } catch (error) { + if (usedAccount && modelId) { + recordUsage({ + account: usedAccount, + modelId, + endpoint: "embeddings", + 
upstreamFormat: "openai", + isStreaming: false, + usage: ZERO_USAGE, + durationMs: Date.now() - tStart, + status: "error", + isInternal, + }) + } return await forwardError(c, error) } }) From 3b5d4bd44e8a52bf2f8fe367df80b3789246b763 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 14:49:12 +0800 Subject: [PATCH 11/34] feat(usage): record /v1/messages usage (Task #10) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /v1/messages route translates Anthropic→OpenAI before sending, so we use the OpenAI accumulator to capture usage. endpoint='messages', upstreamFormat='anthropic' tags the API surface. Stream branch: feed every chunk into accumulator, finalize on close (ok/aborted/error). Non-stream: normalize response.usage. Errors before the upstream call write a status='error' row. Refs #10 Co-Authored-By: Claude Opus 4 --- src/routes/messages/handler.ts | 231 ++++++++++++++++++++++++++++----- 1 file changed, 197 insertions(+), 34 deletions(-) diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 345c4252b..9c453599a 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -3,9 +3,18 @@ import type { Context } from "hono" import consola from "consola" import { streamSSE } from "hono/streaming" +import type { Account } from "~/lib/account-pool" + import { awaitApproval } from "~/lib/approval" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" +import { + createOpenAIAccumulator, + normalizeOpenAIFinal, + UsageMissingError, + type NormalizedUsage, +} from "~/lib/usage-normalizer" +import { recordUsage } from "~/lib/usage-recorder" import { makeApiContext, resolveAndMapModelId } from "~/lib/utils" import { withAccount } from "~/lib/with-account" import { @@ -24,6 +33,129 @@ import { } from "./non-stream-translation" import { translateChunkToAnthropicEvents } from "./stream-translation" +const ZERO_USAGE: NormalizedUsage = { + 
inputTokens: 0, + cachedInputTokens: 0, + outputTokens: 0, + reasoningTokens: 0, + totalTokens: 0, +} + +interface RecordCtx { + account: Account + modelId: string + isInternal: boolean + tStart: number +} + +interface RecordOkArgs { + ctx: RecordCtx + usage: NormalizedUsage + isStreaming: boolean + requestId?: string +} + +interface RecordFailureArgs { + ctx: RecordCtx + status: "error" | "aborted" + isStreaming: boolean + usage?: NormalizedUsage +} + +function recordOk(args: RecordOkArgs) { + recordUsage({ + account: args.ctx.account, + modelId: args.ctx.modelId, + endpoint: "messages", + upstreamFormat: "anthropic", + isStreaming: args.isStreaming, + usage: args.usage, + durationMs: Date.now() - args.ctx.tStart, + status: "ok", + requestId: args.requestId, + isInternal: args.ctx.isInternal, + }) +} + +function recordFailure(args: RecordFailureArgs) { + recordUsage({ + account: args.ctx.account, + modelId: args.ctx.modelId, + endpoint: "messages", + upstreamFormat: "anthropic", + isStreaming: args.isStreaming, + usage: args.usage ?? 
ZERO_USAGE, + durationMs: Date.now() - args.ctx.tStart, + status: args.status, + isInternal: args.ctx.isInternal, + }) +} + +function streamAndRecord( + c: Context, + response: AsyncIterable<{ data?: string }>, + ctx: RecordCtx, +) { + return streamSSE(c, async (stream) => { + const accumulator = createOpenAIAccumulator() + const streamState: AnthropicStreamState = { + messageStartSent: false, + contentBlockIndex: 0, + contentBlockOpen: false, + toolCalls: {}, + } + let status: "ok" | "error" | "aborted" = "ok" + let lastRequestId: string | undefined + + try { + for await (const rawEvent of response) { + if (c.req.raw.signal.aborted) { + status = "aborted" + break + } + if (rawEvent.data === "[DONE]") break + if (!rawEvent.data) continue + + const chunk = JSON.parse(rawEvent.data) as ChatCompletionChunk + if (chunk.id) lastRequestId = chunk.id + accumulator.feed(chunk) + + const events = translateChunkToAnthropicEvents(chunk, streamState) + for (const event of events) { + await stream.writeSSE({ + event: event.type, + data: JSON.stringify(event), + }) + } + } + } catch (err) { + status = "error" + consola.error("Streaming /v1/messages error:", err) + } + + let usage: NormalizedUsage + try { + usage = accumulator.finalize() + } catch (err) { + if (err instanceof UsageMissingError) { + consola.warn( + "Anthropic stream completed without an include_usage frame; recording zero usage", + ) + } else { + consola.error("Failed to finalize Anthropic stream usage:", err) + } + usage = ZERO_USAGE + if (status === "ok") status = "error" + } + + if (status === "ok") { + recordOk({ ctx, usage, isStreaming: true, requestId: lastRequestId }) + } else { + recordFailure({ ctx, status, isStreaming: true, usage }) + } + }) +} + export async function handleCompletion(c: Context) { await checkRateLimit(state) @@ -48,9 +180,31 @@ export async function handleCompletion(c: Context) { await awaitApproval() } - const response = await withAccount(c, (account) => - 
createChatCompletions(makeApiContext(account), openAIPayload), - ) + const isInternal = c.req.header("x-internal-pricing-sync") === "1" + const tStart = Date.now() + let usedAccount: Account | undefined + + let response: Awaited> + try { + response = await withAccount(c, (account) => { + usedAccount = account + return createChatCompletions(makeApiContext(account), openAIPayload) + }) + } catch (err) { + if (usedAccount) { + recordFailure({ + ctx: { + account: usedAccount, + modelId: openAIPayload.model, + isInternal, + tStart, + }, + status: "error", + isStreaming: Boolean(openAIPayload.stream), + }) + } + throw err + } if (isNonStreaming(response)) { consola.debug( @@ -58,43 +212,52 @@ export async function handleCompletion(c: Context) { JSON.stringify(response).slice(-400), ) const anthropicResponse = translateToAnthropic(response) - consola.debug( - "Translated Anthropic response:", - JSON.stringify(anthropicResponse), - ) + if (usedAccount) { + recordOk({ + ctx: { + account: usedAccount, + modelId: openAIPayload.model, + isInternal, + tStart, + }, + usage: normalizeOpenAIFinal(response.usage), + isStreaming: false, + requestId: response.id, + }) + } return c.json(anthropicResponse) } consola.debug("Streaming response from Copilot") - return streamSSE(c, async (stream) => { - const streamState: AnthropicStreamState = { - messageStartSent: false, - contentBlockIndex: 0, - contentBlockOpen: false, - toolCalls: {}, - } - - for await (const rawEvent of response) { - consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent)) - if (rawEvent.data === "[DONE]") { - break + if (!usedAccount) { + return streamSSE(c, async (stream) => { + const streamState: AnthropicStreamState = { + messageStartSent: false, + contentBlockIndex: 0, + contentBlockOpen: false, + toolCalls: {}, } - - if (!rawEvent.data) { - continue + for await (const rawEvent of response) { + if (rawEvent.data === "[DONE]") break + if (!rawEvent.data) continue + const chunk = 
JSON.parse(rawEvent.data) as ChatCompletionChunk + for (const event of translateChunkToAnthropicEvents( + chunk, + streamState, + )) { + await stream.writeSSE({ + event: event.type, + data: JSON.stringify(event), + }) + } } - - const chunk = JSON.parse(rawEvent.data) as ChatCompletionChunk - const events = translateChunkToAnthropicEvents(chunk, streamState) - - for (const event of events) { - consola.debug("Translated Anthropic event:", JSON.stringify(event)) - await stream.writeSSE({ - event: event.type, - data: JSON.stringify(event), - }) - } - } + }) + } + return streamAndRecord(c, response, { + account: usedAccount, + modelId: openAIPayload.model, + isInternal, + tStart, }) } From eb45f3ac558e489384254b3d20f9987ec2856d71 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 14:53:26 +0800 Subject: [PATCH 12/34] feat(usage): /usage returns three-lens cost stats (Task #11) - src/lib/usage-stats.ts: computeUsageStats(filters) builds the three SQL templates (historical/current/timeline) and returns totals + byAccount.byModel.endpoint_breakdown + daily + missing_pricing per design doc 03. - src/routes/usage/route.ts: parses ?from/?to/?account/?model/?endpoint/?lens; returns { ...primaryAccountQuotaForBackcompat, quota: { byAccount, primary }, stats }. Tests: historical from snapshots, current uses live pricing, missing_pricing list, endpoint filter. 
Refs #11 Co-Authored-By: Claude Opus 4 --- src/lib/usage-stats.ts | 301 ++++++++++++++++++++++++++++++++++++++ src/routes/usage/route.ts | 86 ++++++++++- tests/usage-stats.test.ts | 162 ++++++++++++++++++++ 3 files changed, 543 insertions(+), 6 deletions(-) create mode 100644 src/lib/usage-stats.ts create mode 100644 tests/usage-stats.test.ts diff --git a/src/lib/usage-stats.ts b/src/lib/usage-stats.ts new file mode 100644 index 000000000..9ed6f9b87 --- /dev/null +++ b/src/lib/usage-stats.ts @@ -0,0 +1,301 @@ +import type { Database } from "bun:sqlite" + +import { getDb } from "./db" + +export type Lens = "historical" | "current" | "timeline" + +export interface UsageStatsFilters { + from: number + to: number + account?: string + model?: string + endpoint?: string + lens: Lens +} + +export interface TokenTotals { + input: number + cached_input: number + output: number + reasoning: number + cost_usd: number | null +} + +export interface PremiumTotals { + requests: number + cost_usd: number | null +} + +export interface UsageStats { + range: { from: number; to: number } + currency: string + lens: Lens + totals: { token: TokenTotals; premium: PremiumTotals } + byAccount: Array<{ + name: string + totals: { token: TokenTotals; premium: PremiumTotals } + byModel: Array<{ + model: string + endpoint_breakdown: Record + token: TokenTotals + premium: PremiumTotals + }> + }> + daily: Array<{ + day: string + account: string + model: string + token: TokenTotals + premium: PremiumTotals + }> + missing_pricing: Array +} + +const COST_EXPRESSIONS: Record = { + historical: { + table: "usage_events ue", + cost: `( + ue.input_tokens / 1e6 * ue.input_price_snapshot + + ue.cached_input_tokens / 1e6 * ue.cached_input_price_snapshot + + ue.output_tokens / 1e6 * ue.output_price_snapshot + + ue.reasoning_tokens / 1e6 * ue.reasoning_price_snapshot + )`, + }, + current: { + table: + "usage_events ue LEFT JOIN model_pricing mp ON mp.model_id = ue.model_id", + cost: `( + ue.input_tokens / 1e6 
* mp.input_per_mtok +
+      ue.cached_input_tokens / 1e6 * mp.cached_input_per_mtok +
+      ue.output_tokens / 1e6 * mp.output_per_mtok +
+      ue.reasoning_tokens / 1e6 * mp.reasoning_per_mtok
+    )`,
+  },
+  timeline: {
+    table:
+      "usage_events ue LEFT JOIN model_pricing_versions pv ON pv.model_id = ue.model_id AND ue.ts >= pv.effective_from AND (pv.effective_to IS NULL OR ue.ts < pv.effective_to)",
+    cost: `(
+      ue.input_tokens / 1e6 * pv.input_per_mtok +
+      ue.cached_input_tokens / 1e6 * pv.cached_input_per_mtok +
+      ue.output_tokens / 1e6 * pv.output_per_mtok +
+      ue.reasoning_tokens / 1e6 * pv.reasoning_per_mtok
+    )`,
+  },
+}
+
+interface FilterClause {
+  sql: string
+  params: Array<string | number>
+}
+
+function buildFilter(f: UsageStatsFilters): FilterClause {
+  const where: Array<string> = ["ue.ts BETWEEN ? AND ?"]
+  const params: Array<string | number> = [f.from, f.to]
+  if (f.account) {
+    where.push("ue.account_name = ?")
+    params.push(f.account)
+  }
+  if (f.model) {
+    where.push("ue.model_id = ?")
+    params.push(f.model)
+  }
+  if (f.endpoint) {
+    where.push("ue.endpoint = ?")
+    params.push(f.endpoint)
+  }
+  return { sql: where.join(" AND "), params }
+}
+
+interface AggregateRow {
+  input_tokens: number
+  cached_input_tokens: number
+  output_tokens: number
+  reasoning_tokens: number
+  cost_usd: number | null
+  premium_requests: number
+  premium_cost_usd: number | null
+}
+
+interface ByAccountRow extends AggregateRow {
+  account_name: string
+}
+
+interface ByAccountModelRow extends AggregateRow {
+  account_name: string
+  model_id: string
+  endpoint: string
+}
+
+interface DailyRow extends AggregateRow {
+  day: string
+  account_name: string
+  model_id: string
+}
+
+const COMMON_AGGREGATE = (cost: string) => `
+  SUM(ue.input_tokens) AS input_tokens,
+  SUM(ue.cached_input_tokens) AS cached_input_tokens,
+  SUM(ue.output_tokens) AS output_tokens,
+  SUM(ue.reasoning_tokens) AS reasoning_tokens,
+  SUM(${cost}) AS cost_usd,
+  SUM(ue.premium_request_count * 
COALESCE(ue.premium_unit_price_snapshot, 0))
+    AS premium_cost_usd
+`
+
+function tokenTotals(r: AggregateRow): TokenTotals {
+  return {
+    input: r.input_tokens || 0,
+    cached_input: r.cached_input_tokens || 0,
+    output: r.output_tokens || 0,
+    reasoning: r.reasoning_tokens || 0,
+    cost_usd: r.cost_usd,
+  }
+}
+
+function premiumTotals(r: AggregateRow): PremiumTotals {
+  return {
+    requests: r.premium_requests || 0,
+    cost_usd: r.premium_cost_usd,
+  }
+}
+
+function buildByAccount(
+  byAccountRows: Array<ByAccountRow>,
+  byAccountModelRows: Array<ByAccountModelRow>,
+): UsageStats["byAccount"] {
+  return byAccountRows.map((acc) => {
+    const modelMap = new Map<
+      string,
+      {
+        model: string
+        endpoint_breakdown: Record<string, TokenTotals & PremiumTotals>
+        agg: AggregateRow
+      }
+    >()
+    for (const row of byAccountModelRows) {
+      if (row.account_name !== acc.account_name) continue
+      let entry = modelMap.get(row.model_id)
+      if (!entry) {
+        entry = {
+          model: row.model_id,
+          endpoint_breakdown: {},
+          agg: {
+            input_tokens: 0,
+            cached_input_tokens: 0,
+            output_tokens: 0,
+            reasoning_tokens: 0,
+            cost_usd: 0,
+            premium_requests: 0,
+            premium_cost_usd: 0,
+          },
+        }
+        modelMap.set(row.model_id, entry)
+      }
+      entry.endpoint_breakdown[row.endpoint] = {
+        ...tokenTotals(row),
+        ...premiumTotals(row),
+      }
+      entry.agg.input_tokens += row.input_tokens || 0
+      entry.agg.cached_input_tokens += row.cached_input_tokens || 0
+      entry.agg.output_tokens += row.output_tokens || 0
+      entry.agg.reasoning_tokens += row.reasoning_tokens || 0
+      entry.agg.cost_usd = (entry.agg.cost_usd ?? 0) + (row.cost_usd ?? 0)
+      entry.agg.premium_requests += row.premium_requests || 0
+      entry.agg.premium_cost_usd =
+        (entry.agg.premium_cost_usd ?? 0) + (row.premium_cost_usd ?? 
0)
+    }
+    return {
+      name: acc.account_name,
+      totals: { token: tokenTotals(acc), premium: premiumTotals(acc) },
+      byModel: [...modelMap.values()].map((m) => ({
+        model: m.model,
+        endpoint_breakdown: m.endpoint_breakdown,
+        token: tokenTotals(m.agg),
+        premium: premiumTotals(m.agg),
+      })),
+    }
+  })
+}
+
+export function computeUsageStats(filters: UsageStatsFilters): UsageStats {
+  const db: Database = getDb()
+  const { table, cost } = COST_EXPRESSIONS[filters.lens]
+  const filter = buildFilter(filters)
+
+  const totalsRow =
+    db
+      .query<
+        AggregateRow,
+        Array<string | number>
+      >(`SELECT ${COMMON_AGGREGATE(cost)} FROM ${table} WHERE ${filter.sql}`)
+      .get(...filter.params)
+    ?? ({
+      input_tokens: 0,
+      cached_input_tokens: 0,
+      output_tokens: 0,
+      reasoning_tokens: 0,
+      cost_usd: 0,
+      premium_requests: 0,
+      premium_cost_usd: 0,
+    } as AggregateRow)
+
+  const byAccountRows = db
+    .query<ByAccountRow, Array<string | number>>(
+      `SELECT ue.account_name, ${COMMON_AGGREGATE(cost)}
+       FROM ${table}
+       WHERE ${filter.sql}
+       GROUP BY ue.account_name
+       ORDER BY ue.account_name`,
+    )
+    .all(...filter.params)
+
+  const byAccountModelRows = db
+    .query<ByAccountModelRow, Array<string | number>>(
+      `SELECT ue.account_name, ue.model_id, ue.endpoint, ${COMMON_AGGREGATE(cost)}
+       FROM ${table}
+       WHERE ${filter.sql}
+       GROUP BY ue.account_name, ue.model_id, ue.endpoint
+       ORDER BY ue.account_name, ue.model_id, ue.endpoint`,
+    )
+    .all(...filter.params)
+
+  const dailyRows = db
+    .query<DailyRow, Array<string | number>>(
+      `SELECT date(ue.ts/1000, 'unixepoch', 'localtime') AS day,
+              ue.account_name, ue.model_id, ${COMMON_AGGREGATE(cost)}
+       FROM ${table}
+       WHERE ${filter.sql}
+       GROUP BY day, ue.account_name, ue.model_id
+       ORDER BY day, ue.account_name, ue.model_id`,
+    )
+    .all(...filter.params)
+
+  const missing = db
+    .query<{ model_id: string }, [number, number]>(
+      `SELECT DISTINCT model_id FROM usage_events
+       WHERE model_id NOT IN (SELECT model_id FROM model_pricing)
+         AND ts BETWEEN ? 
AND ?`,
+    )
+    .all(filters.from, filters.to)
+    .map((r) => r.model_id)
+
+  return {
+    range: { from: filters.from, to: filters.to },
+    currency: "USD",
+    lens: filters.lens,
+    totals: {
+      token: tokenTotals(totalsRow),
+      premium: premiumTotals(totalsRow),
+    },
+    byAccount: buildByAccount(byAccountRows, byAccountModelRows),
+    daily: dailyRows.map((r) => ({
+      day: r.day,
+      account: r.account_name,
+      model: r.model_id,
+      token: tokenTotals(r),
+      premium: premiumTotals(r),
+    })),
+    missing_pricing: missing,
+  }
+}
diff --git a/src/routes/usage/route.ts b/src/routes/usage/route.ts
index 847a2f94e..b82d575a4 100644
--- a/src/routes/usage/route.ts
+++ b/src/routes/usage/route.ts
@@ -1,16 +1,90 @@
 import { Hono } from "hono"
 
-import { defaultApiContext } from "~/lib/utils"
+import { state } from "~/lib/state"
+import {
+  computeUsageStats,
+  type Lens,
+  type UsageStatsFilters,
+} from "~/lib/usage-stats"
+import { makeApiContext } from "~/lib/utils"
 import { getCopilotUsage } from "~/services/github/get-copilot-usage"
 
 export const usageRoute = new Hono()
 
+const VALID_LENSES: Array<Lens> = ["historical", "current", "timeline"]
+const VALID_ENDPOINTS = new Set(["chat.completions", "messages", "embeddings"])
+
+const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000
+
+function parseFilters(req: Request): UsageStatsFilters {
+  const url = new URL(req.url)
+  const now = Date.now()
+  const fromRaw = url.searchParams.get("from")
+  const toRaw = url.searchParams.get("to")
+  const from = fromRaw ? Number.parseInt(fromRaw, 10) : now - THIRTY_DAYS_MS
+  const to = toRaw ? Number.parseInt(toRaw, 10) : now
+  const lensRaw = url.searchParams.get("lens") ?? "historical"
+  const lens: Lens =
+    (VALID_LENSES as Array<string>).includes(lensRaw) ?
+      (lensRaw as Lens)
+    : "historical"
+  const endpoint = url.searchParams.get("endpoint")
+  return {
+    from,
+    to,
+    account: url.searchParams.get("account") ?? 
undefined,
+    endpoint: endpoint && VALID_ENDPOINTS.has(endpoint) ? endpoint : undefined,
+    lens,
+  }
+}
+
+interface QuotaPayload {
+  byAccount: Array<{ name: string; quota?: unknown; error?: string }>
+  primary?: unknown
+}
+
+async function fetchQuota(): Promise<QuotaPayload> {
+  const accounts = state.pool?.accounts ?? []
+  const results = await Promise.all(
+    accounts.map(async (account) => {
+      try {
+        const quota = await getCopilotUsage(makeApiContext(account))
+        return { name: account.name, quota }
+      } catch (err) {
+        return { name: account.name, error: (err as Error).message }
+      }
+    }),
+  )
+  const primary = results.find((r) => "quota" in r && r.quota)?.quota
+  return { byAccount: results, primary }
+}
+
 usageRoute.get("/", async (c) => {
+  const stats = (() => {
+    try {
+      return computeUsageStats(parseFilters(c.req.raw))
+    } catch (err) {
+      console.error("Error computing usage stats:", err)
+      return null
+    }
+  })()
+
+  let quota: unknown = null
+  let primary: unknown = null
   try {
-    const usage = await getCopilotUsage(defaultApiContext())
-    return c.json(usage)
-  } catch (error) {
-    console.error("Error fetching Copilot usage:", error)
-    return c.json({ error: "Failed to fetch Copilot usage" }, 500)
+    const result = await fetchQuota()
+    quota = result
+    primary = result.primary
+  } catch (err) {
+    console.error("Error fetching Copilot quota:", err)
   }
+
+  // Backwards compat: top-level fields from the old payload (when present)
+  // are spread from the primary account's response. 
+ return c.json({ + ...(primary as Record | null | undefined), + quota, + stats, + }) }) diff --git a/tests/usage-stats.test.ts b/tests/usage-stats.test.ts new file mode 100644 index 000000000..9503f5eb6 --- /dev/null +++ b/tests/usage-stats.test.ts @@ -0,0 +1,162 @@ +import { test, expect, describe, beforeEach } from "bun:test" + +import type { Account } from "../src/lib/account-pool" + +import { __resetDbForTests, getDb, initDb } from "../src/lib/db" +import { recordUsage } from "../src/lib/usage-recorder" +import { computeUsageStats } from "../src/lib/usage-stats" + +const ACCOUNT: Account = { + name: "alice", + accountType: "individual", + githubToken: "ghu_a", + copilotToken: "tok_a", + copilotTokenRefreshAt: 0, + inFlight: 0, + lastUsedAt: 0, + failureCount: 0, +} + +function setupDb() { + __resetDbForTests() + const db = initDb(":memory:") + db.run( + "INSERT INTO accounts (name, account_type, created_at) VALUES (?, ?, ?)", + [ACCOUNT.name, ACCOUNT.accountType, Date.now()], + ) + db.run( + `INSERT INTO model_pricing ( + model_id, input_per_mtok, cached_input_per_mtok, output_per_mtok, + reasoning_per_mtok, premium_multiplier, premium_unit_price, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + ["gpt-4o", 2, 0, 8, 0, 1, 0.04, Date.now()], + ) + return db +} + +const baseRecord = { + account: ACCOUNT, + modelId: "gpt-4o", + endpoint: "chat.completions" as const, + upstreamFormat: "openai" as const, + isStreaming: false, + durationMs: 100, + status: "ok" as const, +} + +describe("computeUsageStats", () => { + beforeEach(() => { + __resetDbForTests() + }) + + test("historical lens computes cost from snapshots", () => { + setupDb() + recordUsage({ + ...baseRecord, + usage: { + inputTokens: 1_000_000, + cachedInputTokens: 0, + outputTokens: 500_000, + reasoningTokens: 0, + totalTokens: 1_500_000, + }, + }) + const stats = computeUsageStats({ + from: 0, + to: Date.now() + 1, + lens: "historical", + }) + expect(stats.totals.token.input).toBe(1_000_000) + 
expect(stats.totals.token.output).toBe(500_000) + // 1M * $2 + 0.5M * $8 = $2 + $4 = $6 + expect(stats.totals.token.cost_usd).toBeCloseTo(6, 5) + expect(stats.byAccount).toHaveLength(1) + expect(stats.byAccount[0].byModel[0].model).toBe("gpt-4o") + }) + + test("current lens uses model_pricing live row", () => { + setupDb() + recordUsage({ + ...baseRecord, + usage: { + inputTokens: 1_000_000, + cachedInputTokens: 0, + outputTokens: 0, + reasoningTokens: 0, + totalTokens: 1_000_000, + }, + }) + // Bump live pricing + getDb().run( + "UPDATE model_pricing SET input_per_mtok = ? WHERE model_id = ?", + [10, "gpt-4o"], + ) + const stats = computeUsageStats({ + from: 0, + to: Date.now() + 1, + lens: "current", + }) + // Now $10 per Mtok input → cost = $10 + expect(stats.totals.token.cost_usd).toBeCloseTo(10, 5) + }) + + test("missing_pricing lists models with usage but no pricing row", () => { + setupDb() + recordUsage({ + ...baseRecord, + modelId: "unknown-model", + usage: { + inputTokens: 1, + cachedInputTokens: 0, + outputTokens: 0, + reasoningTokens: 0, + totalTokens: 1, + }, + }) + const stats = computeUsageStats({ + from: 0, + to: Date.now() + 1, + lens: "historical", + }) + expect(stats.missing_pricing).toContain("unknown-model") + }) + + test("filter by endpoint narrows the result", () => { + setupDb() + recordUsage({ + ...baseRecord, + usage: { + inputTokens: 100, + cachedInputTokens: 0, + outputTokens: 0, + reasoningTokens: 0, + totalTokens: 100, + }, + }) + recordUsage({ + ...baseRecord, + endpoint: "embeddings", + usage: { + inputTokens: 50, + cachedInputTokens: 0, + outputTokens: 0, + reasoningTokens: 0, + totalTokens: 50, + }, + }) + const allStats = computeUsageStats({ + from: 0, + to: Date.now() + 1, + lens: "historical", + }) + expect(allStats.totals.token.input).toBe(150) + + const embeddingsOnly = computeUsageStats({ + from: 0, + to: Date.now() + 1, + lens: "historical", + endpoint: "embeddings", + }) + expect(embeddingsOnly.totals.token.input).toBe(50) 
+ }) +}) From bd0a68ff36621a6f84dab1c665764fabdd996929 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 14:58:00 +0800 Subject: [PATCH 13/34] feat(pricing): sync sources, LLM caller, validators (Task #12) - src/lib/pricing-sources.ts: fetchAzureRetailPrices() pages through Azure Retail Prices API; fetchAnthropicPricingHtml() best-effort GET. - src/lib/pricing-sync.ts: pickSyncModel, buildSyncRequest, callSyncLlm (x-internal-pricing-sync + json_object), priceChanged (0.5%), sanityFails (10x). Tests: priceChanged, sanityFails, pickSyncModel. Refs #12 Co-Authored-By: Claude Opus 4 --- src/lib/pricing-sources.ts | 69 +++++++++++++ src/lib/pricing-sync.ts | 204 +++++++++++++++++++++++++++++++++++++ tests/pricing-sync.test.ts | 114 +++++++++++++++++++++ 3 files changed, 387 insertions(+) create mode 100644 src/lib/pricing-sources.ts create mode 100644 src/lib/pricing-sync.ts create mode 100644 tests/pricing-sync.test.ts diff --git a/src/lib/pricing-sources.ts b/src/lib/pricing-sources.ts new file mode 100644 index 000000000..2cdb74b28 --- /dev/null +++ b/src/lib/pricing-sources.ts @@ -0,0 +1,69 @@ +/** + * Fetchers for raw pricing source data: Azure Retail Prices API and the + * Anthropic public pricing page. No DB writes, no LLM, no `state`. + */ + +const AZURE_FILTER = encodeURIComponent( + "serviceName eq 'Cognitive Services' and serviceFamily eq 'AI + Machine Learning'", +) +const AZURE_BASE = `https://prices.azure.com/api/retail/prices?$filter=${AZURE_FILTER}` +const ANTHROPIC_PRICING_URL = "https://www.anthropic.com/pricing" + +export interface AzureRow { + meterName?: string + productName?: string + skuName?: string + retailPrice?: number + unitOfMeasure?: string + currencyCode?: string + armSkuName?: string + serviceName?: string +} + +interface AzureResponse { + Items?: Array + NextPageLink?: string | null +} + +/** + * Page through the Azure Retail Prices API until exhausted. 
+ * Returns ALL rows for `serviceName = Cognitive Services` and + * `serviceFamily = AI + Machine Learning`. + */ +export async function fetchAzureRetailPrices( + fetchImpl: typeof fetch = fetch, +): Promise> { + const out: Array = [] + let url: string | null | undefined = AZURE_BASE + while (url) { + const resp = await fetchImpl(url) + if (!resp.ok) { + throw new Error( + `Azure pricing fetch failed: ${resp.status} ${resp.statusText}`, + ) + } + const body = (await resp.json()) as AzureResponse + if (body.Items) out.push(...body.Items) + url = body.NextPageLink ?? null + } + return out +} + +/** + * Best-effort extract of the pricing section from anthropic.com/pricing. + * Returns the raw HTML; LLM is responsible for parsing. Returns null on + * fetch failure or if the HTML doesn't look like a pricing page. + */ +export async function fetchAnthropicPricingHtml( + fetchImpl: typeof fetch = fetch, +): Promise { + try { + const resp = await fetchImpl(ANTHROPIC_PRICING_URL) + if (!resp.ok) return null + const html = await resp.text() + if (!/pricing|per million/i.test(html)) return null + return html + } catch { + return null + } +} diff --git a/src/lib/pricing-sync.ts b/src/lib/pricing-sync.ts new file mode 100644 index 000000000..08e4f821b --- /dev/null +++ b/src/lib/pricing-sync.ts @@ -0,0 +1,204 @@ +import consola from "consola" + +import type { AzureRow } from "./pricing-sources" + +import { + fetchAnthropicPricingHtml, + fetchAzureRetailPrices, +} from "./pricing-sources" +import { state } from "./state" + +export const PRICING_FIELDS = [ + "input_per_mtok", + "cached_input_per_mtok", + "output_per_mtok", + "reasoning_per_mtok", + "premium_multiplier", + "premium_unit_price", +] as const + +export type PricingField = (typeof PRICING_FIELDS)[number] + +export interface PricingRow { + model_id: string + input_per_mtok: number | null + cached_input_per_mtok: number | null + output_per_mtok: number | null + reasoning_per_mtok: number | null + premium_multiplier: 
number | null + premium_unit_price: number | null + currency?: string | null + source?: string | null + source_skus?: Array | null +} + +export interface ParsedPricing { + models: Array +} + +export interface SyncRequest { + knownModels: Array + azureRows: Array + anthropicHtml: string | null +} + +const PRICE_CHANGE_EPSILON = 0.005 +const PRICE_SANITY_RATIO = 10 + +export const SYNC_MODEL_WHITELIST = [ + "gpt-5", + "gpt-4.1", + "gpt-4o", + "claude-sonnet-4", + "claude-3-7-sonnet", +] + +export function pickSyncModel(cliFlag: string | undefined): string { + const known = state.models?.data.map((m) => m.id) ?? [] + if (cliFlag && known.includes(cliFlag)) return cliFlag + for (const wl of SYNC_MODEL_WHITELIST) { + if (known.includes(wl)) { + if (cliFlag && cliFlag !== wl) { + consola.warn( + `Pricing sync model "${cliFlag}" not available; falling back to "${wl}"`, + ) + } + return wl + } + } + if (known.length === 0) { + throw new Error("Cannot pick sync model: state.models is empty") + } + consola.warn( + `Pricing sync whitelist had no match; falling back to first available model "${known[0]}"`, + ) + return known[0] +} + +export async function buildSyncRequest(): Promise { + const knownModels = state.models?.data.map((m) => m.id) ?? [] + const hasClaude = knownModels.some((m) => m.startsWith("claude")) + const [azureRows, anthropicHtml] = await Promise.all([ + fetchAzureRetailPrices(), + hasClaude ? fetchAnthropicPricingHtml() : Promise.resolve(null), + ]) + return { knownModels, azureRows, anthropicHtml } +} + +export const NORMALIZER_SYSTEM_PROMPT = `You are a pricing extractor. Convert raw price source rows from Azure Retail Prices API and the Anthropic public pricing page into a normalized JSON shape. 
+ +Output schema (strict JSON, no prose): +{ + "models": [ + { + "model_id": "string – must match one of the supplied knownModels exactly", + "input_per_mtok": number | null, + "cached_input_per_mtok": number | null, + "output_per_mtok": number | null, + "reasoning_per_mtok": number | null, + "premium_multiplier": number | null, + "premium_unit_price": number | null, + "currency": "USD", + "source": "azure-retail" | "anthropic-public" | "manual", + "source_skus": ["string array of source SKU/product identifiers used"] + } + ] +} + +Rules: +- Only include models present in knownModels. Skip everything else. +- Use USD; if a row is in another currency, convert to USD only if obvious, otherwise omit. +- "per_mtok" means dollars per 1,000,000 tokens. Convert per-1k or per-token rates accordingly. +- premium_multiplier and premium_unit_price come from GitHub Copilot premium pricing — do not invent. +- Leave fields you cannot confidently derive as null. Do not guess. +- Output a single JSON object. No markdown fences, no commentary.` + +export interface CallSyncLlmOptions { + port: number + fetchImpl?: typeof fetch +} + +export async function callSyncLlm( + req: SyncRequest, + modelId: string, + options: CallSyncLlmOptions, +): Promise { + const fetchImpl = options.fetchImpl ?? 
fetch + const resp = await fetchImpl( + `http://localhost:${options.port}/v1/chat/completions`, + { + method: "POST", + headers: { + "content-type": "application/json", + "x-internal-pricing-sync": "1", + }, + body: JSON.stringify({ + model: modelId, + response_format: { type: "json_object" }, + messages: [ + { role: "system", content: NORMALIZER_SYSTEM_PROMPT }, + { role: "user", content: JSON.stringify(req) }, + ], + }), + }, + ) + if (!resp.ok) { + throw new Error( + `Pricing-sync LLM call failed: ${resp.status} ${resp.statusText}`, + ) + } + const body = (await resp.json()) as { + choices?: Array<{ message?: { content?: string } }> + } + const content = body.choices?.[0]?.message?.content + if (!content) { + throw new Error("Pricing-sync LLM response had no content") + } + const parsed = JSON.parse(content) as ParsedPricing + if (!Array.isArray(parsed.models)) { + throw new TypeError("Pricing-sync LLM response missing `models` array") + } + return parsed +} + +function diffsExceeds( + a: number | null | undefined, + b: number | null | undefined, + epsilon: number, +): boolean { + if (a === null && b === null) return false + if (a === null || b === null) return true + if (a === 0 && b === 0) return false + if (a === 0 || b === 0) return true + return Math.abs(b - a) / Math.abs(a) >= epsilon +} + +export function priceChanged( + oldRow: Partial> | null | undefined, + newRow: Partial>, +): boolean { + if (!oldRow) return true + for (const f of PRICING_FIELDS) { + if ( + diffsExceeds(oldRow[f] ?? null, newRow[f] ?? null, PRICE_CHANGE_EPSILON) + ) { + return true + } + } + return false +} + +export function sanityFails( + oldRow: Partial> | null | undefined, + newRow: Partial>, +): boolean { + if (!oldRow) return false + for (const f of PRICING_FIELDS) { + const a = oldRow[f] ?? null + const b = newRow[f] ?? 
null + if (a === null || b === null || a === 0 || b === 0) continue + const r = b / a + if (r > PRICE_SANITY_RATIO || r < 1 / PRICE_SANITY_RATIO) return true + } + return false +} diff --git a/tests/pricing-sync.test.ts b/tests/pricing-sync.test.ts new file mode 100644 index 000000000..70345aa6d --- /dev/null +++ b/tests/pricing-sync.test.ts @@ -0,0 +1,114 @@ +import { test, expect, describe, beforeEach } from "bun:test" + +import type { ModelsResponse } from "../src/services/copilot/get-models" + +import { + pickSyncModel, + priceChanged, + sanityFails, + type PricingField, +} from "../src/lib/pricing-sync" +import { state } from "../src/lib/state" + +const ZERO = (): Record => ({ + input_per_mtok: null, + cached_input_per_mtok: null, + output_per_mtok: null, + reasoning_per_mtok: null, + premium_multiplier: null, + premium_unit_price: null, +}) + +describe("priceChanged", () => { + test("returns true when old row missing", () => { + expect(priceChanged(null, ZERO())).toBe(true) + }) + + test("returns false when both rows are equal", () => { + const a = { ...ZERO(), input_per_mtok: 5 } + expect(priceChanged(a, { ...ZERO(), input_per_mtok: 5 })).toBe(false) + }) + + test("returns false when change is below 0.5%", () => { + const a = { ...ZERO(), input_per_mtok: 100 } + const b = { ...ZERO(), input_per_mtok: 100.4 } + expect(priceChanged(a, b)).toBe(false) + }) + + test("returns true when change is at or above 0.5%", () => { + const a = { ...ZERO(), input_per_mtok: 100 } + const b = { ...ZERO(), input_per_mtok: 100.5 } + expect(priceChanged(a, b)).toBe(true) + }) + + test("returns true when one side is null and the other not", () => { + const a = { ...ZERO(), input_per_mtok: 5 } + const b = ZERO() + expect(priceChanged(a, b)).toBe(true) + }) + + test("returns false when both sides are null for all fields", () => { + expect(priceChanged(ZERO(), ZERO())).toBe(false) + }) + + test("returns true when one zero and one non-zero", () => { + const a = { ...ZERO(), 
input_per_mtok: 0 } + const b = { ...ZERO(), input_per_mtok: 5 } + expect(priceChanged(a, b)).toBe(true) + }) +}) + +describe("sanityFails", () => { + test("returns false on first entry (no oldRow)", () => { + expect(sanityFails(null, { ...ZERO(), input_per_mtok: 100 })).toBe(false) + }) + + test("passes on within-bounds change (10x boundary)", () => { + const a = { ...ZERO(), input_per_mtok: 1 } + const b = { ...ZERO(), input_per_mtok: 9.99 } + expect(sanityFails(a, b)).toBe(false) + }) + + test("fails when change exceeds 10x", () => { + const a = { ...ZERO(), input_per_mtok: 1 } + const b = { ...ZERO(), input_per_mtok: 100 } + expect(sanityFails(a, b)).toBe(true) + }) + + test("fails when change drops below 1/10x", () => { + const a = { ...ZERO(), input_per_mtok: 100 } + const b = { ...ZERO(), input_per_mtok: 5 } + expect(sanityFails(a, b)).toBe(true) + }) + + test("ignores fields where either side is null or zero", () => { + const a = { ...ZERO(), input_per_mtok: 1 } + const b = { ...ZERO(), output_per_mtok: 100 } + expect(sanityFails(a, b)).toBe(false) + }) +}) + +describe("pickSyncModel", () => { + beforeEach(() => { + const data = [ + { id: "gpt-4o" }, + { id: "claude-sonnet-4" }, + { id: "gpt-3.5-turbo" }, + ] as unknown as ModelsResponse["data"] + state.models = { object: "list", data } + }) + + test("returns CLI flag when present in models", () => { + expect(pickSyncModel("gpt-4o")).toBe("gpt-4o") + }) + + test("falls back to whitelist when CLI flag is unknown", () => { + expect(pickSyncModel("does-not-exist")).toBe("gpt-4o") + }) + + test("falls back to first model when no whitelist match", () => { + const data = [{ id: "exotic-model" }] as unknown as ModelsResponse["data"] + state.models = { object: "list", data } + expect(pickSyncModel(undefined)).toBe("exotic-model") + }) +}) From f16e5ac9c8cc1b30dc09ae8f56d10b1e115742f5 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 15:00:48 +0800 Subject: [PATCH 14/34] feat(pricing): version-write logic 
for pricing sync (Task #13) - src/lib/pricing-sync-runner.ts: runPricingSync({port, syncModel?, parsedOverride?}) glues task #12's parsed output into the DB: - Sanity gate rejects whole sync on any 10x change. - For each row, looks up current model_pricing_versions WHERE effective_to IS NULL, skips when priceChanged() is false. - On change: patches old version's effective_to and inserts a new row; UPSERTs model_pricing. - Writes pricing_sync_log + bumps meta.last_pricing_sync_ts inside the same transaction. - Status: ok / partial / rejected / failed. - src/lib/pricing-sync.ts: tighten null/undefined handling in priceChanged/sanityFails so the runner can pass DB-shaped rows. Tests: first sync inserts versions; identical second sync no-op; changed price patches + inserts; 10x change rejects + leaves DB unchanged. Refs #13 Co-Authored-By: Claude Opus 4 --- src/lib/pricing-sync-runner.ts | 263 ++++++++++++++++++++++++++++++ src/lib/pricing-sync.ts | 6 +- tests/pricing-sync-runner.test.ts | 118 ++++++++++++++ 3 files changed, 385 insertions(+), 2 deletions(-) create mode 100644 src/lib/pricing-sync-runner.ts create mode 100644 tests/pricing-sync-runner.test.ts diff --git a/src/lib/pricing-sync-runner.ts b/src/lib/pricing-sync-runner.ts new file mode 100644 index 000000000..a5a333f01 --- /dev/null +++ b/src/lib/pricing-sync-runner.ts @@ -0,0 +1,263 @@ +import consola from "consola" + +import { getDb } from "./db" +import { + buildSyncRequest, + callSyncLlm, + pickSyncModel, + priceChanged, + PRICING_FIELDS, + sanityFails, + type ParsedPricing, + type PricingField, + type PricingRow, +} from "./pricing-sync" + +export type SyncStatus = "ok" | "partial" | "rejected" | "failed" + +export interface RunPricingSyncOptions { + port: number + syncModel?: string + /** Test seam: bypass HTTP fetchers and inject the parsed result directly. 
*/ + parsedOverride?: ParsedPricing +} + +export interface RunPricingSyncResult { + status: SyncStatus + updated: number + rejected: number + logId?: number + error?: string +} + +interface CurrentRow { + id: number + input_per_mtok: number | null + cached_input_per_mtok: number | null + output_per_mtok: number | null + reasoning_per_mtok: number | null + premium_multiplier: number | null + premium_unit_price: number | null +} + +function selectCurrentVersion(modelId: string): CurrentRow | undefined { + return ( + getDb() + .query( + `SELECT id, input_per_mtok, cached_input_per_mtok, output_per_mtok, + reasoning_per_mtok, premium_multiplier, premium_unit_price + FROM model_pricing_versions + WHERE model_id = ? AND effective_to IS NULL`, + ) + .get(modelId) ?? undefined + ) +} + +function rowToFieldMap(row: PricingRow): Record { + const out: Record = { + input_per_mtok: null, + cached_input_per_mtok: null, + output_per_mtok: null, + reasoning_per_mtok: null, + premium_multiplier: null, + premium_unit_price: null, + } + for (const f of PRICING_FIELDS) { + out[f] = row[f] ?? null + } + return out +} + +interface ApplyArgs { + row: PricingRow + detectedAt: number + syncLogId: number +} + +function applyPricingChange(args: ApplyArgs): "changed" | "unchanged" { + const db = getDb() + const newRow = rowToFieldMap(args.row) + const current = selectCurrentVersion(args.row.model_id) + if (current && !priceChanged(current, newRow)) { + return "unchanged" + } + if (current) { + db.run("UPDATE model_pricing_versions SET effective_to = ? 
WHERE id = ?", [ + args.detectedAt, + current.id, + ]) + } + db.run( + `INSERT INTO model_pricing_versions ( + model_id, effective_from, effective_to, + input_per_mtok, cached_input_per_mtok, output_per_mtok, + reasoning_per_mtok, premium_multiplier, premium_unit_price, + currency, source, source_skus, sync_log_id, created_at + ) VALUES (?, ?, NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + [ + args.row.model_id, + args.detectedAt, + newRow.input_per_mtok, + newRow.cached_input_per_mtok, + newRow.output_per_mtok, + newRow.reasoning_per_mtok, + newRow.premium_multiplier, + newRow.premium_unit_price, + args.row.currency ?? "USD", + args.row.source ?? null, + args.row.source_skus ? JSON.stringify(args.row.source_skus) : null, + args.syncLogId, + args.detectedAt, + ], + ) + db.run( + `INSERT INTO model_pricing ( + model_id, input_per_mtok, cached_input_per_mtok, output_per_mtok, + reasoning_per_mtok, premium_multiplier, premium_unit_price, + currency, source, source_skus, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(model_id) DO UPDATE SET + input_per_mtok = excluded.input_per_mtok, + cached_input_per_mtok = excluded.cached_input_per_mtok, + output_per_mtok = excluded.output_per_mtok, + reasoning_per_mtok = excluded.reasoning_per_mtok, + premium_multiplier = excluded.premium_multiplier, + premium_unit_price = excluded.premium_unit_price, + currency = excluded.currency, + source = excluded.source, + source_skus = excluded.source_skus, + updated_at = excluded.updated_at`, + [ + args.row.model_id, + newRow.input_per_mtok, + newRow.cached_input_per_mtok, + newRow.output_per_mtok, + newRow.reasoning_per_mtok, + newRow.premium_multiplier, + newRow.premium_unit_price, + args.row.currency ?? "USD", + args.row.source ?? null, + args.row.source_skus ? 
JSON.stringify(args.row.source_skus) : null, + args.detectedAt, + ], + ) + return "changed" +} + +export async function runPricingSync( + options: RunPricingSyncOptions, +): Promise { + const detectedAt = Date.now() + let parsed: ParsedPricing + let llmModel = "n/a" + try { + if (options.parsedOverride) { + parsed = options.parsedOverride + } else { + const req = await buildSyncRequest() + llmModel = pickSyncModel(options.syncModel) + parsed = await callSyncLlm(req, llmModel, { port: options.port }) + } + } catch (err) { + consola.error("Pricing sync fetch/LLM failed:", err) + const logId = recordSyncLog({ + ts: detectedAt, + status: "failed", + llmModel, + modelsUpdated: 0, + modelsRejected: 0, + error: (err as Error).message, + }) + return { + status: "failed", + updated: 0, + rejected: 0, + logId, + error: (err as Error).message, + } + } + + // Sanity gate: any row failing sanity rejects the entire sync. + for (const row of parsed.models) { + const current = selectCurrentVersion(row.model_id) + if (sanityFails(current ?? null, rowToFieldMap(row))) { + consola.warn( + `Pricing sync rejected: sanity check failed for model ${row.model_id}`, + ) + const logId = recordSyncLog({ + ts: detectedAt, + status: "rejected", + llmModel, + modelsUpdated: 0, + modelsRejected: parsed.models.length, + }) + return { + status: "rejected", + updated: 0, + rejected: parsed.models.length, + logId, + } + } + } + + let updated = 0 + const logId = recordSyncLog({ + ts: detectedAt, + status: "ok", + llmModel, + modelsUpdated: 0, + modelsRejected: 0, + }) + + const tx = getDb().transaction(() => { + for (const row of parsed.models) { + const result = applyPricingChange({ + row, + detectedAt, + syncLogId: logId, + }) + if (result === "changed") updated += 1 + } + getDb().run( + `INSERT INTO meta (key, value) VALUES ('last_pricing_sync_ts', ?) 
+ ON CONFLICT(key) DO UPDATE SET value = excluded.value`, + [String(detectedAt)], + ) + getDb().run( + "UPDATE pricing_sync_log SET models_updated = ?, source_count = ? WHERE id = ?", + [updated, parsed.models.length, logId], + ) + }) + tx() + + return { status: "ok", updated, rejected: 0, logId } +} + +interface RecordSyncLogArgs { + ts: number + status: SyncStatus + llmModel: string + modelsUpdated: number + modelsRejected: number + error?: string +} + +function recordSyncLog(args: RecordSyncLogArgs): number { + const stmt = getDb().prepare( + `INSERT INTO pricing_sync_log + (ts, status, llm_model, models_updated, models_rejected, error) + VALUES (?, ?, ?, ?, ?, ?)`, + ) + stmt.run( + args.ts, + args.status, + args.llmModel, + args.modelsUpdated, + args.modelsRejected, + args.error ?? null, + ) + const idRow = getDb() + .query<{ id: number }, []>("SELECT last_insert_rowid() AS id") + .get() + return idRow?.id ?? 0 +} diff --git a/src/lib/pricing-sync.ts b/src/lib/pricing-sync.ts index 08e4f821b..17175ab91 100644 --- a/src/lib/pricing-sync.ts +++ b/src/lib/pricing-sync.ts @@ -162,10 +162,12 @@ export async function callSyncLlm( } function diffsExceeds( - a: number | null | undefined, - b: number | null | undefined, + rawA: number | null | undefined, + rawB: number | null | undefined, epsilon: number, ): boolean { + const a = rawA ?? null + const b = rawB ?? 
null if (a === null && b === null) return false if (a === null || b === null) return true if (a === 0 && b === 0) return false diff --git a/tests/pricing-sync-runner.test.ts b/tests/pricing-sync-runner.test.ts new file mode 100644 index 000000000..1061bed25 --- /dev/null +++ b/tests/pricing-sync-runner.test.ts @@ -0,0 +1,118 @@ +import { test, expect, describe, beforeEach } from "bun:test" + +import { __resetDbForTests, getDb, initDb } from "../src/lib/db" +import { runPricingSync } from "../src/lib/pricing-sync-runner" + +const PORT = 4141 + +function setupDb() { + __resetDbForTests() + initDb(":memory:") +} + +const baseModel = (id: string, input: number, output: number) => ({ + model_id: id, + input_per_mtok: input, + cached_input_per_mtok: 0, + output_per_mtok: output, + reasoning_per_mtok: null, + premium_multiplier: 1, + premium_unit_price: 0.04, + currency: "USD", + source: "azure-retail", + source_skus: ["sku1"], +}) + +describe("runPricingSync", () => { + beforeEach(() => { + setupDb() + }) + + test("first sync inserts version + materialized rows", async () => { + const out = await runPricingSync({ + port: PORT, + parsedOverride: { models: [baseModel("gpt-4o", 5, 15)] }, + }) + expect(out.status).toBe("ok") + expect(out.updated).toBe(1) + + const versions = getDb() + .query< + { count: number }, + [] + >("SELECT COUNT(*) AS count FROM model_pricing_versions") + .get() + expect(versions?.count).toBe(1) + + const live = getDb() + .query< + { input_per_mtok: number }, + [] + >("SELECT input_per_mtok FROM model_pricing WHERE model_id = 'gpt-4o'") + .get() + expect(live?.input_per_mtok).toBe(5) + }) + + test("identical second sync writes zero new versions", async () => { + await runPricingSync({ + port: PORT, + parsedOverride: { models: [baseModel("gpt-4o", 5, 15)] }, + }) + const r2 = await runPricingSync({ + port: PORT, + parsedOverride: { models: [baseModel("gpt-4o", 5, 15)] }, + }) + expect(r2.updated).toBe(0) + const versions = getDb() + .query< + { count: 
number }, + [] + >("SELECT COUNT(*) AS count FROM model_pricing_versions") + .get() + expect(versions?.count).toBe(1) + }) + + test("changed price patches old effective_to and inserts new version", async () => { + await runPricingSync({ + port: PORT, + parsedOverride: { models: [baseModel("gpt-4o", 5, 15)] }, + }) + const r2 = await runPricingSync({ + port: PORT, + parsedOverride: { models: [baseModel("gpt-4o", 5.5, 15)] }, // 10% change + }) + expect(r2.updated).toBe(1) + const rows = getDb() + .query< + { effective_to: number | null; input_per_mtok: number }, + [] + >("SELECT effective_to, input_per_mtok FROM model_pricing_versions ORDER BY id") + .all() + expect(rows).toHaveLength(2) + expect(rows[0].effective_to).not.toBeNull() + expect(rows[1].effective_to).toBeNull() + expect(rows[1].input_per_mtok).toBeCloseTo(5.5) + }) + + test("10x change rejects the whole sync", async () => { + await runPricingSync({ + port: PORT, + parsedOverride: { models: [baseModel("gpt-4o", 1, 15)] }, + }) + const r2 = await runPricingSync({ + port: PORT, + parsedOverride: { + models: [baseModel("gpt-4o", 100, 15)], // 100x → fails + }, + }) + expect(r2.status).toBe("rejected") + expect(r2.updated).toBe(0) + const live = getDb() + .query< + { input_per_mtok: number }, + [] + >("SELECT input_per_mtok FROM model_pricing WHERE model_id = 'gpt-4o'") + .get() + expect(live?.input_per_mtok).toBe(1) // unchanged + }) +}) From c1aa449983348040088470ce8f03e067a4f7e04c Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 16:36:32 +0800 Subject: [PATCH 15/34] feat(pricing): background pricing sync scheduler (Task #14) Extract promptClaudeCodeSetup helper to stay within max-lines-per-function. 
Wire schedulePricingSync into start.ts with CLI flags: --pricing-sync-model, --pricing-sync-interval-days, --pricing-sync-disabled Co-Authored-By: Claude Opus 4.6 --- src/lib/pricing-scheduler.ts | 61 ++++++++++++++++++ src/start.ts | 119 +++++++++++++++++++++++------------ 2 files changed, 139 insertions(+), 41 deletions(-) create mode 100644 src/lib/pricing-scheduler.ts diff --git a/src/lib/pricing-scheduler.ts b/src/lib/pricing-scheduler.ts new file mode 100644 index 000000000..eed91d4ca --- /dev/null +++ b/src/lib/pricing-scheduler.ts @@ -0,0 +1,61 @@ +import consola from "consola" + +import { getDb } from "./db" +import { runPricingSync } from "./pricing-sync-runner" + +export interface SchedulePricingSyncOptions { + port: number + intervalDays: number + syncModel?: string +} + +function readLastSync(): number { + try { + const row = getDb() + .query< + { value: string }, + [] + >("SELECT value FROM meta WHERE key = 'last_pricing_sync_ts'") + .get() + if (!row) return 0 + return Number.parseInt(row.value, 10) || 0 + } catch { + return 0 + } +} + +/** + * Background scheduler: runs pricing sync at startup if the last run is older + * than the interval (or has never happened), and then every `intervalDays`. + * + * Errors never crash the process. 
+ */ +export function schedulePricingSync(options: SchedulePricingSyncOptions): void { + const intervalMs = options.intervalDays * 86_400_000 + + const tick = () => { + const last = readLastSync() + const delay = Math.max(0, last + intervalMs - Date.now()) + setTimeout(() => { + runPricingSync({ + port: options.port, + syncModel: options.syncModel, + }) + .then( + (result) => { + consola.info( + `Pricing sync ${result.status} (updated=${result.updated}, rejected=${result.rejected})`, + ) + }, + (err: unknown) => { + consola.warn("Pricing sync failed:", err) + }, + ) + .finally(() => { + tick() + }) + }, delay) + } + + tick() +} diff --git a/src/start.ts b/src/start.ts index 423f51337..701946373 100644 --- a/src/start.ts +++ b/src/start.ts @@ -10,6 +10,7 @@ import { AccountPool, type Strategy } from "./lib/account-pool" import { loadAccounts, persistAccounts } from "./lib/accounts-loader" import { initDb } from "./lib/db" import { ensurePaths, PATHS } from "./lib/paths" +import { schedulePricingSync } from "./lib/pricing-scheduler" import { initProxyFromEnv } from "./lib/proxy" import { generateEnvScript } from "./lib/shell" import { state } from "./lib/state" @@ -31,6 +32,53 @@ interface RunServerOptions { dbPath: string accountsFile?: string strategy: Strategy + pricingSyncModel?: string + pricingSyncIntervalDays: number + pricingSyncDisabled: boolean +} + +async function promptClaudeCodeSetup(serverUrl: string): Promise { + invariant(state.models, "Models should be loaded by now") + + const selectedModel = await consola.prompt( + "Select a model to use with Claude Code", + { + type: "select", + options: state.models.data.map((model) => model.id), + }, + ) + + const selectedSmallModel = await consola.prompt( + "Select a small model to use with Claude Code", + { + type: "select", + options: state.models.data.map((model) => model.id), + }, + ) + + const command = generateEnvScript( + { + ANTHROPIC_BASE_URL: serverUrl, + ANTHROPIC_AUTH_TOKEN: "dummy", + 
ANTHROPIC_MODEL: selectedModel, + ANTHROPIC_DEFAULT_SONNET_MODEL: selectedModel, + ANTHROPIC_SMALL_FAST_MODEL: selectedSmallModel, + ANTHROPIC_DEFAULT_HAIKU_MODEL: selectedSmallModel, + DISABLE_NON_ESSENTIAL_MODEL_CALLS: "1", + CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1", + }, + "claude", + ) + + try { + clipboard.writeSync(command) + consola.success("Copied Claude Code command to clipboard!") + } catch { + consola.warn( + "Failed to copy to clipboard. Here is the Claude Code command:", + ) + consola.log(command) + } } export async function runServer(options: RunServerOptions): Promise { @@ -102,47 +150,7 @@ export async function runServer(options: RunServerOptions): Promise { const serverUrl = `http://localhost:${options.port}` if (options.claudeCode) { - invariant(state.models, "Models should be loaded by now") - - const selectedModel = await consola.prompt( - "Select a model to use with Claude Code", - { - type: "select", - options: state.models.data.map((model) => model.id), - }, - ) - - const selectedSmallModel = await consola.prompt( - "Select a small model to use with Claude Code", - { - type: "select", - options: state.models.data.map((model) => model.id), - }, - ) - - const command = generateEnvScript( - { - ANTHROPIC_BASE_URL: serverUrl, - ANTHROPIC_AUTH_TOKEN: "dummy", - ANTHROPIC_MODEL: selectedModel, - ANTHROPIC_DEFAULT_SONNET_MODEL: selectedModel, - ANTHROPIC_SMALL_FAST_MODEL: selectedSmallModel, - ANTHROPIC_DEFAULT_HAIKU_MODEL: selectedSmallModel, - DISABLE_NON_ESSENTIAL_MODEL_CALLS: "1", - CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1", - }, - "claude", - ) - - try { - clipboard.writeSync(command) - consola.success("Copied Claude Code command to clipboard!") - } catch { - consola.warn( - "Failed to copy to clipboard. 
Here is the Claude Code command:", - ) - consola.log(command) - } + await promptClaudeCodeSetup(serverUrl) } consola.box( @@ -153,6 +161,14 @@ export async function runServer(options: RunServerOptions): Promise { fetch: server.fetch as ServerHandler, port: options.port, }) + + if (!options.pricingSyncDisabled) { + schedulePricingSync({ + port: options.port, + intervalDays: options.pricingSyncIntervalDays, + syncModel: options.pricingSyncModel, + }) + } } export const start = defineCommand({ @@ -236,6 +252,21 @@ export const start = defineCommand({ description: "Account selection strategy: round-robin | least-busy | least-recent", }, + "pricing-sync-model": { + type: "string", + description: + "Model to use for LLM-powered pricing sync (default: auto-select from whitelist)", + }, + "pricing-sync-interval-days": { + type: "string", + default: "7", + description: "How often (in days) to re-sync model pricing", + }, + "pricing-sync-disabled": { + type: "boolean", + default: false, + description: "Disable automatic background pricing sync", + }, }, run({ args }) { const rateLimitRaw = args["rate-limit"] @@ -257,6 +288,12 @@ export const start = defineCommand({ dbPath: args["db-path"], accountsFile: args["accounts-file"], strategy: args.strategy as Strategy, + pricingSyncModel: args["pricing-sync-model"], + pricingSyncIntervalDays: Number.parseInt( + args["pricing-sync-interval-days"], + 10, + ), + pricingSyncDisabled: args["pricing-sync-disabled"], }) }, }) From 74d02a971f7d5e29be0baec0eaf11b502b600908 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 16:40:26 +0800 Subject: [PATCH 16/34] feat(pricing): add pricing-sync subcommand for manual sync (Task #15) New `pricing-sync` CLI subcommand bootstraps a temporary server and runs a one-off pricing sync against Azure/Anthropic sources via the LLM extraction pipeline. 
Co-Authored-By: Claude Opus 4.6 --- src/main.ts | 9 ++- src/pricing-sync-cmd.ts | 153 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 src/pricing-sync-cmd.ts diff --git a/src/main.ts b/src/main.ts index 4f6ca784b..c21a76b41 100644 --- a/src/main.ts +++ b/src/main.ts @@ -5,6 +5,7 @@ import { defineCommand, runMain } from "citty" import { auth } from "./auth" import { checkUsage } from "./check-usage" import { debug } from "./debug" +import { pricingSyncCmd } from "./pricing-sync-cmd" import { start } from "./start" const main = defineCommand({ @@ -13,7 +14,13 @@ const main = defineCommand({ description: "A wrapper around GitHub Copilot API to make it OpenAI compatible, making it usable for other tools.", }, - subCommands: { auth, start, "check-usage": checkUsage, debug }, + subCommands: { + auth, + start, + "check-usage": checkUsage, + "pricing-sync": pricingSyncCmd, + debug, + }, }) await runMain(main) diff --git a/src/pricing-sync-cmd.ts b/src/pricing-sync-cmd.ts new file mode 100644 index 000000000..55faa6cc4 --- /dev/null +++ b/src/pricing-sync-cmd.ts @@ -0,0 +1,153 @@ +import { defineCommand } from "citty" +import consola from "consola" +import { serve, type ServerHandler } from "srvx" + +import { loadAccounts } from "./lib/accounts-loader" +import { initDb } from "./lib/db" +import { ensurePaths, PATHS } from "./lib/paths" +import { runPricingSync } from "./lib/pricing-sync-runner" +import { initProxyFromEnv } from "./lib/proxy" +import { state } from "./lib/state" +import { setupCopilotTokenFor, setupGitHubToken } from "./lib/token" +import { cacheModels, cacheVSCodeVersion } from "./lib/utils" +import { server } from "./server" + +interface RunPricingSyncCmdOptions { + port: number + syncModel?: string + githubToken?: string + accountsFile?: string + accountType: string + dbPath: string + proxyEnv: boolean + verbose: boolean +} + +async function bootstrapServer( + options: 
RunPricingSyncCmdOptions, +): Promise { + if (options.proxyEnv) { + initProxyFromEnv() + } + if (options.verbose) { + consola.level = 5 + } + + state.accountType = options.accountType + await ensurePaths() + initDb(options.dbPath) + await cacheVSCodeVersion() + + let legacyToken = options.githubToken + if (!options.accountsFile && !legacyToken) { + legacyToken = await setupGitHubToken() + } + + const loaded = await loadAccounts({ + accountsFile: options.accountsFile, + legacyToken, + defaultAccountType: options.accountType, + }) + + if (loaded.length === 0) { + throw new Error("No accounts available.") + } + + state.pool = undefined as never // not needed for sync + await Promise.all(loaded.map((a) => setupCopilotTokenFor(a))) + await cacheModels() +} + +function startTempServer(port: number): void { + serve({ + fetch: server.fetch as ServerHandler, + port, + }) +} + +export async function runPricingSyncCmd( + options: RunPricingSyncCmdOptions, +): Promise { + await bootstrapServer(options) + startTempServer(options.port) + + consola.info("Running one-off pricing sync…") + const result = await runPricingSync({ + port: options.port, + syncModel: options.syncModel, + }) + + if (result.status === "ok") { + consola.success(`Pricing sync complete: ${result.updated} model(s) updated`) + } else if (result.status === "rejected") { + consola.warn( + `Pricing sync rejected (sanity check): ${result.rejected} model(s)`, + ) + } else { + consola.error(`Pricing sync failed: ${result.error ?? "unknown error"}`) + } + + process.exit(result.status === "ok" ? 
0 : 1) +} + +export const pricingSyncCmd = defineCommand({ + meta: { + name: "pricing-sync", + description: "Run a one-off pricing sync against Azure and Anthropic", + }, + args: { + port: { + alias: "p", + type: "string", + default: "4141", + description: "Port for the temporary server (needed for LLM self-call)", + }, + "sync-model": { + type: "string", + description: "Model to use for the LLM extraction step", + }, + "github-token": { + alias: "g", + type: "string", + description: "GitHub token", + }, + "accounts-file": { + type: "string", + description: "Path to accounts JSON file", + }, + "account-type": { + alias: "a", + type: "string", + default: "individual", + description: "Account type", + }, + "db-path": { + type: "string", + default: PATHS.USAGE_DB_PATH, + description: "Path to the usage SQLite database", + }, + "proxy-env": { + type: "boolean", + default: false, + description: "Initialize proxy from environment variables", + }, + verbose: { + alias: "v", + type: "boolean", + default: false, + description: "Enable verbose logging", + }, + }, + run({ args }) { + return runPricingSyncCmd({ + port: Number.parseInt(args.port, 10), + syncModel: args["sync-model"], + githubToken: args["github-token"], + accountsFile: args["accounts-file"], + accountType: args["account-type"], + dbPath: args["db-path"], + proxyEnv: args["proxy-env"], + verbose: args.verbose, + }) + }, +}) From 1dffe4c595bf097d9352307c88b5a0da6d409c86 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 16:43:10 +0800 Subject: [PATCH 17/34] feat(usage): expose pricing metadata and sync status in /usage (Task #16) Add pricing.models (current per-model rates) and pricing.lastSync (latest sync log entry) to the /usage endpoint response, enabling dashboard consumers to display cost configuration and sync health. 
Co-Authored-By: Claude Opus 4.6 --- src/routes/usage/route.ts | 52 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/src/routes/usage/route.ts b/src/routes/usage/route.ts index b82d575a4..276268cd4 100644 --- a/src/routes/usage/route.ts +++ b/src/routes/usage/route.ts @@ -1,5 +1,6 @@ import { Hono } from "hono" +import { getDb } from "~/lib/db" import { state } from "~/lib/state" import { computeUsageStats, @@ -60,6 +61,56 @@ async function fetchQuota(): Promise { return { byAccount: results, primary } } +interface PricingEntry { + model_id: string + input_per_mtok: number | null + cached_input_per_mtok: number | null + output_per_mtok: number | null + reasoning_per_mtok: number | null + premium_multiplier: number | null + premium_unit_price: number | null + source: string | null + updated_at: number | null +} + +interface SyncLogEntry { + id: number + ts: number + status: string + llm_model: string + models_updated: number + models_rejected: number + error: string | null +} + +function fetchPricingMeta(): { + models: Array + lastSync: SyncLogEntry | null +} { + try { + const db = getDb() + const models = db + .query( + `SELECT model_id, input_per_mtok, cached_input_per_mtok, + output_per_mtok, reasoning_per_mtok, + premium_multiplier, premium_unit_price, + source, updated_at + FROM model_pricing ORDER BY model_id`, + ) + .all() + const lastSync = + db + .query( + `SELECT id, ts, status, llm_model, models_updated, models_rejected, error + FROM pricing_sync_log ORDER BY id DESC LIMIT 1`, + ) + .get() ?? 
null + return { models, lastSync } + } catch { + return { models: [], lastSync: null } + } +} + usageRoute.get("/", async (c) => { const stats = (() => { try { @@ -86,5 +137,6 @@ usageRoute.get("/", async (c) => { ...(primary as Record | null | undefined), quota, stats, + pricing: fetchPricingMeta(), }) }) From 0527f9625ab87f1527f7e91eb873fe90ccb5a628 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 17:01:52 +0800 Subject: [PATCH 18/34] fix(pricing): fix 400 error in pricing sync LLM call - Remove response_format (not supported by all Copilot models) - Pre-filter Azure rows to only model-relevant entries (reduces payload) - Trim Anthropic HTML to pricing section (~10KB cap) - Strip markdown fences from LLM response - Log response body on error for easier debugging Co-Authored-By: Claude Opus 4.6 --- src/lib/pricing-sync.ts | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/lib/pricing-sync.ts b/src/lib/pricing-sync.ts index 17175ab91..b7a526a0d 100644 --- a/src/lib/pricing-sync.ts +++ b/src/lib/pricing-sync.ts @@ -78,11 +78,33 @@ export function pickSyncModel(cliFlag: string | undefined): string { export async function buildSyncRequest(): Promise { const knownModels = state.models?.data.map((m) => m.id) ?? [] const hasClaude = knownModels.some((m) => m.startsWith("claude")) - const [azureRows, anthropicHtml] = await Promise.all([ + const [azureRowsRaw, anthropicHtml] = await Promise.all([ fetchAzureRetailPrices(), hasClaude ? fetchAnthropicPricingHtml() : Promise.resolve(null), ]) - return { knownModels, azureRows, anthropicHtml } + // Pre-filter Azure rows: keep only rows whose productName or meterName + // contain a token that resembles a known model id (e.g. "GPT-4o", "o3-mini"). + const modelTokens = knownModels.map((m) => m.toLowerCase()) + const azureRows = azureRowsRaw.filter((row) => { + const haystack = + `${row.productName ?? ""} ${row.meterName ?? ""} ${row.armSkuName ?? 
""}`.toLowerCase() + return modelTokens.some((tok) => haystack.includes(tok)) + }) + consola.debug( + `Pricing sync: ${azureRowsRaw.length} Azure rows → ${azureRows.length} after filtering for ${knownModels.length} models`, + ) + // Trim Anthropic HTML: extract only the pricing table area to reduce token count. + let trimmedAnthropicHtml = anthropicHtml + if (anthropicHtml && anthropicHtml.length > 10_000) { + // Keep a generous window around pricing-related content + const idx = anthropicHtml.toLowerCase().indexOf("pricing") + if (idx !== -1) { + const start = Math.max(0, idx - 2000) + const end = Math.min(anthropicHtml.length, idx + 8000) + trimmedAnthropicHtml = anthropicHtml.slice(start, end) + } + } + return { knownModels, azureRows, anthropicHtml: trimmedAnthropicHtml } } export const NORMALIZER_SYSTEM_PROMPT = `You are a pricing extractor. Convert raw price source rows from Azure Retail Prices API and the Anthropic public pricing page into a normalized JSON shape. @@ -134,7 +156,6 @@ export async function callSyncLlm( }, body: JSON.stringify({ model: modelId, - response_format: { type: "json_object" }, messages: [ { role: "system", content: NORMALIZER_SYSTEM_PROMPT }, { role: "user", content: JSON.stringify(req) }, @@ -143,8 +164,9 @@ export async function callSyncLlm( }, ) if (!resp.ok) { + const errorBody = await resp.text().catch(() => "") throw new Error( - `Pricing-sync LLM call failed: ${resp.status} ${resp.statusText}`, + `Pricing-sync LLM call failed: ${resp.status} ${resp.statusText}\n${errorBody}`, ) } const body = (await resp.json()) as { @@ -154,7 +176,12 @@ export async function callSyncLlm( if (!content) { throw new Error("Pricing-sync LLM response had no content") } - const parsed = JSON.parse(content) as ParsedPricing + // Strip markdown fences if the model wraps its response + const cleaned = content + .replace(/^```(?:json)?\s*/i, "") + .replace(/\s*```\s*$/, "") + .trim() + const parsed = JSON.parse(cleaned) as ParsedPricing if 
(!Array.isArray(parsed.models)) { throw new TypeError("Pricing-sync LLM response missing `models` array") } From 3e0a5d7845d223168993789ea83983c94496dcbe Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 22:18:09 +0800 Subject: [PATCH 19/34] feat: add multi-account auth management (add/list/remove) Add `auth add`, `auth list`, and `auth remove` subcommands for interactive multi-account management. `auth add` runs Device Flow OAuth and auto-detects the GitHub username as account name. Accounts are stored in accounts.json. Legacy `auth` (bare) still works as before for backward compatibility. Co-Authored-By: Claude Opus 4.6 --- src/auth-add.ts | 91 ++++++++++++++++++++++++++++++++++++++ src/auth-list.ts | 71 +++++++++++++++++++++++++++++ src/auth-remove.ts | 52 ++++++++++++++++++++++ src/auth.ts | 11 ++++- src/lib/accounts-loader.ts | 59 ++++++++++++++++++++++++ src/lib/paths.ts | 2 + src/lib/token.ts | 22 +++++---- 7 files changed, 299 insertions(+), 9 deletions(-) create mode 100644 src/auth-add.ts create mode 100644 src/auth-list.ts create mode 100644 src/auth-remove.ts diff --git a/src/auth-add.ts b/src/auth-add.ts new file mode 100644 index 000000000..dd84261a8 --- /dev/null +++ b/src/auth-add.ts @@ -0,0 +1,91 @@ +import { defineCommand } from "citty" +import consola from "consola" + +import { addAccountEntry } from "./lib/accounts-loader" +import { ensurePaths } from "./lib/paths" +import { state } from "./lib/state" +import { runDeviceFlow } from "./lib/token" +import { getGitHubUser } from "./services/github/get-user" + +interface RunAuthAddOptions { + name?: string + accountType: string + verbose: boolean +} + +async function resolveAccountName( + token: string, + explicitName?: string, +): Promise { + if (explicitName) return explicitName + try { + const user = await getGitHubUser({ + account: { + name: "_probe", + accountType: state.accountType, + githubToken: token, + copilotTokenRefreshAt: 0, + inFlight: 0, + lastUsedAt: 0, + failureCount: 0, + 
}, + vsCodeVersion: state.vsCodeVersion, + }) + consola.info(`Detected GitHub user: ${user.login}`) + return user.login + } catch { + consola.warn("Could not detect GitHub username, using 'default'") + return "default" + } +} + +export async function runAuthAdd(options: RunAuthAddOptions): Promise { + if (options.verbose) { + consola.level = 5 + } + state.accountType = options.accountType + await ensurePaths() + + consola.info("Starting GitHub Device Flow authentication…") + const token = await runDeviceFlow() + + const name = await resolveAccountName(token, options.name) + await addAccountEntry({ + name, + github_token: token, + account_type: options.accountType, + }) +} + +export const authAdd = defineCommand({ + meta: { + name: "add", + description: "Add a new GitHub account via Device Flow OAuth", + }, + args: { + name: { + alias: "n", + type: "string", + description: "Account name (defaults to GitHub username if not provided)", + }, + "account-type": { + alias: "a", + type: "string", + default: "individual", + description: "Account type (individual, business, enterprise)", + }, + verbose: { + alias: "v", + type: "boolean", + default: false, + description: "Enable verbose logging", + }, + }, + run({ args }) { + return runAuthAdd({ + name: args.name, + accountType: args["account-type"], + verbose: args.verbose, + }) + }, +}) diff --git a/src/auth-list.ts b/src/auth-list.ts new file mode 100644 index 000000000..2692f5b93 --- /dev/null +++ b/src/auth-list.ts @@ -0,0 +1,71 @@ +import { defineCommand } from "citty" +import consola from "consola" + +import { readAccountsFile } from "./lib/accounts-loader" +import { ensurePaths } from "./lib/paths" +import { state } from "./lib/state" +import { getGitHubUser } from "./services/github/get-user" + +interface RunAuthListOptions { + verbose: boolean +} + +export async function runAuthList(options: RunAuthListOptions): Promise { + if (options.verbose) { + consola.level = 5 + } + await ensurePaths() + + const data = await 
readAccountsFile() + if (data.accounts.length === 0) { + consola.info("No accounts found. Use `auth add` to add one.") + return + } + + const rows: Array<{ name: string; type: string; login: string }> = [] + for (const entry of data.accounts) { + let login: string + try { + const user = await getGitHubUser({ + account: { + name: entry.name, + accountType: entry.account_type ?? "individual", + githubToken: entry.github_token, + copilotTokenRefreshAt: 0, + inFlight: 0, + lastUsedAt: 0, + failureCount: 0, + }, + vsCodeVersion: state.vsCodeVersion, + }) + login = user.login + } catch { + login = "(token invalid)" + } + rows.push({ + name: entry.name, + type: entry.account_type ?? "individual", + login, + }) + } + + console.table(rows) +} + +export const authList = defineCommand({ + meta: { + name: "list", + description: "List all configured GitHub accounts", + }, + args: { + verbose: { + alias: "v", + type: "boolean", + default: false, + description: "Enable verbose logging", + }, + }, + run({ args }) { + return runAuthList({ verbose: args.verbose }) + }, +}) diff --git a/src/auth-remove.ts b/src/auth-remove.ts new file mode 100644 index 000000000..2ed4a37bc --- /dev/null +++ b/src/auth-remove.ts @@ -0,0 +1,52 @@ +import { defineCommand } from "citty" +import consola from "consola" + +import { removeAccountEntry } from "./lib/accounts-loader" +import { ensurePaths } from "./lib/paths" + +interface RunAuthRemoveOptions { + name: string + verbose: boolean +} + +export async function runAuthRemove( + options: RunAuthRemoveOptions, +): Promise { + if (options.verbose) { + consola.level = 5 + } + await ensurePaths() + + const removed = await removeAccountEntry(options.name) + if (!removed) { + consola.warn(`Account "${options.name}" not found`) + process.exitCode = 1 + } +} + +export const authRemove = defineCommand({ + meta: { + name: "remove", + description: "Remove a GitHub account by name", + }, + args: { + name: { + alias: "n", + type: "string", + required: true, + 
description: "Name of the account to remove", + }, + verbose: { + alias: "v", + type: "boolean", + default: false, + description: "Enable verbose logging", + }, + }, + run({ args }) { + return runAuthRemove({ + name: args.name, + verbose: args.verbose, + }) + }, +}) diff --git a/src/auth.ts b/src/auth.ts index cb31ff6f8..838701030 100644 --- a/src/auth.ts +++ b/src/auth.ts @@ -3,6 +3,9 @@ import { defineCommand } from "citty" import consola from "consola" +import { authAdd } from "./auth-add" +import { authList } from "./auth-list" +import { authRemove } from "./auth-remove" import { PATHS, ensurePaths } from "./lib/paths" import { state } from "./lib/state" import { setupGitHubToken } from "./lib/token" @@ -28,7 +31,8 @@ export async function runAuth(options: RunAuthOptions): Promise { export const auth = defineCommand({ meta: { name: "auth", - description: "Run GitHub auth flow without running the server", + description: + "Manage GitHub authentication. Run without subcommand for legacy single-token flow.", }, args: { verbose: { @@ -43,6 +47,11 @@ export const auth = defineCommand({ description: "Show GitHub token on auth", }, }, + subCommands: { + add: authAdd, + list: authList, + remove: authRemove, + }, run({ args }) { return runAuth({ verbose: args.verbose, diff --git a/src/lib/accounts-loader.ts b/src/lib/accounts-loader.ts index 5addf7447..2bccecc6f 100644 --- a/src/lib/accounts-loader.ts +++ b/src/lib/accounts-loader.ts @@ -1,9 +1,11 @@ +import consola from "consola" import fs from "node:fs/promises" import path from "node:path" import type { Account } from "./account-pool" import { getDb } from "./db" +import { PATHS } from "./paths" export interface AccountsFileEntry { name: string @@ -78,3 +80,60 @@ export function persistAccounts(accounts: Array): void { }) tx(accounts) } + +/** Read accounts file, returning empty accounts array if missing/invalid. */ +export async function readAccountsFile( + filePath?: string, +): Promise { + const p = filePath ?? 
PATHS.ACCOUNTS_FILE_PATH + try { + const buf = await fs.readFile(p) + const parsed = JSON.parse(buf.toString("utf8")) as AccountsFile + if (Array.isArray(parsed.accounts)) return parsed + } catch { + // File missing or invalid — return empty + } + return { accounts: [] } +} + +/** Write accounts file with restricted permissions. */ +export async function writeAccountsFile( + data: AccountsFile, + filePath?: string, +): Promise { + const p = filePath ?? PATHS.ACCOUNTS_FILE_PATH + await fs.writeFile(p, JSON.stringify(data, null, 2), "utf8") + try { + await fs.chmod(p, 0o600) + } catch { + // chmod may fail on Windows — non-critical + } +} + +/** Append an account entry. Throws if name already exists. */ +export async function addAccountEntry( + entry: AccountsFileEntry, + filePath?: string, +): Promise { + const data = await readAccountsFile(filePath) + if (data.accounts.some((a) => a.name === entry.name)) { + throw new Error(`Account "${entry.name}" already exists`) + } + data.accounts.push(entry) + await writeAccountsFile(data, filePath) + consola.success(`Account "${entry.name}" added`) +} + +/** Remove an account by name. Returns true if found and removed. 
*/ +export async function removeAccountEntry( + name: string, + filePath?: string, +): Promise { + const data = await readAccountsFile(filePath) + const idx = data.accounts.findIndex((a) => a.name === name) + if (idx === -1) return false + data.accounts.splice(idx, 1) + await writeAccountsFile(data, filePath) + consola.success(`Account "${name}" removed`) + return true +} diff --git a/src/lib/paths.ts b/src/lib/paths.ts index 231560d12..f347221d8 100644 --- a/src/lib/paths.ts +++ b/src/lib/paths.ts @@ -6,11 +6,13 @@ const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api") const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token") const USAGE_DB_PATH = path.join(APP_DIR, "usage.sqlite") +const ACCOUNTS_FILE_PATH = path.join(APP_DIR, "accounts.json") export const PATHS = { APP_DIR, GITHUB_TOKEN_PATH, USAGE_DB_PATH, + ACCOUNTS_FILE_PATH, } export async function ensurePaths(): Promise { diff --git a/src/lib/token.ts b/src/lib/token.ts index 4e3d8e9f2..137ab7560 100644 --- a/src/lib/token.ts +++ b/src/lib/token.ts @@ -59,6 +59,19 @@ interface SetupGitHubTokenOptions { force?: boolean } +/** + * Run the GitHub Device Flow OAuth and return the raw access token. + * Does NOT write the token to disk — caller decides what to do with it. + */ +export async function runDeviceFlow(): Promise { + const response = await getDeviceCode() + consola.debug("Device code response:", response) + consola.info( + `Please enter the code "${response.user_code}" in ${response.verification_uri}`, + ) + return pollAccessToken(response) +} + /** * Reads or fetches a single GitHub token file at PATHS.GITHUB_TOKEN_PATH. 
* Returns the token; the caller is responsible for putting it into the @@ -79,14 +92,7 @@ export async function setupGitHubToken( } consola.info("Not logged in, getting new access token") - const response = await getDeviceCode() - consola.debug("Device code response:", response) - - consola.info( - `Please enter the code "${response.user_code}" in ${response.verification_uri}`, - ) - - const token = await pollAccessToken(response) + const token = await runDeviceFlow() await writeGithubToken(token) if (state.showToken) { From 5d8f8d7220dfcac9efed9c6f5747bb3882a99e69 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 25 Apr 2026 22:21:22 +0800 Subject: [PATCH 20/34] test: add unit tests for account file CRUD helpers Tests for readAccountsFile, writeAccountsFile, addAccountEntry, and removeAccountEntry covering round-trip, duplicate detection, removal, and missing file scenarios. Co-Authored-By: Claude Opus 4.6 --- tests/accounts-loader.test.ts | 99 ++++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/tests/accounts-loader.test.ts b/tests/accounts-loader.test.ts index 135c1be95..6d4d7c887 100644 --- a/tests/accounts-loader.test.ts +++ b/tests/accounts-loader.test.ts @@ -3,7 +3,14 @@ import fs from "node:fs" import os from "node:os" import path from "node:path" -import { loadAccounts, persistAccounts } from "../src/lib/accounts-loader" +import { + loadAccounts, + persistAccounts, + readAccountsFile, + writeAccountsFile, + addAccountEntry, + removeAccountEntry, +} from "../src/lib/accounts-loader" import { initDb, __resetDbForTests } from "../src/lib/db" const tmp = (suffix = "") => @@ -87,3 +94,93 @@ describe("accounts-loader", () => { } }) }) + +describe("account file helpers", () => { + test("readAccountsFile returns empty for missing file", async () => { + const data = await readAccountsFile(tmp(".json")) + expect(data).toEqual({ accounts: [] }) + }) + + test("writeAccountsFile + readAccountsFile round-trip", async () => { + 
const file = tmp(".json") + const payload = { + accounts: [ + { name: "a", github_token: "ghu_x", account_type: "individual" }, + ], + } + await writeAccountsFile(payload, file) + const read = await readAccountsFile(file) + expect(read.accounts).toHaveLength(1) + expect(read.accounts[0].name).toBe("a") + fs.unlinkSync(file) + }) + + test("addAccountEntry appends to file", async () => { + const file = tmp(".json") + await writeAccountsFile({ accounts: [] }, file) + await addAccountEntry( + { name: "alice", github_token: "ghu_a", account_type: "individual" }, + file, + ) + await addAccountEntry( + { name: "bob", github_token: "ghu_b", account_type: "business" }, + file, + ) + const data = await readAccountsFile(file) + expect(data.accounts).toHaveLength(2) + expect(data.accounts.map((a) => a.name)).toEqual(["alice", "bob"]) + fs.unlinkSync(file) + }) + + test("addAccountEntry throws on duplicate name", async () => { + const file = tmp(".json") + await writeAccountsFile( + { accounts: [{ name: "alice", github_token: "ghu_a" }] }, + file, + ) + let caught: Error | undefined + try { + await addAccountEntry({ name: "alice", github_token: "ghu_b" }, file) + } catch (err) { + caught = err as Error + } + expect(caught?.message).toBe('Account "alice" already exists') + fs.unlinkSync(file) + }) + + test("removeAccountEntry removes by name", async () => { + const file = tmp(".json") + await writeAccountsFile( + { + accounts: [ + { name: "alice", github_token: "ghu_a" }, + { name: "bob", github_token: "ghu_b" }, + ], + }, + file, + ) + const removed = await removeAccountEntry("alice", file) + expect(removed).toBe(true) + const data = await readAccountsFile(file) + expect(data.accounts).toHaveLength(1) + expect(data.accounts[0].name).toBe("bob") + fs.unlinkSync(file) + }) + + test("removeAccountEntry returns false for unknown name", async () => { + const file = tmp(".json") + await writeAccountsFile({ accounts: [] }, file) + const removed = await removeAccountEntry("ghost", 
file) + expect(removed).toBe(false) + fs.unlinkSync(file) + }) + + test("addAccountEntry creates file if missing", async () => { + const file = tmp(".json") + await addAccountEntry({ name: "new", github_token: "ghu_new" }, file) + const data = await readAccountsFile(file) + expect(data.accounts).toHaveLength(1) + expect(data.accounts[0].name).toBe("new") + fs.unlinkSync(file) + }) +}) From 9ca32651c7c9d25a925a25e2aea40a8b40c3c1ea Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sun, 26 Apr 2026 11:52:00 +0800 Subject: [PATCH 21/34] feat: cross-runtime SQLite, multi-token CLI, updated README - Add runtime-adaptive SQLite adapter (bun:sqlite for Bun, better-sqlite3 for Node.js) - Support comma-separated multi-token --github-token with name:type:token format - Rewrite README with full feature documentation Co-Authored-By: Claude Opus 4.6 --- README.md | 498 ++++++++++++++------------ bun.lock | 77 ++++ docs/tasks/04-with-account-wrapper.md | 68 ---- package.json | 10 +- src/lib/accounts-loader.ts | 59 +++ src/lib/db.ts | 40 +-- src/lib/pricing-scheduler.ts | 7 +- src/lib/pricing-sync-runner.ts | 96 +++-- src/lib/pricing-sync.ts | 3 + src/lib/sqlite-adapter.ts | 110 ++++++ src/lib/usage-recorder.ts | 4 +- src/lib/usage-stats.ts | 64 ++-- src/pricing-sync-cmd.ts | 20 +- src/routes/usage/route.ts | 10 +- src/start.ts | 30 +- tests/accounts-loader.test.ts | 7 +- tests/db.test.ts | 93 ++--- tests/parse-github-token.test.ts | 122 +++++++ tests/pricing-sync-runner.test.ts | 41 +-- tests/usage-recorder.test.ts | 34 +- tests/usage-stats.test.ts | 17 +- tsdown.config.ts | 19 + 22 files changed, 894 insertions(+), 535 deletions(-) delete mode 100644 docs/tasks/04-with-account-wrapper.md create mode 100644 src/lib/sqlite-adapter.ts create mode 100644 tests/parse-github-token.test.ts diff --git a/README.md b/README.md index 0d36c13c9..1885c2f4b 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ > This is a reverse-engineered proxy of GitHub Copilot API. 
It is not supported by GitHub, and may break unexpectedly. Use at your own risk. > [!WARNING] -> **GitHub Security Notice:** -> Excessive automated or scripted use of Copilot (including rapid or bulk requests, such as via automated tools) may trigger GitHub's abuse-detection systems. +> **GitHub Security Notice:** +> Excessive automated or scripted use of Copilot (including rapid or bulk requests, such as via automated tools) may trigger GitHub's abuse-detection systems. > You may receive a warning from GitHub Security, and further anomalous activity could result in temporary suspension of your Copilot access. > > GitHub prohibits use of their servers for excessive automated bulk activity or any activity that places undue burden on their infrastructure. @@ -25,282 +25,152 @@ --- -## Project Overview +## Overview -A reverse-engineered proxy for the GitHub Copilot API that exposes it as an OpenAI and Anthropic compatible service. This allows you to use GitHub Copilot with any tool that supports the OpenAI Chat Completions API or the Anthropic Messages API, including to power [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview). +A reverse-engineered proxy for the GitHub Copilot API that exposes it as an **OpenAI** and **Anthropic** compatible service. Use GitHub Copilot with any tool that supports the OpenAI Chat Completions API or the Anthropic Messages API — including [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview). ## Features -- **OpenAI & Anthropic Compatibility**: Exposes GitHub Copilot as an OpenAI-compatible (`/v1/chat/completions`, `/v1/models`, `/v1/embeddings`) and Anthropic-compatible (`/v1/messages`) API. -- **Claude Code Integration**: Easily configure and launch [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) to use Copilot as its backend with a simple command-line flag (`--claude-code`). 
-- **Usage Dashboard**: A web-based dashboard to monitor your Copilot API usage, view quotas, and see detailed statistics. -- **Rate Limit Control**: Manage API usage with rate-limiting options (`--rate-limit`) and a waiting mechanism (`--wait`) to prevent errors from rapid requests. -- **Manual Request Approval**: Manually approve or deny each API request for fine-grained control over usage (`--manual`). -- **Token Visibility**: Option to display GitHub and Copilot tokens during authentication and refresh for debugging (`--show-token`). -- **Flexible Authentication**: Authenticate interactively or provide a GitHub token directly, suitable for CI/CD environments. -- **Support for Different Account Types**: Works with individual, business, and enterprise GitHub Copilot plans. +- **OpenAI & Anthropic Compatible API** — `/v1/chat/completions`, `/v1/models`, `/v1/embeddings`, `/v1/messages`, `/v1/messages/count_tokens` +- **Claude Code Integration** — One-command setup with `--claude-code`, or manual `settings.json` configuration +- **Multi-Account & Load Balancing** — Configure multiple GitHub accounts with round-robin, least-busy, or least-recent strategies +- **Multi-Token CLI** — Pass multiple tokens directly on the command line with `name:type:token` format, no accounts file needed +- **Usage Tracking & Dashboard** — Built-in SQLite database tracks per-model token usage and costs; web dashboard at `/usage` +- **Automatic Pricing Sync** — Periodically fetches model pricing from Azure and Anthropic to calculate accurate cost estimates +- **Rate Limiting** — Configurable request throttling with optional wait mode +- **Manual Request Approval** — Approve or deny each API request interactively +- **Cross-Runtime** — Runs on both Node.js (via npx) and Bun, with runtime-adaptive SQLite (better-sqlite3 / bun:sqlite) ## Demo https://github.com/user-attachments/assets/7654b383-669d-4eb9-b23c-06d7aefee8c5 -## Prerequisites +## Quick Start -- Bun (>= 1.2.x) -- GitHub account 
with Copilot subscription (individual, business, or enterprise) - -## Installation - -To install dependencies, run: +### Using npx (Node.js) ```sh -bun install -``` +# Start the server (will prompt for GitHub auth on first run) +npx @weavejam/copilot-proxy@latest start -## Using with Docker +# Start with a specific token +npx @weavejam/copilot-proxy@latest start --github-token ghu_YOUR_TOKEN +``` -Build image +### Using Bun ```sh -docker build -t copilot-api . +bun install +bun run start ``` -Run the container +### Prerequisites -```sh -# Create a directory on your host to persist the GitHub token and related data -mkdir -p ./copilot-data +- **Node.js >= 18** or **Bun >= 1.2** +- GitHub account with an active Copilot subscription (individual, business, or enterprise) -# Run the container with a bind mount to persist the token -# This ensures your authentication survives container restarts +## Authentication -docker run -p 4141:4141 -v $(pwd)/copilot-data:/root/.local/share/copilot-api copilot-api -``` +### Interactive (Device Flow OAuth) -> **Note:** -> The GitHub token and related data will be stored in `copilot-data` on your host. This is mapped to `/root/.local/share/copilot-api` inside the container, ensuring persistence across restarts. +When no token is provided, the proxy starts an interactive GitHub Device Flow. Follow the on-screen instructions to authorize. -### Docker with Environment Variables +### Direct Token -You can pass the GitHub token directly to the container using environment variables: +Pass a token from a previous `auth` session: ```sh -# Build with GitHub token -docker build --build-arg GH_TOKEN=your_github_token_here -t copilot-api . 
- -# Run with GitHub token -docker run -p 4141:4141 -e GH_TOKEN=your_github_token_here copilot-api - -# Run with additional options -docker run -p 4141:4141 -e GH_TOKEN=your_token copilot-api start --verbose --port 4141 +npx @weavejam/copilot-proxy@latest start --github-token ghu_YOUR_TOKEN ``` -### Docker Compose Example - -```yaml -version: "3.8" -services: - copilot-api: - build: . - ports: - - "4141:4141" - environment: - - GH_TOKEN=your_github_token_here - restart: unless-stopped -``` +### Multi-Token CLI -The Docker image includes: +Pass multiple tokens in a single `--github-token` flag using comma-separated `name:type:token` format: -- Multi-stage build for optimized image size -- Non-root user for enhanced security -- Health check for container monitoring -- Pinned base image version for reproducible builds +```sh +# Full format: name:type:token +npx @weavejam/copilot-proxy@latest start \ + --github-token "personal:individual:ghu_aaa,work:business:ghu_bbb" -## Using with npx +# Omit type (defaults to individual): name:token +npx @weavejam/copilot-proxy@latest start \ + --github-token "personal:ghu_aaa,work:ghu_bbb" -You can run the project directly using npx: +# Pure tokens (auto-named account-1, account-2): token +npx @weavejam/copilot-proxy@latest start \ + --github-token "ghu_aaa,ghu_bbb" -```sh -npx copilot-api@latest start +# Mixed formats work too +npx @weavejam/copilot-proxy@latest start \ + --github-token "ghu_bare,named:ghu_two,full:business:ghu_three" ``` -With options: +### Accounts File -```sh -npx copilot-api@latest start --port 8080 -``` +For persistent multi-account configuration, create a JSON file: -For authentication only: +```json +{ + "accounts": [ + { "name": "personal", "github_token": "ghu_...", "account_type": "individual" }, + { "name": "work", "github_token": "ghu_...", "account_type": "business" } + ] +} +``` ```sh -npx copilot-api@latest auth +npx @weavejam/copilot-proxy@latest start --accounts-file ./accounts.json ``` -## 
Command Structure - -Copilot API now uses a subcommand structure with these main commands: - -- `start`: Start the Copilot API server. This command will also handle authentication if needed. -- `auth`: Run GitHub authentication flow without starting the server. This is typically used if you need to generate a token for use with the `--github-token` option, especially in non-interactive environments. -- `check-usage`: Show your current GitHub Copilot usage and quota information directly in the terminal (no server required). -- `debug`: Display diagnostic information including version, runtime details, file paths, and authentication status. Useful for troubleshooting and support. - -## Command Line Options - -### Start Command Options - -The following command line options are available for the `start` command: - -| Option | Description | Default | Alias | -| -------------- | ----------------------------------------------------------------------------- | ---------- | ----- | -| --port | Port to listen on | 4141 | -p | -| --verbose | Enable verbose logging | false | -v | -| --account-type | Account type to use (individual, business, enterprise) | individual | -a | -| --manual | Enable manual request approval | false | none | -| --rate-limit | Rate limit in seconds between requests | none | -r | -| --wait | Wait instead of error when rate limit is hit | false | -w | -| --github-token | Provide GitHub token directly (must be generated using the `auth` subcommand) | none | -g | -| --claude-code | Generate a command to launch Claude Code with Copilot API config | false | -c | -| --show-token | Show GitHub and Copilot tokens on fetch and refresh | false | none | -| --proxy-env | Initialize proxy from environment variables | false | none | - -### Auth Command Options - -| Option | Description | Default | Alias | -| ------------ | ------------------------- | ------- | ----- | -| --verbose | Enable verbose logging | false | -v | -| --show-token | Show GitHub token on auth | 
false | none | - -### Debug Command Options - -| Option | Description | Default | Alias | -| ------ | ------------------------- | ------- | ----- | -| --json | Output debug info as JSON | false | none | - -## API Endpoints - -The server exposes several endpoints to interact with the Copilot API. It provides OpenAI-compatible endpoints and now also includes support for Anthropic-compatible endpoints, allowing for greater flexibility with different tools and services. - -### OpenAI Compatible Endpoints - -These endpoints mimic the OpenAI API structure. - -| Endpoint | Method | Description | -| --------------------------- | ------ | --------------------------------------------------------- | -| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. | -| `GET /v1/models` | `GET` | Lists the currently available models. | -| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. | - -### Anthropic Compatible Endpoints - -These endpoints are designed to be compatible with the Anthropic Messages API. - -| Endpoint | Method | Description | -| -------------------------------- | ------ | ------------------------------------------------------------ | -| `POST /v1/messages` | `POST` | Creates a model response for a given conversation. | -| `POST /v1/messages/count_tokens` | `POST` | Calculates the number of tokens for a given set of messages. | - -### Usage Monitoring Endpoints - -New endpoints for monitoring your Copilot usage and quotas. - -| Endpoint | Method | Description | -| ------------ | ------ | ------------------------------------------------------------ | -| `GET /usage` | `GET` | Get detailed Copilot usage statistics and quota information. | -| `GET /token` | `GET` | Get the current Copilot token being used by the API. 
| - -## Example Usage - -Using with npx: +### Account Management Commands ```sh -# Basic usage with start command -npx copilot-api@latest start - -# Run on custom port with verbose logging -npx copilot-api@latest start --port 8080 --verbose - -# Use with a business plan GitHub account -npx copilot-api@latest start --account-type business - -# Use with an enterprise plan GitHub account -npx copilot-api@latest start --account-type enterprise +# Add account interactively (auto-detects GitHub username) +npx @weavejam/copilot-proxy@latest auth add -# Enable manual approval for each request -npx copilot-api@latest start --manual +# Add with explicit name and type +npx @weavejam/copilot-proxy@latest auth add --name work --account-type business -# Set rate limit to 30 seconds between requests -npx copilot-api@latest start --rate-limit 30 +# List all configured accounts +npx @weavejam/copilot-proxy@latest auth list -# Wait instead of error when rate limit is hit -npx copilot-api@latest start --rate-limit 30 --wait +# Remove an account +npx @weavejam/copilot-proxy@latest auth remove --name work -# Provide GitHub token directly -npx copilot-api@latest start --github-token ghp_YOUR_TOKEN_HERE - -# Run only the auth flow -npx copilot-api@latest auth - -# Run auth flow with verbose logging -npx copilot-api@latest auth --verbose - -# Show your Copilot usage/quota in the terminal (no server needed) -npx copilot-api@latest check-usage - -# Display debug information for troubleshooting -npx copilot-api@latest debug - -# Display debug information in JSON format -npx copilot-api@latest debug --json - -# Initialize proxy from environment variables (HTTP_PROXY, HTTPS_PROXY, etc.) -npx copilot-api@latest start --proxy-env +# Legacy single-token auth (backward compatible) +npx @weavejam/copilot-proxy@latest auth ``` -## Using the Usage Viewer +Accounts are stored in `~/.local/share/copilot-api/accounts.json`. 
-After starting the server, a URL to the Copilot Usage Dashboard will be displayed in your console. This dashboard is a web interface for monitoring your API usage. +## Multi-Account Load Balancing -1. Start the server. For example, using npx: - ```sh - npx copilot-api@latest start - ``` -2. The server will output a URL to the usage viewer. Copy and paste this URL into your browser. It will look something like this: - `https://ericc-ch.github.io/copilot-api?endpoint=http://localhost:4141/usage` - - If you use the `start.bat` script on Windows, this page will open automatically. +When using multiple accounts, choose a strategy with `--strategy`: -The dashboard provides a user-friendly interface to view your Copilot usage data: +| Strategy | Description | +| --- | --- | +| `round-robin` (default) | Rotate through accounts in order | +| `least-busy` | Pick the account with fewest in-flight requests | +| `least-recent` | Pick the account used least recently | -- **API Endpoint URL**: The dashboard is pre-configured to fetch data from your local server endpoint via the URL query parameter. You can change this URL to point to any other compatible API endpoint. -- **Fetch Data**: Click the "Fetch" button to load or refresh the usage data. The dashboard will automatically fetch data on load. -- **Usage Quotas**: View a summary of your usage quotas for different services like Chat and Completions, displayed with progress bars for a quick overview. -- **Detailed Information**: See the full JSON response from the API for a detailed breakdown of all available usage statistics. -- **URL-based Configuration**: You can also specify the API endpoint directly in the URL using a query parameter. This is useful for bookmarks or sharing links. 
For example: - `https://ericc-ch.github.io/copilot-api?endpoint=http://your-api-server/usage` +```sh +npx @weavejam/copilot-proxy@latest start \ + --accounts-file ./accounts.json --strategy least-busy +``` ## Using with Claude Code -This proxy can be used to power [Claude Code](https://docs.anthropic.com/en/claude-code), an experimental conversational AI assistant for developers from Anthropic. - -There are two ways to configure Claude Code to use this proxy: - -### Interactive Setup with `--claude-code` flag - -To get started, run the `start` command with the `--claude-code` flag: +### Interactive Setup ```sh -npx copilot-api@latest start --claude-code +npx @weavejam/copilot-proxy@latest start --claude-code ``` -You will be prompted to select a primary model and a "small, fast" model for background tasks. After selecting the models, a command will be copied to your clipboard. This command sets the necessary environment variables for Claude Code to use the proxy. - -Paste and run this command in a new terminal to launch Claude Code. +Select a primary and a small/fast model when prompted. A command is copied to your clipboard — paste it into a new terminal to launch Claude Code. -### Manual Configuration with `settings.json` +### Manual Configuration -Alternatively, you can configure Claude Code by creating a `.claude/settings.json` file in your project's root directory. This file should contain the environment variables needed by Claude Code. This way you don't need to run the interactive setup every time. 
- -Here is an example `.claude/settings.json` file: +Create `.claude/settings.json` in your project root: ```json { @@ -315,37 +185,203 @@ Here is an example `.claude/settings.json` file: "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1" }, "permissions": { - "deny": [ - "WebSearch" - ] + "deny": ["WebSearch"] } } ``` -You can find more options here: [Claude Code settings](https://docs.anthropic.com/en/docs/claude-code/settings#environment-variables) +See: [Claude Code settings](https://docs.anthropic.com/en/docs/claude-code/settings#environment-variables) | [IDE integrations](https://docs.anthropic.com/en/docs/claude-code/ide-integrations) + +## Pricing Sync + +The proxy fetches model pricing from Azure and Anthropic pricing pages, storing per-model cost data in the local SQLite database. This powers the cost estimates in the `/usage` endpoint. + +- **Automatic**: Syncs every 7 days on server start. Configure with `--pricing-sync-interval-days` or disable with `--pricing-sync-disabled`. +- **Manual**: Run a one-off sync: + +```sh +npx @weavejam/copilot-proxy@latest pricing-sync +``` + +## Usage Dashboard -You can also read more about IDE integration here: [Add Claude Code to your IDE](https://docs.anthropic.com/en/docs/claude-code/ide-integrations) +After starting the server, a URL to the web-based usage dashboard is printed to the console: -## Running from Source +``` +https://ericc-ch.github.io/copilot-api?endpoint=http://localhost:4141/usage +``` -The project can be run from source in several ways: +The dashboard shows per-model token usage, cost breakdowns, and quota information. 
-### Development Mode +You can also check usage directly in the terminal (no server required): ```sh -bun run dev +npx @weavejam/copilot-proxy@latest check-usage ``` -### Production Mode +## API Endpoints + +### OpenAI Compatible + +| Endpoint | Method | Description | +| --- | --- | --- | +| `/v1/chat/completions` | POST | Chat completions (streaming supported) | +| `/v1/models` | GET | List available models | +| `/v1/embeddings` | POST | Generate embedding vectors | + +### Anthropic Compatible + +| Endpoint | Method | Description | +| --- | --- | --- | +| `/v1/messages` | POST | Messages API (streaming supported) | +| `/v1/messages/count_tokens` | POST | Count tokens for a message set | + +### Monitoring + +| Endpoint | Method | Description | +| --- | --- | --- | +| `/usage` | GET | Detailed usage statistics with cost estimates | +| `/token` | GET | Current Copilot token | +| `/` | GET | Health check | + +## Command Reference + +### Commands + +| Command | Description | +| --- | --- | +| `start` | Start the proxy server | +| `auth` | Legacy single-token authentication | +| `auth add` | Add a GitHub account via Device Flow OAuth | +| `auth list` | List configured accounts | +| `auth remove` | Remove an account | +| `check-usage` | Show Copilot usage/quota in terminal | +| `pricing-sync` | Run one-off pricing data sync | +| `debug` | Show diagnostic information | + +### `start` Options + +| Option | Description | Default | Alias | +| --- | --- | --- | --- | +| `--port` | Port to listen on | 4141 | `-p` | +| `--verbose` | Enable verbose logging | false | `-v` | +| `--account-type` | Account type (individual, business, enterprise) | individual | `-a` | +| `--github-token` | GitHub token(s), supports `name:type:token` comma-separated format | — | `-g` | +| `--accounts-file` | Path to accounts JSON file | — | — | +| `--strategy` | Load balancing: round-robin, least-busy, least-recent | round-robin | — | +| `--rate-limit` | Minimum seconds between requests | — | `-r` | 
+| `--wait` | Wait instead of error on rate limit | false | `-w` | +| `--manual` | Enable manual request approval | false | — | +| `--claude-code` | Interactive Claude Code setup | false | `-c` | +| `--show-token` | Show tokens on fetch and refresh | false | — | +| `--proxy-env` | Use HTTP_PROXY/HTTPS_PROXY env vars | false | — | +| `--db-path` | Path to SQLite database | auto | — | +| `--pricing-sync-model` | Model for LLM pricing extraction | auto | — | +| `--pricing-sync-interval-days` | Days between automatic pricing syncs | 7 | — | +| `--pricing-sync-disabled` | Disable automatic pricing sync | false | — | + +### `auth add` Options + +| Option | Description | Default | Alias | +| --- | --- | --- | --- | +| `--name` | Account name (defaults to GitHub username) | auto | `-n` | +| `--account-type` | Account type | individual | `-a` | +| `--verbose` | Verbose logging | false | `-v` | + +### `auth remove` Options + +| Option | Description | Default | Alias | +| --- | --- | --- | --- | +| `--name` | Account name to remove | required | `-n` | +| `--verbose` | Verbose logging | false | `-v` | + +### `pricing-sync` Options + +| Option | Description | Default | Alias | +| --- | --- | --- | --- | +| `--port` | Temp server port for LLM call | 4141 | `-p` | +| `--sync-model` | Model for extraction | auto | — | +| `--github-token` | GitHub token(s), same format as start | — | `-g` | +| `--accounts-file` | Accounts JSON file | — | — | +| `--account-type` | Account type | individual | `-a` | +| `--db-path` | SQLite database path | auto | — | +| `--proxy-env` | Use proxy env vars | false | — | +| `--verbose` | Verbose logging | false | `-v` | + +### `debug` Options + +| Option | Description | Default | +| --- | --- | --- | +| `--json` | Output as JSON | false | + +## Docker + +### Build & Run ```sh +docker build -t copilot-api . 
+mkdir -p ./copilot-data +docker run -p 4141:4141 -v $(pwd)/copilot-data:/root/.local/share/copilot-api copilot-api +``` + +### With Token + +```sh +docker run -p 4141:4141 -e GH_TOKEN=ghu_YOUR_TOKEN copilot-api +``` + +### Docker Compose + +```yaml +version: "3.8" +services: + copilot-api: + build: . + ports: + - "4141:4141" + environment: + - GH_TOKEN=ghu_YOUR_TOKEN + restart: unless-stopped +``` + +## Development + +```sh +# Install dependencies +bun install + +# Development mode (watch) +bun run dev + +# Production mode bun run start + +# Run tests +bun test + +# Build for npm +bun run build + +# Type check +bun run typecheck + +# Lint +bun run lint ``` +## Data Storage + +All data is stored in `~/.local/share/copilot-api/`: + +| File | Purpose | +| --- | --- | +| `github_token` | Stored GitHub OAuth token | +| `accounts.json` | Multi-account configuration | +| `usage.sqlite` | Usage tracking and pricing data | + ## Usage Tips -- To avoid hitting GitHub Copilot's rate limits, you can use the following flags: - - `--manual`: Enables manual approval for each request, giving you full control over when requests are sent. - - `--rate-limit `: Enforces a minimum time interval between requests. For example, `copilot-api start --rate-limit 30` will ensure there's at least a 30-second gap between requests. - - `--wait`: Use this with `--rate-limit`. It makes the server wait for the cooldown period to end instead of rejecting the request with an error. This is useful for clients that don't automatically retry on rate limit errors. -- If you have a GitHub business or enterprise plan account with Copilot, use the `--account-type` flag (e.g., `--account-type business`). 
See the [official documentation](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization) for more details. +- Use `--rate-limit 30 --wait` to throttle requests and queue them instead of erroring. +- Use `--manual` to approve each request individually — useful for debugging or auditing. +- Use `--account-type business` or `enterprise` if your Copilot subscription is through an organization. See the [official docs](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization). +- Multi-token CLI (`--github-token "a:individual:ghu_x,b:business:ghu_y"`) is a quick alternative to accounts files for CI/CD or one-off use. 
diff --git a/bun.lock b/bun.lock index 20e895e7f..27cc49c20 100644 --- a/bun.lock +++ b/bun.lock @@ -1,9 +1,11 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "copilot-api", "dependencies": { + "better-sqlite3": "^12.9.0", "citty": "^0.1.6", "clipboardy": "^5.0.0", "consola": "^3.4.2", @@ -18,6 +20,7 @@ }, "devDependencies": { "@echristian/eslint-config": "^0.0.54", + "@types/better-sqlite3": "^7.6.13", "@types/bun": "^1.2.23", "@types/proxy-from-env": "^1.0.4", "bumpp": "^10.2.3", @@ -196,6 +199,8 @@ "@tybys/wasm-util": ["@tybys/wasm-util@0.10.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg=="], + "@types/better-sqlite3": ["@types/better-sqlite3@7.6.13", "", { "dependencies": { "@types/node": "*" } }, "sha512-NMv9ASNARoKksWtsq/SHakpYAYnhBrQgGD8zkLYk/jaK8jUGn08CfEdTRgYhMypUQAfzSP8W6gNLe0q19/t4VA=="], + "@types/bun": ["@types/bun@1.2.23", "", { "dependencies": { "bun-types": "1.2.23" } }, "sha512-le8ueOY5b6VKYf19xT3McVbXqLqmxzPXHsQT/q9JHgikJ2X22wyTW3g3ohz2ZMnp7dod6aduIiq8A14Xyimm0A=="], "@types/estree": ["@types/estree@1.0.8", "", {}, "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w=="], @@ -272,18 +277,28 @@ "balanced-match": ["balanced-match@1.0.2", "", {}, "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="], + "base64-js": ["base64-js@1.5.1", "", {}, "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA=="], + "baseline-browser-mapping": ["baseline-browser-mapping@2.8.11", "", { "bin": { "baseline-browser-mapping": "dist/cli.js" } }, "sha512-i+sRXGhz4+QW8aACZ3+r1GAKMt0wlFpeA8M5rOQd0HEYw9zhDrlx9Wc8uQ0IdXakjJRthzglEwfB/yqIjO6iDg=="], + "better-sqlite3": ["better-sqlite3@12.9.0", "", { "dependencies": { "bindings": "^1.5.0", "prebuild-install": "^7.1.1" } }, 
"sha512-wqUv4Gm3toFpHDQmaKD4QhZm3g1DjUBI0yzS4UBl6lElUmXFYdTQmmEDpAFa5o8FiFiymURypEnfVHzILKaxqQ=="], + + "bindings": ["bindings@1.5.0", "", { "dependencies": { "file-uri-to-path": "1.0.0" } }, "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ=="], + "birecord": ["birecord@0.1.1", "", {}, "sha512-VUpsf/qykW0heRlC8LooCq28Kxn3mAqKohhDG/49rrsQ1dT1CXyj/pgXS+5BSRzFTR/3DyIBOqQOrGyZOh71Aw=="], "birpc": ["birpc@2.6.1", "", {}, "sha512-LPnFhlDpdSH6FJhJyn4M0kFO7vtQ5iPw24FnG0y21q09xC7e8+1LeR31S1MAIrDAHp4m7aas4bEkTDTvMAtebQ=="], + "bl": ["bl@4.1.0", "", { "dependencies": { "buffer": "^5.5.0", "inherits": "^2.0.4", "readable-stream": "^3.4.0" } }, "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w=="], + "brace-expansion": ["brace-expansion@1.1.12", "", { "dependencies": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" } }, "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg=="], "braces": ["braces@3.0.3", "", { "dependencies": { "fill-range": "^7.1.1" } }, "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA=="], "browserslist": ["browserslist@4.26.3", "", { "dependencies": { "baseline-browser-mapping": "^2.8.9", "caniuse-lite": "^1.0.30001746", "electron-to-chromium": "^1.5.227", "node-releases": "^2.0.21", "update-browserslist-db": "^1.1.3" }, "bin": { "browserslist": "cli.js" } }, "sha512-lAUU+02RFBuCKQPj/P6NgjlbCnLBMp4UtgTx7vNHd3XSIJF87s9a5rA3aH2yw3GS9DqZAUbOtZdCCiZeVRqt0w=="], + "buffer": ["buffer@5.7.1", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.1.13" } }, "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ=="], + "builtin-modules": ["builtin-modules@5.0.0", "", {}, "sha512-bkXY9WsVpY7CvMhKSR6pZilZu9Ln5WDrKVBUXf2S443etkmEO4V58heTecXcUIsNsi4Rx8JUO4NfX1IcQl4deg=="], "bumpp": ["bumpp@10.2.3", "", { "dependencies": { "ansis": "^4.1.0", 
"args-tokenizer": "^0.3.0", "c12": "^3.2.0", "cac": "^6.7.14", "escalade": "^3.2.0", "jsonc-parser": "^3.3.1", "package-manager-detector": "^1.3.0", "semver": "^7.7.2", "tinyexec": "^1.0.1", "tinyglobby": "^0.2.14", "yaml": "^2.8.1" }, "bin": { "bumpp": "bin/bumpp.mjs" } }, "sha512-nsFBZACxuBVu6yzDSaZZaWpX5hTQ+++9WtYkmO+0Bd3cpSq0Mzvqw5V83n+fOyRj3dYuZRFCQf5Z9NNfZj+Rnw=="], @@ -310,6 +325,8 @@ "chokidar": ["chokidar@4.0.3", "", { "dependencies": { "readdirp": "^4.0.1" } }, "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA=="], + "chownr": ["chownr@1.1.4", "", {}, "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg=="], + "ci-info": ["ci-info@4.3.0", "", {}, "sha512-l+2bNRMiQgcfILUi33labAZYIWlH1kWDp+ecNo5iisRKrbm0xcRyCww71/YU0Fkw0mAFpz9bJayXPjey6vkmaQ=="], "citty": ["citty@0.1.6", "", { "dependencies": { "consola": "^3.2.3" } }, "sha512-tskPPKEs8D2KPafUypv2gxwJP8h/OaJmC82QQGGDQcHvXX43xF2VDACcJVmZ0EuSxkpO9Kc4MlrA3q0+FG58AQ=="], @@ -360,6 +377,10 @@ "debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="], + "decompress-response": ["decompress-response@6.0.0", "", { "dependencies": { "mimic-response": "^3.1.0" } }, "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ=="], + + "deep-extend": ["deep-extend@0.6.0", "", {}, "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA=="], + "deep-is": ["deep-is@0.1.4", "", {}, "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ=="], "define-data-property": ["define-data-property@1.1.4", "", { "dependencies": { "es-define-property": "^1.0.0", "es-errors": "^1.3.0", "gopd": "^1.0.1" } }, "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A=="], @@ -372,6 +393,8 @@ 
"detect-indent": ["detect-indent@7.0.2", "", {}, "sha512-y+8xyqdGLL+6sh0tVeHcfP/QDd8gUgbasolJJpY7NgeQGSZ739bDtSiaiDgtoicy+mtYB81dKLxO9xRhCyIB3A=="], + "detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], + "detect-newline": ["detect-newline@4.0.1", "", {}, "sha512-qE3Veg1YXzGHQhlA6jzebZN2qVf6NX+A7m7qlhCGG30dJixrAQhYOsJjsnBjJkCSmuOPpCk30145fr8FV0bzog=="], "diff": ["diff@8.0.2", "", {}, "sha512-sSuxWU5j5SR9QQji/o2qMvqRNYRDOcBTgsJ/DeCf4iSN4gW+gNMXM7wFIP+fdXZxoNiAnHUTGjCr+TSWXdRDKg=="], @@ -388,6 +411,8 @@ "empathic": ["empathic@2.0.0", "", {}, "sha512-i6UzDscO/XfAcNYD75CfICkmfLedpyPDdozrLMmQc5ORaQcdMoc21OnlEylMIqI7U8eniKrPMxxtj8k0vhmJhA=="], + "end-of-stream": ["end-of-stream@1.4.5", "", { "dependencies": { "once": "^1.4.0" } }, "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg=="], + "environment": ["environment@1.1.0", "", {}, "sha512-xUtoPkMggbz0MPyPiIWr1Kp4aeWJjDZ6SMvURhimjdZgsRuDplF5/s9hcgGhyXMhs+6vpnuoiZ2kFiu3FMnS8Q=="], "es-abstract": ["es-abstract@1.24.0", "", { "dependencies": { "array-buffer-byte-length": "^1.0.2", "arraybuffer.prototype.slice": "^1.0.4", "available-typed-arrays": "^1.0.7", "call-bind": "^1.0.8", "call-bound": "^1.0.4", "data-view-buffer": "^1.0.2", "data-view-byte-length": "^1.0.2", "data-view-byte-offset": "^1.0.1", "es-define-property": "^1.0.1", "es-errors": "^1.3.0", "es-object-atoms": "^1.1.1", "es-set-tostringtag": "^2.1.0", "es-to-primitive": "^1.3.0", "function.prototype.name": "^1.1.8", "get-intrinsic": "^1.3.0", "get-proto": "^1.0.1", "get-symbol-description": "^1.1.0", "globalthis": "^1.0.4", "gopd": "^1.2.0", "has-property-descriptors": "^1.0.2", "has-proto": "^1.2.0", "has-symbols": "^1.1.0", "hasown": "^2.0.2", "internal-slot": "^1.1.0", "is-array-buffer": "^3.0.5", "is-callable": "^1.2.7", "is-data-view": "^1.0.2", "is-negative-zero": "^2.0.3", "is-regex": "^1.2.1", "is-set": "^2.0.3", 
"is-shared-array-buffer": "^1.0.4", "is-string": "^1.1.1", "is-typed-array": "^1.1.15", "is-weakref": "^1.1.1", "math-intrinsics": "^1.1.0", "object-inspect": "^1.13.4", "object-keys": "^1.1.1", "object.assign": "^4.1.7", "own-keys": "^1.0.1", "regexp.prototype.flags": "^1.5.4", "safe-array-concat": "^1.1.3", "safe-push-apply": "^1.0.0", "safe-regex-test": "^1.1.0", "set-proto": "^1.0.0", "stop-iteration-iterator": "^1.1.0", "string.prototype.trim": "^1.2.10", "string.prototype.trimend": "^1.0.9", "string.prototype.trimstart": "^1.0.8", "typed-array-buffer": "^1.0.3", "typed-array-byte-length": "^1.0.3", "typed-array-byte-offset": "^1.0.4", "typed-array-length": "^1.0.7", "unbox-primitive": "^1.1.0", "which-typed-array": "^1.1.19" } }, "sha512-WSzPgsdLtTcQwm4CROfS5ju2Wa1QQcVeT37jFjYzdFz1r9ahadC8B8/a4qxJxM+09F18iumCdRmlr96ZYkQvEg=="], @@ -464,6 +489,8 @@ "execa": ["execa@9.6.0", "", { "dependencies": { "@sindresorhus/merge-streams": "^4.0.0", "cross-spawn": "^7.0.6", "figures": "^6.1.0", "get-stream": "^9.0.0", "human-signals": "^8.0.1", "is-plain-obj": "^4.1.0", "is-stream": "^4.0.1", "npm-run-path": "^6.0.0", "pretty-ms": "^9.2.0", "signal-exit": "^4.1.0", "strip-final-newline": "^4.0.0", "yoctocolors": "^2.1.1" } }, "sha512-jpWzZ1ZhwUmeWRhS7Qv3mhpOhLfwI+uAX4e5fOcXqwMR7EcJ0pj2kV1CVzHVMX/LphnKWD3LObjZCoJ71lKpHw=="], + "expand-template": ["expand-template@2.0.3", "", {}, "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg=="], + "exsolve": ["exsolve@1.0.7", "", {}, "sha512-VO5fQUzZtI6C+vx4w/4BWJpg3s/5l+6pRQEHzFRM8WFi4XffSP1Z+4qi7GbjWbvRQEbdIco5mIMq+zX4rPuLrw=="], "fast-deep-equal": ["fast-deep-equal@3.1.3", "", {}, "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="], @@ -488,6 +515,8 @@ "file-entry-cache": ["file-entry-cache@8.0.0", "", { "dependencies": { "flat-cache": "^4.0.0" } }, 
"sha512-XXTUwCvisa5oacNGRP9SfNtYBNAMi+RPwBFmblZEF7N7swHYQS6/Zfk7SRwx4D5j3CH211YNRco1DEMNVfZCnQ=="], + "file-uri-to-path": ["file-uri-to-path@1.0.0", "", {}, "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw=="], + "fill-range": ["fill-range@7.1.1", "", { "dependencies": { "to-regex-range": "^5.0.1" } }, "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg=="], "find-up": ["find-up@5.0.0", "", { "dependencies": { "locate-path": "^6.0.0", "path-exists": "^4.0.0" } }, "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng=="], @@ -502,6 +531,8 @@ "formatly": ["formatly@0.3.0", "", { "dependencies": { "fd-package-json": "^2.0.0" }, "bin": { "formatly": "bin/index.mjs" } }, "sha512-9XNj/o4wrRFyhSMJOvsuyMwy8aUfBaZ1VrqHVfohyXf0Sw0e+yfKG+xZaY3arGCOMdwFsqObtzVOc1gU9KiT9w=="], + "fs-constants": ["fs-constants@1.0.0", "", {}, "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow=="], + "function-bind": ["function-bind@1.1.2", "", {}, "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA=="], "function.prototype.name": ["function.prototype.name@1.1.8", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", "define-properties": "^1.2.1", "functions-have-names": "^1.2.3", "hasown": "^2.0.2", "is-callable": "^1.2.7" } }, "sha512-e5iwyodOHhbMr/yNrc7fDYG4qlbIvI5gajyzPnb5TCwyhjApznQh1BMFou9b30SevY43gCJKXycoCBjMbsuW0Q=="], @@ -528,6 +559,8 @@ "git-hooks-list": ["git-hooks-list@4.1.1", "", {}, "sha512-cmP497iLq54AZnv4YRAEMnEyQ1eIn4tGKbmswqwmFV4GBnAqE8NLtWxxdXa++AalfgL5EBH4IxTPyquEuGY/jA=="], + "github-from-package": ["github-from-package@0.0.0", "", {}, "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw=="], + "glob-parent": ["glob-parent@6.0.2", "", { "dependencies": { "is-glob": "^4.0.3" } }, 
"sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A=="], "globals": ["globals@16.4.0", "", {}, "sha512-ob/2LcVVaVGCYN+r14cnwnoDPUufjiYgSqRhiFD0Q1iI4Odora5RE8Iv1D24hAz5oMophRGkGz+yuvQmmUMnMw=="], @@ -560,6 +593,8 @@ "human-signals": ["human-signals@8.0.1", "", {}, "sha512-eKCa6bwnJhvxj14kZk5NCPc6Hb6BdsU9DZcOnmQKSnO1VKrfV0zCvtttPZUsBvjmNDn8rpcJfpwSYnHBjc95MQ=="], + "ieee754": ["ieee754@1.2.1", "", {}, "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA=="], + "ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="], "import-fresh": ["import-fresh@3.3.1", "", { "dependencies": { "parent-module": "^1.0.0", "resolve-from": "^4.0.0" } }, "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ=="], @@ -568,6 +603,10 @@ "indent-string": ["indent-string@5.0.0", "", {}, "sha512-m6FAo/spmsW2Ab2fU35JTYwtOKa2yAwXSwgjSv1TJzh4Mh7mC3lzAOVLBprb72XsTrgkEIsl7YrFNAiDiRhIGg=="], + "inherits": ["inherits@2.0.4", "", {}, "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="], + + "ini": ["ini@1.3.8", "", {}, "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew=="], + "internal-slot": ["internal-slot@1.1.0", "", { "dependencies": { "es-errors": "^1.3.0", "hasown": "^2.0.2", "side-channel": "^1.1.0" } }, "sha512-4gd7VpWNQNB4UKKCFFVcp1AVv+FMOgs9NKzjHKusc8jTMhd5eL1NqQqOpE0KzMds804/yHlglp3uxgluOqAPLw=="], "is-array-buffer": ["is-array-buffer@3.0.5", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", "get-intrinsic": "^1.2.6" } }, "sha512-DDfANUiiG2wC1qawP66qlTugJeL5HyzMpfr8lLK+jMQirGzNod0B12cFB/9q838Ru27sBwfw78/rdoU7RERz6A=="], @@ -694,18 +733,26 @@ "mimic-function": ["mimic-function@5.0.1", "", {}, 
"sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA=="], + "mimic-response": ["mimic-response@3.1.0", "", {}, "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ=="], + "minimatch": ["minimatch@3.1.2", "", { "dependencies": { "brace-expansion": "^1.1.7" } }, "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw=="], "minimist": ["minimist@1.2.8", "", {}, "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA=="], + "mkdirp-classic": ["mkdirp-classic@0.5.3", "", {}, "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A=="], + "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], "nano-spawn": ["nano-spawn@1.0.3", "", {}, "sha512-jtpsQDetTnvS2Ts1fiRdci5rx0VYws5jGyC+4IYOTnIQ/wwdf6JdomlHBwqC3bJYOvaKu0C2GSZ1A60anrYpaA=="], + "napi-build-utils": ["napi-build-utils@2.0.0", "", {}, "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA=="], + "natural-compare": ["natural-compare@1.4.0", "", {}, "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw=="], "natural-orderby": ["natural-orderby@5.0.0", "", {}, "sha512-kKHJhxwpR/Okycz4HhQKKlhWe4ASEfPgkSWNmKFHd7+ezuQlxkA5cM3+XkBPvm1gmHen3w53qsYAv+8GwRrBlg=="], + "node-abi": ["node-abi@3.89.0", "", { "dependencies": { "semver": "^7.3.5" } }, "sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA=="], + "node-fetch-native": ["node-fetch-native@1.6.7", "", {}, "sha512-g9yhqoedzIUm0nTnTqAQvueMPVOuIY16bqgAJJC8XOOubYFNwz6IER9qs0Gq2Xd0+CecCKFjtdDTMA4u4xG06Q=="], "node-releases": ["node-releases@2.0.23", "", {}, "sha512-cCmFDMSm26S6tQSDpBCg/NR8NENrVPhAJSf+XbxBG4rPFaaonlEoE9wHQmun+cls499TQGSb7ZyPBRlzgKfpeg=="], @@ -726,6 +773,8 @@ "ohash": 
["ohash@2.0.11", "", {}, "sha512-RdR9FQrFwNBNXAr4GixM8YaRZRJ5PUWbKYbE5eOsrwAjJW0q2REGcf79oYPsLyskQCZG1PLN+S/K1V00joZAoQ=="], + "once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="], + "onetime": ["onetime@7.0.0", "", { "dependencies": { "mimic-function": "^5.0.0" } }, "sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ=="], "optionator": ["optionator@0.9.4", "", { "dependencies": { "deep-is": "^0.1.3", "fast-levenshtein": "^2.0.6", "levn": "^0.4.1", "prelude-ls": "^1.2.1", "type-check": "^0.4.0", "word-wrap": "^1.2.5" } }, "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g=="], @@ -766,6 +815,8 @@ "possible-typed-array-names": ["possible-typed-array-names@1.1.0", "", {}, "sha512-/+5VFTchJDoVj3bhoqi6UeymcD00DAwb1nJwamzPvHEszJ4FpF6SNNbUbOS8yI56qHzdV8eK0qEfOSiodkTdxg=="], + "prebuild-install": ["prebuild-install@7.1.3", "", { "dependencies": { "detect-libc": "^2.0.0", "expand-template": "^2.0.3", "github-from-package": "0.0.0", "minimist": "^1.2.3", "mkdirp-classic": "^0.5.3", "napi-build-utils": "^2.0.0", "node-abi": "^3.3.0", "pump": "^3.0.0", "rc": "^1.2.7", "simple-get": "^4.0.0", "tar-fs": "^2.0.0", "tunnel-agent": "^0.6.0" }, "bin": { "prebuild-install": "bin.js" } }, "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug=="], + "prelude-ls": ["prelude-ls@1.2.1", "", {}, "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g=="], "prettier": ["prettier@3.6.2", "", { "bin": { "prettier": "bin/prettier.cjs" } }, "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ=="], @@ -778,14 +829,20 @@ "proxy-from-env": ["proxy-from-env@1.1.0", "", {}, "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="], + "pump": 
["pump@3.0.4", "", { "dependencies": { "end-of-stream": "^1.1.0", "once": "^1.3.1" } }, "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA=="], + "punycode": ["punycode@2.3.1", "", {}, "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg=="], "quansync": ["quansync@0.2.11", "", {}, "sha512-AifT7QEbW9Nri4tAwR5M/uzpBuqfZf+zwaEM/QkzEjj7NBuFD2rBuy0K3dE+8wltbezDV7JMA0WfnCPYRSYbXA=="], "queue-microtask": ["queue-microtask@1.2.3", "", {}, "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A=="], + "rc": ["rc@1.2.8", "", { "dependencies": { "deep-extend": "^0.6.0", "ini": "~1.3.0", "minimist": "^1.2.0", "strip-json-comments": "~2.0.1" }, "bin": { "rc": "./cli.js" } }, "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw=="], + "rc9": ["rc9@2.1.2", "", { "dependencies": { "defu": "^6.1.4", "destr": "^2.0.3" } }, "sha512-btXCnMmRIBINM2LDZoEmOogIZU7Qe7zn4BpomSKZ/ykbLObuBdvG+mFq11DL6fjH1DRwHhrlgtYWG96bJiC7Cg=="], + "readable-stream": ["readable-stream@3.6.2", "", { "dependencies": { "inherits": "^2.0.3", "string_decoder": "^1.1.1", "util-deprecate": "^1.0.1" } }, "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA=="], + "readdirp": ["readdirp@4.1.2", "", {}, "sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg=="], "refa": ["refa@0.12.1", "", { "dependencies": { "@eslint-community/regexpp": "^4.8.0" } }, "sha512-J8rn6v4DBb2nnFqkqwy6/NnTYMcgLA+sLr0iIO41qpv0n+ngb7ksag2tMRl0inb1bbO/esUwzW1vbJi7K0sI0g=="], @@ -818,6 +875,8 @@ "safe-array-concat": ["safe-array-concat@1.1.3", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.2", "get-intrinsic": "^1.2.6", "has-symbols": "^1.1.0", "isarray": "^2.0.5" } }, "sha512-AURm5f0jYEOydBj7VQlVvDrjeFgthDdEF5H1dP+6mNpoXOMo1quQqJ4wvJDyRZ9+pO3kGWoOdmV08cSv2aJV6Q=="], + 
"safe-buffer": ["safe-buffer@5.2.1", "", {}, "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="], + "safe-push-apply": ["safe-push-apply@1.0.0", "", { "dependencies": { "es-errors": "^1.3.0", "isarray": "^2.0.5" } }, "sha512-iKE9w/Z7xCzUMIZqdBsp6pEQvwuEebH4vdpjcDWnyzaI6yl6O9FHvVpmGelvEHNsoY6wGblkxR6Zty/h00WiSA=="], "safe-regex-test": ["safe-regex-test@1.1.0", "", { "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", "is-regex": "^1.2.1" } }, "sha512-x/+Cz4YrimQxQccJf5mKEbIa1NzeCRNI5Ecl/ekmlYaampdNLPalVyIcCZNNH3MvmqBugV5TMYZXv0ljslUlaw=="], @@ -846,6 +905,10 @@ "signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="], + "simple-concat": ["simple-concat@1.0.1", "", {}, "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q=="], + + "simple-get": ["simple-get@4.0.1", "", { "dependencies": { "decompress-response": "^6.0.0", "once": "^1.3.1", "simple-concat": "^1.0.0" } }, "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA=="], + "simple-git-hooks": ["simple-git-hooks@2.13.1", "", { "bin": { "simple-git-hooks": "cli.js" } }, "sha512-WszCLXwT4h2k1ufIXAgsbiTOazqqevFCIncOuUBZJ91DdvWcC5+OFkluWRQPrcuSYd8fjq+o2y1QfWqYMoAToQ=="], "slice-ansi": ["slice-ansi@7.1.2", "", { "dependencies": { "ansi-styles": "^6.2.1", "is-fullwidth-code-point": "^5.0.0" } }, "sha512-iOBWFgUX7caIZiuutICxVgX1SdxwAVFFKwt1EvMYYec/NWO5meOJ6K5uQxhrYBdQJne4KxiqZc+KptFOWFSI9w=="], @@ -882,6 +945,8 @@ "string.prototype.trimstart": ["string.prototype.trimstart@1.0.8", "", { "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", "es-object-atoms": "^1.0.0" } }, "sha512-UXSH262CSZY1tfu3G3Secr6uGLCFVPMhIqHjlgCUtCCcgihYc/xKs9djMTMUOb2j1mVSeU8EU6NWc/iQKU6Gfg=="], + "string_decoder": ["string_decoder@1.3.0", "", { "dependencies": { "safe-buffer": "~5.2.0" } }, 
"sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA=="], + "strip-ansi": ["strip-ansi@7.1.2", "", { "dependencies": { "ansi-regex": "^6.0.1" } }, "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA=="], "strip-final-newline": ["strip-final-newline@4.0.0", "", {}, "sha512-aulFJcD6YK8V1G7iRB5tigAP4TsHBZZrOV8pjV++zdUwmeV8uzbY7yn6h9MswN62adStNZFuCIx4haBnRuMDaw=="], @@ -896,6 +961,10 @@ "system-architecture": ["system-architecture@0.1.0", "", {}, "sha512-ulAk51I9UVUyJgxlv9M6lFot2WP3e7t8Kz9+IS6D4rVba1tR9kON+Ey69f+1R4Q8cd45Lod6a4IcJIxnzGc/zA=="], + "tar-fs": ["tar-fs@2.1.4", "", { "dependencies": { "chownr": "^1.1.1", "mkdirp-classic": "^0.5.2", "pump": "^3.0.0", "tar-stream": "^2.1.4" } }, "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ=="], + + "tar-stream": ["tar-stream@2.2.0", "", { "dependencies": { "bl": "^4.0.3", "end-of-stream": "^1.4.1", "fs-constants": "^1.0.0", "inherits": "^2.0.3", "readable-stream": "^3.1.1" } }, "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ=="], + "tiny-invariant": ["tiny-invariant@1.3.3", "", {}, "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg=="], "tinyexec": ["tinyexec@1.0.1", "", {}, "sha512-5uC6DDlmeqiOwCPmK9jMSdOuZTh8bU39Ys6yidB+UTt5hfZUPGAypSgFRiEp+jbi9qH40BLDvy85jIU88wKSqw=="], @@ -916,6 +985,8 @@ "tslib": ["tslib@2.8.1", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="], + "tunnel-agent": ["tunnel-agent@0.6.0", "", { "dependencies": { "safe-buffer": "^5.0.1" } }, "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w=="], + "type-check": ["type-check@0.4.0", "", { "dependencies": { "prelude-ls": "^1.2.1" } }, "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew=="], 
"typed-array-buffer": ["typed-array-buffer@1.0.3", "", { "dependencies": { "call-bound": "^1.0.3", "es-errors": "^1.3.0", "is-typed-array": "^1.1.14" } }, "sha512-nAYYwfY3qnzX30IkA6AQZjVbtK6duGontcQm1WSG1MD94YLqK0515GNApXkoxKOWMusVssAHWLh9SeaoefYFGw=="], @@ -944,6 +1015,8 @@ "uri-js": ["uri-js@4.4.1", "", { "dependencies": { "punycode": "^2.1.0" } }, "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg=="], + "util-deprecate": ["util-deprecate@1.0.2", "", {}, "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="], + "validate-npm-package-license": ["validate-npm-package-license@3.0.4", "", { "dependencies": { "spdx-correct": "^3.0.0", "spdx-expression-parse": "^3.0.0" } }, "sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew=="], "validate-npm-package-name": ["validate-npm-package-name@6.0.2", "", {}, "sha512-IUoow1YUtvoBBC06dXs8bR8B9vuA3aJfmQNKMoaPG/OFsPmoQvw8xh+6Ye25Gx9DQhoEom3Pcu9MKHerm/NpUQ=="], @@ -964,6 +1037,8 @@ "wrap-ansi": ["wrap-ansi@9.0.2", "", { "dependencies": { "ansi-styles": "^6.2.1", "string-width": "^7.0.0", "strip-ansi": "^7.1.0" } }, "sha512-42AtmgqjV+X1VpdOfyTGOYRi0/zsoLqtXQckTmqTeybT+BDIbM/Guxo7x3pE2vtpr1ok6xRqM9OpBe+Jyoqyww=="], + "wrappy": ["wrappy@1.0.2", "", {}, "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="], + "y18n": ["y18n@5.0.8", "", {}, "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA=="], "yaml": ["yaml@2.8.1", "", { "bin": { "yaml": "bin.mjs" } }, "sha512-lcYcMxX2PO9XMGvAJkJ3OsNMw+/7FKes7/hgerGUYWIoWu5j/+YQqcZr5JnPZWzOsEBgMbSbiSTn/dv/69Mkpw=="], @@ -1008,6 +1083,8 @@ "npm-run-path/path-key": ["path-key@4.0.0", "", {}, "sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ=="], + "rc/strip-json-comments": ["strip-json-comments@2.0.1", "", {}, 
"sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ=="], + "regjsparser/jsesc": ["jsesc@3.0.2", "", { "bin": { "jsesc": "bin/jsesc" } }, "sha512-xKqzzWXDttJuOcawBt4KnKHHIf5oQ/Cxax+0PWFG+DFDgHNAdi+TXECADI+RYiFUMmx8792xsMbbgXj4CwnP4g=="], "slice-ansi/ansi-styles": ["ansi-styles@6.2.3", "", {}, "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg=="], diff --git a/docs/tasks/04-with-account-wrapper.md b/docs/tasks/04-with-account-wrapper.md deleted file mode 100644 index 30dc2bafe..000000000 --- a/docs/tasks/04-with-account-wrapper.md +++ /dev/null @@ -1,68 +0,0 @@ -# Task 04 — Handler `withAccount` wrapper - -**Depends on:** 03 -**Unblocks:** 07, 08, 09, 10 - -## Goal - -Replace the inline `acquire/release` placeholder from task 03 with a single -`withAccount` helper that handles retry, cooldown, abort, and the -`x-internal-pricing-sync` exemption. - -## Scope - -New file `src/lib/with-account.ts`: - -```ts -export async function withAccount( - c: Context, - fn: (account: Account) => Promise, -): Promise { - const isInternal = c.req.header('x-internal-pricing-sync') === '1' - const maxRetries = Math.min(state.pool.size(), 3) - let lastErr: unknown - for (let attempt = 0; attempt < maxRetries; attempt++) { - const account = await state.pool.acquire() - try { - const out = await fn(account) - account.consecutiveFailures = 0 - return out - } catch (e) { - lastErr = e - if (isClientError(e)) throw e // 4xx (non-401) — no retry - if (isAuthError(e)) triggerRefresh(account) // 401 — refresh, then retry - else state.pool.markCooldown(account, 30_000) // 5xx / network - } finally { - state.pool.release(account) - } - } - throw lastErr -} -``` - -Update each handler to: - -```ts -return withAccount(c, async (account) => { - // ...existing logic with `account` threaded into service call... -}) -``` - -Streaming handlers must not retry once the SSE response has begun flushing. 
-Either: - -- Detect "headers already sent" and rethrow without rotating, OR -- Wrap retry only around the `fetch()` upstream call, and once events start - flowing, abort retry. - -## Definition of Done - -- [ ] `withAccount` is the only place that calls `pool.acquire/release` - outside startup code. -- [ ] Unit test: forcing a 401 once causes one retry against a different - account (use a dummy pool of two accounts). -- [ ] Unit test: forcing a 4xx never retries. -- [ ] Manual smoke: kill one account's token mid-flight; new requests succeed - on the other account; the dead account enters cooldown. -- [ ] Internal `x-internal-pricing-sync: 1` requests bypass nothing in this - task (the exemption only matters for the recorder in task 06). diff --git a/package.json b/package.json index a5adbb8e7..3124013d9 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { - "name": "copilot-api", - "version": "0.7.0", + "name": "@weavejam/copilot-proxy", + "version": "0.8.1", "description": "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. 
Usable with Claude Code!", "keywords": [ "proxy", @@ -13,10 +13,10 @@ "type": "git", "url": "git+https://github.com/ericc-ch/copilot-api.git" }, - "author": "Erick Christian ", + "author": "Bo Lu ", "type": "module", "bin": { - "copilot-api": "./dist/main.js" + "copilot-proxy": "./dist/main.js" }, "files": [ "dist" @@ -40,6 +40,7 @@ "*": "bun run lint --fix" }, "dependencies": { + "better-sqlite3": "^12.9.0", "citty": "^0.1.6", "clipboardy": "^5.0.0", "consola": "^3.4.2", @@ -54,6 +55,7 @@ }, "devDependencies": { "@echristian/eslint-config": "^0.0.54", + "@types/better-sqlite3": "^7.6.13", "@types/bun": "^1.2.23", "@types/proxy-from-env": "^1.0.4", "bumpp": "^10.2.3", diff --git a/src/lib/accounts-loader.ts b/src/lib/accounts-loader.ts index 2bccecc6f..ef3656343 100644 --- a/src/lib/accounts-loader.ts +++ b/src/lib/accounts-loader.ts @@ -20,6 +20,7 @@ export interface AccountsFile { export interface LoadAccountsOptions { accountsFile?: string legacyToken?: string + legacyTokens?: Array defaultAccountType: string } @@ -38,6 +39,55 @@ const FRESH = (): Pick< failureCount: 0, }) +/** + * Parse a single `--github-token` segment with format `name:type:token`. 
+ * + * - 1 segment → pure token, name=`account-{index}`, type=defaultType + * - 2 segments → `name:token`, type=defaultType + * - 3+ segments → `name:type:token` (token may contain `:`) + */ +export function parseGithubTokenArg( + raw: string, + index: number, + defaultType: string, +): AccountsFileEntry { + const parts = raw.split(":") + if (parts.length === 1) { + return { + name: `account-${index + 1}`, + github_token: parts[0], + account_type: defaultType, + } + } + if (parts.length === 2) { + return { + name: parts[0], + github_token: parts[1], + account_type: defaultType, + } + } + // 3+ segments: name:type:token (token may contain colons) + return { + name: parts[0], + account_type: parts[1], + github_token: parts.slice(2).join(":"), + } +} + +/** + * Parse a comma-separated `--github-token` value into multiple account entries. + */ +export function parseGithubTokenArgs( + raw: string, + defaultType: string, +): Array { + return raw + .split(",") + .map((s) => s.trim()) + .filter((s) => s.length > 0) + .map((s, i) => parseGithubTokenArg(s, i, defaultType)) +} + export async function loadAccounts( options: LoadAccountsOptions, ): Promise> { @@ -54,6 +104,15 @@ export async function loadAccounts( ...FRESH(), }) } + } else if (options.legacyTokens && options.legacyTokens.length > 0) { + for (const entry of options.legacyTokens) { + accounts.push({ + name: entry.name, + accountType: entry.account_type ?? 
options.defaultAccountType, + githubToken: entry.github_token, + ...FRESH(), + }) + } } else if (options.legacyToken && options.legacyToken.length > 0) { accounts.push({ name: "default", diff --git a/src/lib/db.ts b/src/lib/db.ts index 8df341dea..a3decb906 100644 --- a/src/lib/db.ts +++ b/src/lib/db.ts @@ -1,8 +1,8 @@ -import { Database } from "bun:sqlite" import fs from "node:fs" import path from "node:path" import migration001 from "./migrations/001_initial.sql" with { type: "text" } +import { createDatabase, type DbInstance } from "./sqlite-adapter" export const CURRENT_SCHEMA_VERSION = 1 @@ -10,21 +10,21 @@ const MIGRATIONS: Array<{ version: number; sql: string }> = [ { version: 1, sql: migration001 }, ] -let dbInstance: Database | undefined +let dbInstance: DbInstance | undefined -export function initDb(dbPath: string): Database { +export function initDb(dbPath: string): DbInstance { if (dbInstance) return dbInstance if (dbPath !== ":memory:") { fs.mkdirSync(path.dirname(dbPath), { recursive: true }) } - const db = new Database(dbPath, { create: true }) + const db = createDatabase(dbPath) // Pragmas — set before any schema work. - db.run("PRAGMA journal_mode = WAL") - db.run("PRAGMA synchronous = NORMAL") - db.run("PRAGMA foreign_keys = ON") + db.pragma("journal_mode = WAL") + db.pragma("synchronous = NORMAL") + db.pragma("foreign_keys = ON") runMigrations(db) @@ -32,7 +32,7 @@ export function initDb(dbPath: string): Database { return db } -export function getDb(): Database { +export function getDb(): DbInstance { if (!dbInstance) { throw new Error( "Database not initialized. 
Call initDb(path) before getDb().", @@ -41,7 +41,7 @@ export function getDb(): Database { return dbInstance } -export function withTransaction(fn: (db: Database) => T): T { +export function withTransaction(fn: (db: DbInstance) => T): T { const db = getDb() const tx = db.transaction((arg: () => T) => arg()) return tx(() => fn(db)) @@ -63,18 +63,15 @@ export function __resetDbForTests(): void { } } -function runMigrations(db: Database): void { +function runMigrations(db: DbInstance): void { // Bootstrap meta table so we can read schema_version. - db.run( + db.exec( "CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT NOT NULL)", ) const row = db - .query< - { value: string }, - [] - >("SELECT value FROM meta WHERE key='schema_version'") - .get() + .prepare("SELECT value FROM meta WHERE key='schema_version'") + .get() as { value: string } | undefined const currentVersion = row ? Number.parseInt(row.value, 10) : 0 const pending = MIGRATIONS.filter((m) => m.version > currentVersion).sort( @@ -85,15 +82,14 @@ function runMigrations(db: Database): void { const apply = db.transaction(() => { for (const m of pending) { - // Migration SQL contains multiple statements; only exec() handles that. - // eslint-disable-next-line @typescript-eslint/no-deprecated db.exec(m.sql) } - db.run( - "INSERT INTO meta (key, value) VALUES ('schema_version', ?) " + db.prepare( + "INSERT INTO meta (key, value) VALUES (?, ?) 
" + "ON CONFLICT(key) DO UPDATE SET value=excluded.value", - [String(CURRENT_SCHEMA_VERSION)], - ) + ).run("schema_version", String(CURRENT_SCHEMA_VERSION)) }) apply() } + +export { type DbInstance } from "./sqlite-adapter" diff --git a/src/lib/pricing-scheduler.ts b/src/lib/pricing-scheduler.ts index eed91d4ca..07f759fda 100644 --- a/src/lib/pricing-scheduler.ts +++ b/src/lib/pricing-scheduler.ts @@ -12,11 +12,8 @@ export interface SchedulePricingSyncOptions { function readLastSync(): number { try { const row = getDb() - .query< - { value: string }, - [] - >("SELECT value FROM meta WHERE key = 'last_pricing_sync_ts'") - .get() + .prepare("SELECT value FROM meta WHERE key = 'last_pricing_sync_ts'") + .get() as { value: string } | undefined if (!row) return 0 return Number.parseInt(row.value, 10) || 0 } catch { diff --git a/src/lib/pricing-sync-runner.ts b/src/lib/pricing-sync-runner.ts index a5a333f01..f3b790e42 100644 --- a/src/lib/pricing-sync-runner.ts +++ b/src/lib/pricing-sync-runner.ts @@ -42,14 +42,14 @@ interface CurrentRow { function selectCurrentVersion(modelId: string): CurrentRow | undefined { return ( - getDb() - .query( + (getDb() + .prepare( `SELECT id, input_per_mtok, cached_input_per_mtok, output_per_mtok, reasoning_per_mtok, premium_multiplier, premium_unit_price FROM model_pricing_versions WHERE model_id = ? AND effective_to IS NULL`, ) - .get(modelId) ?? undefined + .get(modelId) as CurrentRow | undefined) ?? undefined ) } @@ -82,35 +82,33 @@ function applyPricingChange(args: ApplyArgs): "changed" | "unchanged" { return "unchanged" } if (current) { - db.run("UPDATE model_pricing_versions SET effective_to = ? WHERE id = ?", [ - args.detectedAt, - current.id, - ]) + db.prepare( + "UPDATE model_pricing_versions SET effective_to = ? 
WHERE id = ?", + ).run(args.detectedAt, current.id) } - db.run( + db.prepare( `INSERT INTO model_pricing_versions ( model_id, effective_from, effective_to, input_per_mtok, cached_input_per_mtok, output_per_mtok, reasoning_per_mtok, premium_multiplier, premium_unit_price, currency, source, source_skus, sync_log_id, created_at ) VALUES (?, ?, NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, - [ - args.row.model_id, - args.detectedAt, - newRow.input_per_mtok, - newRow.cached_input_per_mtok, - newRow.output_per_mtok, - newRow.reasoning_per_mtok, - newRow.premium_multiplier, - newRow.premium_unit_price, - args.row.currency ?? "USD", - args.row.source ?? null, - args.row.source_skus ? JSON.stringify(args.row.source_skus) : null, - args.syncLogId, - args.detectedAt, - ], + ).run( + args.row.model_id, + args.detectedAt, + newRow.input_per_mtok, + newRow.cached_input_per_mtok, + newRow.output_per_mtok, + newRow.reasoning_per_mtok, + newRow.premium_multiplier, + newRow.premium_unit_price, + args.row.currency ?? "USD", + args.row.source ?? null, + args.row.source_skus ? JSON.stringify(args.row.source_skus) : null, + args.syncLogId, + args.detectedAt, ) - db.run( + db.prepare( `INSERT INTO model_pricing ( model_id, input_per_mtok, cached_input_per_mtok, output_per_mtok, reasoning_per_mtok, premium_multiplier, premium_unit_price, @@ -127,19 +125,18 @@ function applyPricingChange(args: ApplyArgs): "changed" | "unchanged" { source = excluded.source, source_skus = excluded.source_skus, updated_at = excluded.updated_at`, - [ - args.row.model_id, - newRow.input_per_mtok, - newRow.cached_input_per_mtok, - newRow.output_per_mtok, - newRow.reasoning_per_mtok, - newRow.premium_multiplier, - newRow.premium_unit_price, - args.row.currency ?? "USD", - args.row.source ?? null, - args.row.source_skus ? 
JSON.stringify(args.row.source_skus) : null, - args.detectedAt, - ], + ).run( + args.row.model_id, + newRow.input_per_mtok, + newRow.cached_input_per_mtok, + newRow.output_per_mtok, + newRow.reasoning_per_mtok, + newRow.premium_multiplier, + newRow.premium_unit_price, + args.row.currency ?? "USD", + args.row.source ?? null, + args.row.source_skus ? JSON.stringify(args.row.source_skus) : null, + args.detectedAt, ) return "changed" } @@ -218,15 +215,17 @@ export async function runPricingSync( }) if (result === "changed") updated += 1 } - getDb().run( - `INSERT INTO meta (key, value) VALUES ('last_pricing_sync_ts', ?) + getDb() + .prepare( + `INSERT INTO meta (key, value) VALUES ('last_pricing_sync_ts', ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value`, - [String(detectedAt)], - ) - getDb().run( - "UPDATE pricing_sync_log SET models_updated = ?, source_count = ? WHERE id = ?", - [updated, parsed.models.length, logId], - ) + ) + .run(String(detectedAt)) + getDb() + .prepare( + "UPDATE pricing_sync_log SET models_updated = ?, source_count = ? WHERE id = ?", + ) + .run(updated, parsed.models.length, logId) }) tx() @@ -248,7 +247,7 @@ function recordSyncLog(args: RecordSyncLogArgs): number { (ts, status, llm_model, models_updated, models_rejected, error) VALUES (?, ?, ?, ?, ?, ?)`, ) - stmt.run( + const result = stmt.run( args.ts, args.status, args.llmModel, @@ -256,8 +255,5 @@ function recordSyncLog(args: RecordSyncLogArgs): number { args.modelsRejected, args.error ?? null, ) - const idRow = getDb() - .query<{ id: number }, []>("SELECT last_insert_rowid() AS id") - .get() - return idRow?.id ?? 0 + return Number(result.lastInsertRowid) } diff --git a/src/lib/pricing-sync.ts b/src/lib/pricing-sync.ts index b7a526a0d..14aa86cd3 100644 --- a/src/lib/pricing-sync.ts +++ b/src/lib/pricing-sync.ts @@ -132,6 +132,9 @@ Rules: - Use USD; if a row is in another currency, convert to USD only if obvious, otherwise omit. - "per_mtok" means dollars per 1,000,000 tokens. 
Convert per-1k or per-token rates accordingly. - premium_multiplier and premium_unit_price come from GitHub Copilot premium pricing — do not invent. +- For Anthropic models, the pricing page lists "Prompt caching" with "Write" and "Read" prices. + "Read" is the cached_input_per_mtok. "Write" is the cache-write cost (ignore it — we don't track it). + If no caching prices are listed, leave cached_input_per_mtok as null. - Leave fields you cannot confidently derive as null. Do not guess. - Output a single JSON object. No markdown fences, no commentary.` diff --git a/src/lib/sqlite-adapter.ts b/src/lib/sqlite-adapter.ts new file mode 100644 index 000000000..937050f81 --- /dev/null +++ b/src/lib/sqlite-adapter.ts @@ -0,0 +1,110 @@ +/* eslint-disable @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-return, @typescript-eslint/no-require-imports, unicorn/prefer-module, @typescript-eslint/no-unnecessary-type-parameters */ +/** + * Runtime-adaptive SQLite adapter. + * + * Uses `bun:sqlite` when running under Bun, `better-sqlite3` otherwise (Node.js). + * Exposes a unified interface that matches the subset we actually use. 
+ */ + +export interface DbStatement { + run(...params: Array): { lastInsertRowid: number | bigint } + get(...params: Array): unknown + all(...params: Array): Array +} + +export interface DbInstance { + prepare(sql: string): DbStatement + exec(sql: string): void + pragma(pragma: string): unknown + transaction( + fn: (...args: Array) => T, + ): (...args: Array) => T + close(): void +} + +const isBun = typeof globalThis.Bun !== "undefined" + +export function createDatabase(dbPath: string): DbInstance { + if (isBun) { + return createBunDatabase(dbPath) + } + return createBetterSqlite3Database(dbPath) +} + +function createBunDatabase(dbPath: string): DbInstance { + // Dynamic import to avoid bundler resolving it on Node + const { Database } = require("bun:sqlite") + const db = new Database(dbPath, { create: true }) + + return { + prepare(sql: string): DbStatement { + const stmt = db.query(sql) + return { + run(...params: Array) { + stmt.run(...params) + // bun:sqlite doesn't return lastInsertRowid from run(), + // but we can query it separately when needed + const row = db.query("SELECT last_insert_rowid() AS id").get() + return { lastInsertRowid: row?.id ?? 0 } + }, + get(...params: Array) { + return stmt.get(...params) ?? 
undefined + }, + all(...params: Array) { + return stmt.all(...params) + }, + } + }, + exec(sql: string) { + db.exec(sql) + }, + pragma(pragma: string) { + // bun:sqlite uses db.query("PRAGMA ...").get() for read pragmas + if (pragma.includes("=")) { + db.run(`PRAGMA ${pragma}`) + return undefined + } + return db.query(`PRAGMA ${pragma}`).get() + }, + transaction(fn: (...args: Array) => T) { + return db.transaction(fn) + }, + close() { + db.close() + }, + } +} + +function createBetterSqlite3Database(dbPath: string): DbInstance { + const BetterSqlite3 = require("better-sqlite3") + const db = new BetterSqlite3(dbPath) + + return { + prepare(sql: string): DbStatement { + const stmt = db.prepare(sql) + return { + run(...params: Array) { + return stmt.run(...params) + }, + get(...params: Array) { + return stmt.get(...params) ?? undefined + }, + all(...params: Array) { + return stmt.all(...params) + }, + } + }, + exec(sql: string) { + db.exec(sql) + }, + pragma(pragma: string) { + return db.pragma(pragma) + }, + transaction(fn: (...args: Array) => T) { + return db.transaction(fn) + }, + close() { + db.close() + }, + } +} diff --git a/src/lib/usage-recorder.ts b/src/lib/usage-recorder.ts index 2debabca9..a3395ffcd 100644 --- a/src/lib/usage-recorder.ts +++ b/src/lib/usage-recorder.ts @@ -44,7 +44,7 @@ export function recordUsage(input: RecordUsageInput): void { const ts = Date.now() const pricing = db - .query( + .prepare( `SELECT input_per_mtok, cached_input_per_mtok, output_per_mtok, @@ -54,7 +54,7 @@ export function recordUsage(input: RecordUsageInput): void { FROM model_pricing WHERE model_id = ?`, ) - .get(input.modelId) + .get(input.modelId) as PricingRow | undefined const inputPrice = pricing?.input_per_mtok ?? null const cachedInputPrice = pricing?.cached_input_per_mtok ?? 
null diff --git a/src/lib/usage-stats.ts b/src/lib/usage-stats.ts index 9ed6f9b87..f44d4eb0c 100644 --- a/src/lib/usage-stats.ts +++ b/src/lib/usage-stats.ts @@ -1,5 +1,3 @@ -import type { Database } from "bun:sqlite" - import { getDb } from "./db" export type Lens = "historical" | "current" | "timeline" @@ -55,30 +53,30 @@ const COST_EXPRESSIONS: Record = { historical: { table: "usage_events ue", cost: `( - ue.input_tokens / 1e6 * ue.input_price_snapshot - + ue.cached_input_tokens / 1e6 * ue.cached_input_price_snapshot - + ue.output_tokens / 1e6 * ue.output_price_snapshot - + ue.reasoning_tokens / 1e6 * ue.reasoning_price_snapshot + ue.input_tokens / 1e6 * COALESCE(ue.input_price_snapshot, 0) + + ue.cached_input_tokens / 1e6 * COALESCE(ue.cached_input_price_snapshot, 0) + + ue.output_tokens / 1e6 * COALESCE(ue.output_price_snapshot, 0) + + ue.reasoning_tokens / 1e6 * COALESCE(ue.reasoning_price_snapshot, 0) )`, }, current: { table: "usage_events ue LEFT JOIN model_pricing mp ON mp.model_id = ue.model_id", cost: `( - ue.input_tokens / 1e6 * mp.input_per_mtok - + ue.cached_input_tokens / 1e6 * mp.cached_input_per_mtok - + ue.output_tokens / 1e6 * mp.output_per_mtok - + ue.reasoning_tokens / 1e6 * mp.reasoning_per_mtok + ue.input_tokens / 1e6 * COALESCE(mp.input_per_mtok, 0) + + ue.cached_input_tokens / 1e6 * COALESCE(mp.cached_input_per_mtok, 0) + + ue.output_tokens / 1e6 * COALESCE(mp.output_per_mtok, 0) + + ue.reasoning_tokens / 1e6 * COALESCE(mp.reasoning_per_mtok, 0) )`, }, timeline: { table: "usage_events ue LEFT JOIN model_pricing_versions pv ON pv.model_id = ue.model_id AND ue.ts >= pv.effective_from AND (pv.effective_to IS NULL OR ue.ts < pv.effective_to)", cost: `( - ue.input_tokens / 1e6 * pv.input_per_mtok - + ue.cached_input_tokens / 1e6 * pv.cached_input_per_mtok - + ue.output_tokens / 1e6 * pv.output_per_mtok - + ue.reasoning_tokens / 1e6 * pv.reasoning_per_mtok + ue.input_tokens / 1e6 * COALESCE(pv.input_per_mtok, 0) + + ue.cached_input_tokens / 1e6 * 
COALESCE(pv.cached_input_per_mtok, 0) + + ue.output_tokens / 1e6 * COALESCE(pv.output_per_mtok, 0) + + ue.reasoning_tokens / 1e6 * COALESCE(pv.reasoning_per_mtok, 0) )`, }, } @@ -219,17 +217,16 @@ function buildByAccount( } export function computeUsageStats(filters: UsageStatsFilters): UsageStats { - const db: Database = getDb() + const db = getDb() const { table, cost } = COST_EXPRESSIONS[filters.lens] const filter = buildFilter(filters) const totalsRow = - db - .query< - AggregateRow, - Array - >(`SELECT ${COMMON_AGGREGATE(cost)} FROM ${table} WHERE ${filter.sql}`) - .get(...filter.params) + (db + .prepare( + `SELECT ${COMMON_AGGREGATE(cost)} FROM ${table} WHERE ${filter.sql}`, + ) + .get(...filter.params) as AggregateRow | undefined) ?? ({ input_tokens: 0, cached_input_tokens: 0, @@ -241,27 +238,27 @@ export function computeUsageStats(filters: UsageStatsFilters): UsageStats { } as AggregateRow) const byAccountRows = db - .query>( + .prepare( `SELECT ue.account_name, ${COMMON_AGGREGATE(cost)} FROM ${table} WHERE ${filter.sql} GROUP BY ue.account_name ORDER BY ue.account_name`, ) - .all(...filter.params) + .all(...filter.params) as Array const byAccountModelRows = db - .query>( + .prepare( `SELECT ue.account_name, ue.model_id, ue.endpoint, ${COMMON_AGGREGATE(cost)} FROM ${table} WHERE ${filter.sql} GROUP BY ue.account_name, ue.model_id, ue.endpoint ORDER BY ue.account_name, ue.model_id, ue.endpoint`, ) - .all(...filter.params) + .all(...filter.params) as Array const dailyRows = db - .query>( + .prepare( `SELECT date(ue.ts/1000, 'unixepoch', 'localtime') AS day, ue.account_name, ue.model_id, ${COMMON_AGGREGATE(cost)} FROM ${table} @@ -269,16 +266,17 @@ export function computeUsageStats(filters: UsageStatsFilters): UsageStats { GROUP BY day, ue.account_name, ue.model_id ORDER BY day, ue.account_name, ue.model_id`, ) - .all(...filter.params) + .all(...filter.params) as Array - const missing = db - .query<{ model_id: string }, [number, number]>( - `SELECT DISTINCT 
model_id FROM usage_events + const missing = ( + db + .prepare( + `SELECT DISTINCT model_id FROM usage_events WHERE model_id NOT IN (SELECT model_id FROM model_pricing) AND ts BETWEEN ? AND ?`, - ) - .all(filters.from, filters.to) - .map((r) => r.model_id) + ) + .all(filters.from, filters.to) as Array<{ model_id: string }> + ).map((r) => r.model_id) return { range: { from: filters.from, to: filters.to }, diff --git a/src/pricing-sync-cmd.ts b/src/pricing-sync-cmd.ts index 55faa6cc4..9cbf1b29d 100644 --- a/src/pricing-sync-cmd.ts +++ b/src/pricing-sync-cmd.ts @@ -2,7 +2,7 @@ import { defineCommand } from "citty" import consola from "consola" import { serve, type ServerHandler } from "srvx" -import { loadAccounts } from "./lib/accounts-loader" +import { loadAccounts, parseGithubTokenArgs } from "./lib/accounts-loader" import { initDb } from "./lib/db" import { ensurePaths, PATHS } from "./lib/paths" import { runPricingSync } from "./lib/pricing-sync-runner" @@ -38,13 +38,22 @@ async function bootstrapServer( initDb(options.dbPath) await cacheVSCodeVersion() - let legacyToken = options.githubToken - if (!options.accountsFile && !legacyToken) { - legacyToken = await setupGitHubToken() + const multiTokenEntries = + options.githubToken?.includes(",") ? + parseGithubTokenArgs(options.githubToken, options.accountType) + : undefined + + let legacyToken: string | undefined + if (!multiTokenEntries && !options.accountsFile) { + legacyToken = options.githubToken + if (!legacyToken) { + legacyToken = await setupGitHubToken() + } } const loaded = await loadAccounts({ accountsFile: options.accountsFile, + legacyTokens: multiTokenEntries, legacyToken, defaultAccountType: options.accountType, }) @@ -109,7 +118,8 @@ export const pricingSyncCmd = defineCommand({ "github-token": { alias: "g", type: "string", - description: "GitHub token", + description: + "GitHub token(s). 
Supports comma-separated multi-token format: name:type:token", }, "accounts-file": { type: "string", diff --git a/src/routes/usage/route.ts b/src/routes/usage/route.ts index 276268cd4..0acc35190 100644 --- a/src/routes/usage/route.ts +++ b/src/routes/usage/route.ts @@ -90,21 +90,21 @@ function fetchPricingMeta(): { try { const db = getDb() const models = db - .query( + .prepare( `SELECT model_id, input_per_mtok, cached_input_per_mtok, output_per_mtok, reasoning_per_mtok, premium_multiplier, premium_unit_price, source, updated_at FROM model_pricing ORDER BY model_id`, ) - .all() + .all() as Array const lastSync = - db - .query( + (db + .prepare( `SELECT id, ts, status, llm_model, models_updated, models_rejected, error FROM pricing_sync_log ORDER BY id DESC LIMIT 1`, ) - .get() ?? null + .get() as SyncLogEntry | undefined) ?? null return { models, lastSync } } catch { return { models: [], lastSync: null } diff --git a/src/start.ts b/src/start.ts index 701946373..d63f0fe85 100644 --- a/src/start.ts +++ b/src/start.ts @@ -7,7 +7,11 @@ import { serve, type ServerHandler } from "srvx" import invariant from "tiny-invariant" import { AccountPool, type Strategy } from "./lib/account-pool" -import { loadAccounts, persistAccounts } from "./lib/accounts-loader" +import { + loadAccounts, + parseGithubTokenArgs, + persistAccounts, +} from "./lib/accounts-loader" import { initDb } from "./lib/db" import { ensurePaths, PATHS } from "./lib/paths" import { schedulePricingSync } from "./lib/pricing-scheduler" @@ -106,16 +110,25 @@ export async function runServer(options: RunServerOptions): Promise { initDb(options.dbPath) await cacheVSCodeVersion() - // Resolve legacy single token if no accounts file is provided. 
- let legacyToken = options.githubToken - if (!options.accountsFile && !legacyToken) { - legacyToken = await setupGitHubToken() - } else if (legacyToken) { - consola.info("Using provided GitHub token") + // Resolve accounts: multi-token CLI → accounts file → single token → interactive + const multiTokenEntries = + options.githubToken?.includes(",") ? + parseGithubTokenArgs(options.githubToken, options.accountType) + : undefined + + let legacyToken: string | undefined + if (!multiTokenEntries && !options.accountsFile) { + legacyToken = options.githubToken + if (!legacyToken) { + legacyToken = await setupGitHubToken() + } else { + consola.info("Using provided GitHub token") + } } const loaded = await loadAccounts({ accountsFile: options.accountsFile, + legacyTokens: multiTokenEntries, legacyToken, defaultAccountType: options.accountType, }) @@ -216,7 +229,8 @@ export const start = defineCommand({ alias: "g", type: "string", description: - "Provide GitHub token directly (must be generated using the `auth` subcommand)", + "Provide GitHub token(s) directly. Supports comma-separated multi-token format: " + + 'name:type:token (e.g. 
"personal:individual:ghu_aaa,work:business:ghu_bbb")', }, "claude-code": { alias: "c", diff --git a/tests/accounts-loader.test.ts b/tests/accounts-loader.test.ts index 6d4d7c887..743724bd1 100644 --- a/tests/accounts-loader.test.ts +++ b/tests/accounts-loader.test.ts @@ -79,11 +79,8 @@ describe("accounts-loader", () => { persistAccounts(accounts) persistAccounts(accounts) // again — should not error or duplicate const rows = db - .query< - { name: string; account_type: string }, - [] - >("SELECT name, account_type FROM accounts") - .all() + .prepare("SELECT name, account_type FROM accounts") + .all() as Array<{ name: string; account_type: string }> expect(rows).toHaveLength(1) expect(rows[0].name).toBe("default") db.close() diff --git a/tests/db.test.ts b/tests/db.test.ts index b61512c41..a450e1967 100644 --- a/tests/db.test.ts +++ b/tests/db.test.ts @@ -26,12 +26,13 @@ describe("db module", () => { const p = tmpDbPath() const db = initDb(p) - const tables = db - .query<{ name: string }, []>( - "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name", - ) - .all() - .map((r) => r.name) + const tables = ( + db + .prepare( + "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name", + ) + .all() as Array<{ name: string }> + ).map((r) => r.name) for (const t of [ "accounts", @@ -46,11 +47,8 @@ describe("db module", () => { } const ver = db - .query< - { value: string }, - [] - >("SELECT value FROM meta WHERE key='schema_version'") - .get() + .prepare("SELECT value FROM meta WHERE key='schema_version'") + .get() as { value: string } | undefined expect(ver?.value).toBe(String(CURRENT_SCHEMA_VERSION)) db.close() @@ -60,25 +58,24 @@ describe("db module", () => { test("initDb is idempotent: running twice leaves schema_version unchanged and does not duplicate rows", () => { const p = tmpDbPath() const db1 = initDb(p) - db1.run( - "INSERT INTO meta (key, value) VALUES ('marker', 'persisted') " - + "ON CONFLICT(key) DO UPDATE SET value=excluded.value", - ) + 
db1 + .prepare( + "INSERT INTO meta (key, value) VALUES ('marker', 'persisted') " + + "ON CONFLICT(key) DO UPDATE SET value=excluded.value", + ) + .run() db1.close() __resetDbForTests() const db2 = initDb(p) const marker = db2 - .query<{ value: string }, []>("SELECT value FROM meta WHERE key='marker'") - .get() + .prepare("SELECT value FROM meta WHERE key='marker'") + .get() as { value: string } | undefined expect(marker?.value).toBe("persisted") const ver = db2 - .query< - { value: string }, - [] - >("SELECT value FROM meta WHERE key='schema_version'") - .get() + .prepare("SELECT value FROM meta WHERE key='schema_version'") + .get() as { value: string } | undefined expect(ver?.value).toBe(String(CURRENT_SCHEMA_VERSION)) db2.close() @@ -102,15 +99,15 @@ describe("db module", () => { const db = initDb(p) withTransaction((d) => { - d.run( + d.prepare( "INSERT INTO accounts (name, account_type, created_at) " + "VALUES ('a', 'individual', 1)", - ) + ).run() }) - const row = db - .query<{ name: string }, []>("SELECT name FROM accounts WHERE name='a'") - .get() + const row = db.prepare("SELECT name FROM accounts WHERE name='a'").get() as + | { name: string } + | undefined expect(row?.name).toBe("a") db.close() @@ -123,18 +120,18 @@ describe("db module", () => { expect(() => withTransaction((d) => { - d.run( + d.prepare( "INSERT INTO accounts (name, account_type, created_at) " + "VALUES ('b', 'individual', 1)", - ) + ).run() throw new Error("boom") }), ).toThrow("boom") - const row = db - .query<{ name: string }, []>("SELECT name FROM accounts WHERE name='b'") - .get() - expect(row).toBeNull() + const row = db.prepare("SELECT name FROM accounts WHERE name='b'").get() as + | { name: string } + | undefined + expect(row).toBeUndefined() db.close() fs.unlinkSync(p) @@ -143,10 +140,21 @@ describe("db module", () => { test("WAL mode is enabled", () => { const p = tmpDbPath() const db = initDb(p) - const mode = db - .query<{ journal_mode: string }, []>("PRAGMA journal_mode") - 
.get() - expect(mode?.journal_mode.toLowerCase()).toBe("wal") + const mode = db.pragma("journal_mode") + // bun:sqlite returns { journal_mode: "wal" }, better-sqlite3 returns [{ journal_mode: "wal" }] + let val: string + if (Array.isArray(mode)) { + val = (mode as Array<{ journal_mode: string }>)[0].journal_mode + } else if ( + typeof mode === "object" + && mode !== null + && "journal_mode" in mode + ) { + val = (mode as { journal_mode: string }).journal_mode + } else { + val = String(mode) + } + expect(val.toLowerCase()).toBe("wal") db.close() fs.unlinkSync(p) }) @@ -154,12 +162,11 @@ describe("db module", () => { test("schema includes expected indexes", () => { const p = tmpDbPath() const db = initDb(p) - const idxs = db - .query<{ name: string }, []>( - "SELECT name FROM sqlite_master WHERE type='index'", - ) - .all() - .map((r) => r.name) + const idxs = ( + db + .prepare("SELECT name FROM sqlite_master WHERE type='index'") + .all() as Array<{ name: string }> + ).map((r) => r.name) expect(idxs).toContain("idx_usage_account_model_ts") expect(idxs).toContain("idx_usage_ts") expect(idxs).toContain("idx_pricing_versions_model_time") diff --git a/tests/parse-github-token.test.ts b/tests/parse-github-token.test.ts new file mode 100644 index 000000000..1da9a7c3b --- /dev/null +++ b/tests/parse-github-token.test.ts @@ -0,0 +1,122 @@ +import { test, expect, describe } from "bun:test" + +import { + parseGithubTokenArg, + parseGithubTokenArgs, +} from "../src/lib/accounts-loader" + +describe("parseGithubTokenArg", () => { + test("1 segment: pure token", () => { + const entry = parseGithubTokenArg("ghu_abc123", 0, "individual") + expect(entry).toEqual({ + name: "account-1", + github_token: "ghu_abc123", + account_type: "individual", + }) + }) + + test("1 segment: uses index for name", () => { + const entry = parseGithubTokenArg("ghu_xyz", 2, "individual") + expect(entry.name).toBe("account-3") + }) + + test("2 segments: name:token", () => { + const entry = 
parseGithubTokenArg("personal:ghu_abc", 0, "individual") + expect(entry).toEqual({ + name: "personal", + github_token: "ghu_abc", + account_type: "individual", + }) + }) + + test("2 segments: uses defaultType", () => { + const entry = parseGithubTokenArg("work:ghu_abc", 0, "business") + expect(entry.account_type).toBe("business") + }) + + test("3 segments: name:type:token", () => { + const entry = parseGithubTokenArg("work:business:ghu_abc", 0, "individual") + expect(entry).toEqual({ + name: "work", + github_token: "ghu_abc", + account_type: "business", + }) + }) + + test("3+ segments: token containing colons", () => { + const entry = parseGithubTokenArg( + "work:enterprise:ghu_abc:def:ghi", + 0, + "individual", + ) + expect(entry).toEqual({ + name: "work", + github_token: "ghu_abc:def:ghi", + account_type: "enterprise", + }) + }) +}) + +describe("parseGithubTokenArgs", () => { + test("single token", () => { + const entries = parseGithubTokenArgs("ghu_abc", "individual") + expect(entries).toHaveLength(1) + expect(entries[0].github_token).toBe("ghu_abc") + }) + + test("multiple comma-separated tokens", () => { + const entries = parseGithubTokenArgs( + "personal:individual:ghu_aaa,work:business:ghu_bbb", + "individual", + ) + expect(entries).toHaveLength(2) + expect(entries[0]).toEqual({ + name: "personal", + github_token: "ghu_aaa", + account_type: "individual", + }) + expect(entries[1]).toEqual({ + name: "work", + github_token: "ghu_bbb", + account_type: "business", + }) + }) + + test("trims whitespace around entries", () => { + const entries = parseGithubTokenArgs( + " a:ghu_aaa , b:ghu_bbb ", + "individual", + ) + expect(entries).toHaveLength(2) + expect(entries[0].name).toBe("a") + expect(entries[1].name).toBe("b") + }) + + test("ignores empty segments from trailing comma", () => { + const entries = parseGithubTokenArgs("a:ghu_aaa,", "individual") + expect(entries).toHaveLength(1) + }) + + test("mixed formats", () => { + const entries = parseGithubTokenArgs( + 
"ghu_bare,named:ghu_two,full:business:ghu_three", + "individual", + ) + expect(entries).toHaveLength(3) + expect(entries[0]).toEqual({ + name: "account-1", + github_token: "ghu_bare", + account_type: "individual", + }) + expect(entries[1]).toEqual({ + name: "named", + github_token: "ghu_two", + account_type: "individual", + }) + expect(entries[2]).toEqual({ + name: "full", + github_token: "ghu_three", + account_type: "business", + }) + }) +}) diff --git a/tests/pricing-sync-runner.test.ts b/tests/pricing-sync-runner.test.ts index 1061bed25..f0d29b056 100644 --- a/tests/pricing-sync-runner.test.ts +++ b/tests/pricing-sync-runner.test.ts @@ -37,19 +37,15 @@ describe("runPricingSync", () => { expect(out.updated).toBe(1) const versions = getDb() - .query< - { count: number }, - [] - >("SELECT COUNT(*) AS count FROM model_pricing_versions") - .get() + .prepare("SELECT COUNT(*) AS count FROM model_pricing_versions") + .get() as { count: number } | undefined expect(versions?.count).toBe(1) const live = getDb() - .query< - { input_per_mtok: number }, - [] - >("SELECT input_per_mtok FROM model_pricing WHERE model_id = 'gpt-4o'") - .get() + .prepare( + "SELECT input_per_mtok FROM model_pricing WHERE model_id = 'gpt-4o'", + ) + .get() as { input_per_mtok: number } | undefined expect(live?.input_per_mtok).toBe(5) }) @@ -64,11 +60,8 @@ describe("runPricingSync", () => { }) expect(r2.updated).toBe(0) const versions = getDb() - .query< - { count: number }, - [] - >("SELECT COUNT(*) AS count FROM model_pricing_versions") - .get() + .prepare("SELECT COUNT(*) AS count FROM model_pricing_versions") + .get() as { count: number } | undefined expect(versions?.count).toBe(1) }) @@ -83,11 +76,10 @@ describe("runPricingSync", () => { }) expect(r2.updated).toBe(1) const rows = getDb() - .query< - { effective_to: number | null; input_per_mtok: number }, - [] - >("SELECT effective_to, input_per_mtok FROM model_pricing_versions ORDER BY id") - .all() + .prepare( + "SELECT effective_to, 
input_per_mtok FROM model_pricing_versions ORDER BY id", + ) + .all() as Array<{ effective_to: number | null; input_per_mtok: number }> expect(rows).toHaveLength(2) expect(rows[0].effective_to).not.toBeNull() expect(rows[1].effective_to).toBeNull() @@ -108,11 +100,10 @@ describe("runPricingSync", () => { expect(r2.status).toBe("rejected") expect(r2.updated).toBe(0) const live = getDb() - .query< - { input_per_mtok: number }, - [] - >("SELECT input_per_mtok FROM model_pricing WHERE model_id = 'gpt-4o'") - .get() + .prepare( + "SELECT input_per_mtok FROM model_pricing WHERE model_id = 'gpt-4o'", + ) + .get() as { input_per_mtok: number } | undefined expect(live?.input_per_mtok).toBe(1) // unchanged }) }) diff --git a/tests/usage-recorder.test.ts b/tests/usage-recorder.test.ts index 0bed74257..09d96975f 100644 --- a/tests/usage-recorder.test.ts +++ b/tests/usage-recorder.test.ts @@ -36,10 +36,9 @@ const baseInput = { function setupDb() { __resetDbForTests() const db = initDb(":memory:") - db.run( + db.prepare( "INSERT INTO accounts (name, account_type, created_at) VALUES (?, ?, ?)", - [ACCOUNT.name, ACCOUNT.accountType, Date.now()], - ) + ).run(ACCOUNT.name, ACCOUNT.accountType, Date.now()) return db } @@ -50,17 +49,16 @@ describe("recordUsage", () => { test("inserts an event and a daily row", () => { const db = setupDb() - db.run( + db.prepare( `INSERT INTO model_pricing ( model_id, input_per_mtok, cached_input_per_mtok, output_per_mtok, reasoning_per_mtok, premium_multiplier, premium_unit_price, updated_at ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, - ["gpt-4o", 5, 1, 15, 0, 1.0, 0.04, Date.now()], - ) + ).run("gpt-4o", 5, 1, 15, 0, 1.0, 0.04, Date.now()) recordUsage(baseInput) - const events = db.query("SELECT * FROM usage_events").all() as Array<{ + const events = db.prepare("SELECT * FROM usage_events").all() as Array<{ account_name: string model_id: string input_tokens: number @@ -72,7 +70,7 @@ describe("recordUsage", () => { 
expect(events[0].input_price_snapshot).toBe(5) expect(events[0].premium_request_count).toBe(1) - const daily = db.query("SELECT * FROM usage_daily").all() as Array<{ + const daily = db.prepare("SELECT * FROM usage_daily").all() as Array<{ req_count: number input_tokens: number premium_requests: number @@ -88,11 +86,8 @@ describe("recordUsage", () => { recordUsage(baseInput) recordUsage(baseInput) const daily = db - .query< - { req_count: number; input_tokens: number }, - [] - >("SELECT req_count, input_tokens FROM usage_daily") - .all() + .prepare("SELECT req_count, input_tokens FROM usage_daily") + .all() as Array<{ req_count: number; input_tokens: number }> expect(daily).toHaveLength(1) expect(daily[0].req_count).toBe(2) expect(daily[0].input_tokens).toBe(200) @@ -102,11 +97,12 @@ describe("recordUsage", () => { const db = setupDb() recordUsage(baseInput) const ev = db - .query< - { input_price_snapshot: number | null; premium_request_count: number }, - [] - >("SELECT input_price_snapshot, premium_request_count FROM usage_events") - .get() + .prepare( + "SELECT input_price_snapshot, premium_request_count FROM usage_events", + ) + .get() as + | { input_price_snapshot: number | null; premium_request_count: number } + | undefined expect(ev?.input_price_snapshot).toBeNull() expect(ev?.premium_request_count).toBe(0) }) @@ -114,7 +110,7 @@ describe("recordUsage", () => { test("isInternal=true inserts nothing", () => { const db = setupDb() recordUsage({ ...baseInput, isInternal: true }) - const events = db.query("SELECT * FROM usage_events").all() + const events = db.prepare("SELECT * FROM usage_events").all() expect(events).toHaveLength(0) }) diff --git a/tests/usage-stats.test.ts b/tests/usage-stats.test.ts index 9503f5eb6..6296e848f 100644 --- a/tests/usage-stats.test.ts +++ b/tests/usage-stats.test.ts @@ -20,17 +20,15 @@ const ACCOUNT: Account = { function setupDb() { __resetDbForTests() const db = initDb(":memory:") - db.run( + db.prepare( "INSERT INTO accounts 
(name, account_type, created_at) VALUES (?, ?, ?)", - [ACCOUNT.name, ACCOUNT.accountType, Date.now()], - ) - db.run( + ).run(ACCOUNT.name, ACCOUNT.accountType, Date.now()) + db.prepare( `INSERT INTO model_pricing ( model_id, input_per_mtok, cached_input_per_mtok, output_per_mtok, reasoning_per_mtok, premium_multiplier, premium_unit_price, updated_at ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, - ["gpt-4o", 2, 0, 8, 0, 1, 0.04, Date.now()], - ) + ).run("gpt-4o", 2, 0, 8, 0, 1, 0.04, Date.now()) return db } @@ -87,10 +85,9 @@ describe("computeUsageStats", () => { }, }) // Bump live pricing - getDb().run( - "UPDATE model_pricing SET input_per_mtok = ? WHERE model_id = ?", - [10, "gpt-4o"], - ) + getDb() + .prepare("UPDATE model_pricing SET input_per_mtok = ? WHERE model_id = ?") + .run(10, "gpt-4o") const stats = computeUsageStats({ from: 0, to: Date.now() + 1, diff --git a/tsdown.config.ts b/tsdown.config.ts index 60ef1cdcc..5d23637e9 100644 --- a/tsdown.config.ts +++ b/tsdown.config.ts @@ -1,3 +1,4 @@ +/* eslint-disable @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-require-imports, unicorn/prefer-module */ import { defineConfig } from "tsdown" export default defineConfig({ @@ -14,4 +15,22 @@ export default defineConfig({ env: { NODE_ENV: "production", }, + + // Handle .sql files imported with { type: "text" } as raw text + inputOptions: { + plugins: [ + { + name: "sql-text-loader", + load(id) { + if (id.endsWith(".sql")) { + const content = require("node:fs").readFileSync(id, "utf8") + return `export default ${JSON.stringify(content)};` + } + }, + }, + ], + }, + + // Mark native modules as external + external: ["better-sqlite3", "bun:sqlite"], }) From f60b5f19a6926ddcaa558c999b18c0d57c214806 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sun, 26 Apr 2026 11:56:58 +0800 Subject: [PATCH 22/34] chore: enforce LF line endings, add npm publish workflow - Add .gitattributes with eol=lf for 
cross-platform consistency - Add GitHub Actions workflow to auto-publish to npm on release Co-Authored-By: Claude Opus 4.6 --- .gitattributes | 2 ++ .github/workflows/publish.yml | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 .gitattributes create mode 100644 .github/workflows/publish.yml diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..9a3fe0caa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Force LF line endings everywhere (cross-platform consistency) +* text=auto eol=lf diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 000000000..3075c90f1 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,34 @@ +name: Publish to npm + +on: + release: + types: [published] + +permissions: + contents: read + id-token: write + +jobs: + publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - run: bun install --frozen-lockfile + + - run: bun test + + - run: bun run build + + - uses: actions/setup-node@v4 + with: + node-version: 22 + registry-url: https://registry.npmjs.org + + - run: npm publish --access public --ignore-scripts + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} From 944c531b0176ac97c4458d21a2bb1ad47c87d41a Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sun, 26 Apr 2026 12:08:56 +0800 Subject: [PATCH 23/34] fix: always parse --github-token format, not only when comma present Single tokens with name:type:token format were treated as raw tokens because parsing only triggered when comma was found. 
Co-Authored-By: Claude Opus 4.6 --- src/pricing-sync-cmd.ts | 21 +++++++++++---------- src/start.ts | 24 +++++++++++++----------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/src/pricing-sync-cmd.ts b/src/pricing-sync-cmd.ts index 9cbf1b29d..fc0bb4736 100644 --- a/src/pricing-sync-cmd.ts +++ b/src/pricing-sync-cmd.ts @@ -38,17 +38,18 @@ async function bootstrapServer( initDb(options.dbPath) await cacheVSCodeVersion() - const multiTokenEntries = - options.githubToken?.includes(",") ? - parseGithubTokenArgs(options.githubToken, options.accountType) - : undefined - let legacyToken: string | undefined - if (!multiTokenEntries && !options.accountsFile) { - legacyToken = options.githubToken - if (!legacyToken) { - legacyToken = await setupGitHubToken() - } + let multiTokenEntries: ReturnType | undefined + + if (options.githubToken && !options.accountsFile) { + multiTokenEntries = parseGithubTokenArgs( + options.githubToken, + options.accountType, + ) + } + + if (!multiTokenEntries?.length && !options.accountsFile) { + legacyToken = await setupGitHubToken() } const loaded = await loadAccounts({ diff --git a/src/start.ts b/src/start.ts index d63f0fe85..2a38e3e8d 100644 --- a/src/start.ts +++ b/src/start.ts @@ -111,21 +111,23 @@ export async function runServer(options: RunServerOptions): Promise { await cacheVSCodeVersion() // Resolve accounts: multi-token CLI → accounts file → single token → interactive - const multiTokenEntries = - options.githubToken?.includes(",") ? 
- parseGithubTokenArgs(options.githubToken, options.accountType) - : undefined - let legacyToken: string | undefined - if (!multiTokenEntries && !options.accountsFile) { - legacyToken = options.githubToken - if (!legacyToken) { - legacyToken = await setupGitHubToken() - } else { - consola.info("Using provided GitHub token") + let multiTokenEntries: ReturnType | undefined + + if (options.githubToken && !options.accountsFile) { + multiTokenEntries = parseGithubTokenArgs( + options.githubToken, + options.accountType, + ) + if (multiTokenEntries.length > 0) { + consola.info("Using provided GitHub token(s)") } } + if (!multiTokenEntries?.length && !options.accountsFile) { + legacyToken = await setupGitHubToken() + } + const loaded = await loadAccounts({ accountsFile: options.accountsFile, legacyTokens: multiTokenEntries, From 64ab17e69b0eff61d9d9eab4371e19d7fc6974cb Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sun, 26 Apr 2026 12:11:15 +0800 Subject: [PATCH 24/34] feat: support repeated --github-token flags Citty's underlying parser (mri) returns an array for repeated flags. Normalize string|string[] to comma-separated before parsing. Now both formats work: --github-token "a:individual:ghu_x,b:business:ghu_y" --github-token a:individual:ghu_x --github-token b:business:ghu_y Co-Authored-By: Claude Opus 4.6 --- src/pricing-sync-cmd.ts | 9 ++++++++- src/start.ts | 10 +++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/pricing-sync-cmd.ts b/src/pricing-sync-cmd.ts index fc0bb4736..4dd11b576 100644 --- a/src/pricing-sync-cmd.ts +++ b/src/pricing-sync-cmd.ts @@ -23,6 +23,13 @@ interface RunPricingSyncCmdOptions { verbose: boolean } +function normalizeGithubToken( + raw: string | Array | undefined, +): string | undefined { + if (!raw) return undefined + return Array.isArray(raw) ? 
raw.join(",") : raw +} + async function bootstrapServer( options: RunPricingSyncCmdOptions, ): Promise { @@ -153,7 +160,7 @@ export const pricingSyncCmd = defineCommand({ return runPricingSyncCmd({ port: Number.parseInt(args.port, 10), syncModel: args["sync-model"], - githubToken: args["github-token"], + githubToken: normalizeGithubToken(args["github-token"]), accountsFile: args["accounts-file"], accountType: args["account-type"], dbPath: args["db-path"], diff --git a/src/start.ts b/src/start.ts index 2a38e3e8d..2d34f9e38 100644 --- a/src/start.ts +++ b/src/start.ts @@ -41,6 +41,14 @@ interface RunServerOptions { pricingSyncDisabled: boolean } +/** Citty may return a string or string[] for repeated --github-token flags. Normalize to comma-separated. */ +function normalizeGithubToken( + raw: string | Array | undefined, +): string | undefined { + if (!raw) return undefined + return Array.isArray(raw) ? raw.join(",") : raw +} + async function promptClaudeCodeSetup(serverUrl: string): Promise { invariant(state.models, "Models should be loaded by now") @@ -297,7 +305,7 @@ export const start = defineCommand({ manual: args.manual, rateLimit, rateLimitWait: args.wait, - githubToken: args["github-token"], + githubToken: normalizeGithubToken(args["github-token"]), claudeCode: args["claude-code"], showToken: args["show-token"], proxyEnv: args["proxy-env"], From 2e7c01deacf36cc8b636b56a788cee45d0fe771e Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sun, 26 Apr 2026 12:12:38 +0800 Subject: [PATCH 25/34] docs: update README with repeated --github-token flag support Co-Authored-By: Claude Opus 4.6 --- README.md | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 1885c2f4b..a0577563c 100644 --- a/README.md +++ b/README.md @@ -85,24 +85,33 @@ npx @weavejam/copilot-proxy@latest start --github-token ghu_YOUR_TOKEN ### Multi-Token CLI -Pass multiple tokens in a single `--github-token` flag using comma-separated 
`name:type:token` format: +Pass multiple tokens using `--github-token` with `name:type:token` format. Both repeated flags and comma-separated values are supported: ```sh -# Full format: name:type:token +# Repeated flags (recommended for readability) +npx @weavejam/copilot-proxy@latest start \ + --github-token personal:individual:ghu_aaa \ + --github-token work:business:ghu_bbb + +# Comma-separated in a single flag npx @weavejam/copilot-proxy@latest start \ --github-token "personal:individual:ghu_aaa,work:business:ghu_bbb" # Omit type (defaults to individual): name:token npx @weavejam/copilot-proxy@latest start \ - --github-token "personal:ghu_aaa,work:ghu_bbb" + --github-token personal:ghu_aaa \ + --github-token work:ghu_bbb -# Pure tokens (auto-named account-1, account-2): token +# Pure tokens (auto-named account-1, account-2) npx @weavejam/copilot-proxy@latest start \ - --github-token "ghu_aaa,ghu_bbb" + --github-token ghu_aaa \ + --github-token ghu_bbb # Mixed formats work too npx @weavejam/copilot-proxy@latest start \ - --github-token "ghu_bare,named:ghu_two,full:business:ghu_three" + --github-token ghu_bare \ + --github-token named:ghu_two \ + --github-token full:business:ghu_three ``` ### Accounts File @@ -266,7 +275,7 @@ npx @weavejam/copilot-proxy@latest check-usage | `--port` | Port to listen on | 4141 | `-p` | | `--verbose` | Enable verbose logging | false | `-v` | | `--account-type` | Account type (individual, business, enterprise) | individual | `-a` | -| `--github-token` | GitHub token(s), supports `name:type:token` comma-separated format | — | `-g` | +| `--github-token` | GitHub token(s), supports repeated flags or comma-separated `name:type:token` | — | `-g` | | `--accounts-file` | Path to accounts JSON file | — | — | | `--strategy` | Load balancing: round-robin, least-busy, least-recent | round-robin | — | | `--rate-limit` | Minimum seconds between requests | — | `-r` | @@ -384,4 +393,4 @@ All data is stored in `~/.local/share/copilot-api/`: - Use 
`--rate-limit 30 --wait` to throttle requests and queue them instead of erroring. - Use `--manual` to approve each request individually — useful for debugging or auditing. - Use `--account-type business` or `enterprise` if your Copilot subscription is through an organization. See the [official docs](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization). -- Multi-token CLI (`--github-token "a:individual:ghu_x,b:business:ghu_y"`) is a quick alternative to accounts files for CI/CD or one-off use. +- Multi-token CLI supports both repeated flags (`--github-token a:ghu_x --github-token b:ghu_y`) and comma-separated (`--github-token "a:ghu_x,b:ghu_y"`). Great for CI/CD or one-off use without an accounts file. From e1b1a67a536448085f146f25ad1c2abd2f4001f1 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sun, 26 Apr 2026 12:29:34 +0800 Subject: [PATCH 26/34] feat: auto-detect account type and username from GitHub token - Add detectAccountInfo() that calls /user and /copilot_internal/user - Simplify --github-token format to just token or name:token - Account type (individual/business/enterprise) auto-detected from API - Username auto-detected when no name provided - Remove --account-type from auth add (always auto-detect) - Deduplicate account names with suffix when needed Co-Authored-By: Claude Opus 4.6 --- README.md | 40 +++++------- src/auth-add.ts | 46 +++---------- src/lib/accounts-loader.ts | 76 +++++++++++++++------- src/pricing-sync-cmd.ts | 5 +- src/services/github/detect-account-info.ts | 54 +++++++++++++++ src/start.ts | 5 +- tests/parse-github-token.test.ts | 66 +++++-------------- 7 files changed, 146 insertions(+), 146 deletions(-) create mode 100644 src/services/github/detect-account-info.ts diff 
--git a/README.md b/README.md index a0577563c..f0de4d0a2 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ A reverse-engineered proxy for the GitHub Copilot API that exposes it as an **Op - **OpenAI & Anthropic Compatible API** — `/v1/chat/completions`, `/v1/models`, `/v1/embeddings`, `/v1/messages`, `/v1/messages/count_tokens` - **Claude Code Integration** — One-command setup with `--claude-code`, or manual `settings.json` configuration - **Multi-Account & Load Balancing** — Configure multiple GitHub accounts with round-robin, least-busy, or least-recent strategies -- **Multi-Token CLI** — Pass multiple tokens directly on the command line with `name:type:token` format, no accounts file needed +- **Multi-Token CLI** — Pass multiple tokens directly on the command line, account type and username auto-detected - **Usage Tracking & Dashboard** — Built-in SQLite database tracks per-model token usage and costs; web dashboard at `/usage` - **Automatic Pricing Sync** — Periodically fetches model pricing from Azure and Anthropic to calculate accurate cost estimates - **Rate Limiting** — Configurable request throttling with optional wait mode @@ -85,35 +85,26 @@ npx @weavejam/copilot-proxy@latest start --github-token ghu_YOUR_TOKEN ### Multi-Token CLI -Pass multiple tokens using `--github-token` with `name:type:token` format. Both repeated flags and comma-separated values are supported: +Pass multiple tokens using `--github-token`. Account type and username are **auto-detected** from each token via the GitHub API. 
Both repeated flags and comma-separated values are supported: ```sh -# Repeated flags (recommended for readability) +# Multiple tokens — username and account type auto-detected npx @weavejam/copilot-proxy@latest start \ - --github-token personal:individual:ghu_aaa \ - --github-token work:business:ghu_bbb - -# Comma-separated in a single flag -npx @weavejam/copilot-proxy@latest start \ - --github-token "personal:individual:ghu_aaa,work:business:ghu_bbb" + --github-token ghu_aaa \ + --github-token ghu_bbb -# Omit type (defaults to individual): name:token +# With custom account names: name:token npx @weavejam/copilot-proxy@latest start \ --github-token personal:ghu_aaa \ --github-token work:ghu_bbb -# Pure tokens (auto-named account-1, account-2) -npx @weavejam/copilot-proxy@latest start \ - --github-token ghu_aaa \ - --github-token ghu_bbb - -# Mixed formats work too +# Comma-separated in a single flag npx @weavejam/copilot-proxy@latest start \ - --github-token ghu_bare \ - --github-token named:ghu_two \ - --github-token full:business:ghu_three + --github-token "ghu_aaa,work:ghu_bbb" ``` +Format: `token` or `name:token`. If no name is given, the GitHub username is used automatically. Account type (individual/business/enterprise) is always auto-detected — no need to specify it. 
+ ### Accounts File For persistent multi-account configuration, create a JSON file: @@ -134,11 +125,11 @@ npx @weavejam/copilot-proxy@latest start --accounts-file ./accounts.json ### Account Management Commands ```sh -# Add account interactively (auto-detects GitHub username) +# Add account interactively (auto-detects GitHub username and account type) npx @weavejam/copilot-proxy@latest auth add -# Add with explicit name and type -npx @weavejam/copilot-proxy@latest auth add --name work --account-type business +# Add with explicit name +npx @weavejam/copilot-proxy@latest auth add --name work # List all configured accounts npx @weavejam/copilot-proxy@latest auth list @@ -275,7 +266,7 @@ npx @weavejam/copilot-proxy@latest check-usage | `--port` | Port to listen on | 4141 | `-p` | | `--verbose` | Enable verbose logging | false | `-v` | | `--account-type` | Account type (individual, business, enterprise) | individual | `-a` | -| `--github-token` | GitHub token(s), supports repeated flags or comma-separated `name:type:token` | — | `-g` | +| `--github-token` | GitHub token(s), supports repeated flags or comma-separated `name:token` (type auto-detected) | — | `-g` | | `--accounts-file` | Path to accounts JSON file | — | — | | `--strategy` | Load balancing: round-robin, least-busy, least-recent | round-robin | — | | `--rate-limit` | Minimum seconds between requests | — | `-r` | @@ -294,7 +285,6 @@ npx @weavejam/copilot-proxy@latest check-usage | Option | Description | Default | Alias | | --- | --- | --- | --- | | `--name` | Account name (defaults to GitHub username) | auto | `-n` | -| `--account-type` | Account type | individual | `-a` | | `--verbose` | Verbose logging | false | `-v` | ### `auth remove` Options @@ -393,4 +383,4 @@ All data is stored in `~/.local/share/copilot-api/`: - Use `--rate-limit 30 --wait` to throttle requests and queue them instead of erroring. - Use `--manual` to approve each request individually — useful for debugging or auditing. 
- Use `--account-type business` or `enterprise` if your Copilot subscription is through an organization. See the [official docs](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization). -- Multi-token CLI supports both repeated flags (`--github-token a:ghu_x --github-token b:ghu_y`) and comma-separated (`--github-token "a:ghu_x,b:ghu_y"`). Great for CI/CD or one-off use without an accounts file. +- Multi-token CLI supports both repeated flags (`--github-token ghu_x --github-token ghu_y`) and comma-separated (`--github-token "ghu_x,ghu_y"`). Account type and username are auto-detected. diff --git a/src/auth-add.ts b/src/auth-add.ts index dd84261a8..dd8815d1a 100644 --- a/src/auth-add.ts +++ b/src/auth-add.ts @@ -5,55 +5,32 @@ import { addAccountEntry } from "./lib/accounts-loader" import { ensurePaths } from "./lib/paths" import { state } from "./lib/state" import { runDeviceFlow } from "./lib/token" -import { getGitHubUser } from "./services/github/get-user" +import { detectAccountInfo } from "./services/github/detect-account-info" interface RunAuthAddOptions { name?: string - accountType: string verbose: boolean } -async function resolveAccountName( - token: string, - explicitName?: string, -): Promise { - if (explicitName) return explicitName - try { - const user = await getGitHubUser({ - account: { - name: "_probe", - accountType: state.accountType, - githubToken: token, - copilotTokenRefreshAt: 0, - inFlight: 0, - lastUsedAt: 0, - failureCount: 0, - }, - vsCodeVersion: state.vsCodeVersion, - }) - consola.info(`Detected GitHub user: ${user.login}`) - return user.login - } catch { - consola.warn("Could not detect GitHub username, using 'default'") - return "default" - } -} - export async 
function runAuthAdd(options: RunAuthAddOptions): Promise { if (options.verbose) { consola.level = 5 } - state.accountType = options.accountType await ensurePaths() consola.info("Starting GitHub Device Flow authentication…") const token = await runDeviceFlow() - const name = await resolveAccountName(token, options.name) + const info = await detectAccountInfo(token) + const name = options.name ?? info.login + state.accountType = info.accountType + + consola.info(`Detected GitHub user: ${info.login} (${info.accountType})`) + await addAccountEntry({ name, github_token: token, - account_type: options.accountType, + account_type: info.accountType, }) } @@ -68,12 +45,6 @@ export const authAdd = defineCommand({ type: "string", description: "Account name (defaults to GitHub username if not provided)", }, - "account-type": { - alias: "a", - type: "string", - default: "individual", - description: "Account type (individual, business, enterprise)", - }, verbose: { alias: "v", type: "boolean", @@ -84,7 +55,6 @@ export const authAdd = defineCommand({ run({ args }) { return runAuthAdd({ name: args.name, - accountType: args["account-type"], verbose: args.verbose, }) }, diff --git a/src/lib/accounts-loader.ts b/src/lib/accounts-loader.ts index ef3656343..9745401dc 100644 --- a/src/lib/accounts-loader.ts +++ b/src/lib/accounts-loader.ts @@ -2,6 +2,8 @@ import consola from "consola" import fs from "node:fs/promises" import path from "node:path" +import { detectAccountInfo } from "~/services/github/detect-account-info" + import type { Account } from "./account-pool" import { getDb } from "./db" @@ -40,52 +42,74 @@ const FRESH = (): Pick< }) /** - * Parse a single `--github-token` segment with format `name:type:token`. + * Parse a single `--github-token` segment. 
* - * - 1 segment → pure token, name=`account-{index}`, type=defaultType - * - 2 segments → `name:token`, type=defaultType - * - 3+ segments → `name:type:token` (token may contain `:`) + * - 1 segment → pure token, name=`account-{index}`, type auto-detected later + * - 2+ segments → `name:token` (token may contain `:`) */ export function parseGithubTokenArg( raw: string, index: number, - defaultType: string, ): AccountsFileEntry { - const parts = raw.split(":") - if (parts.length === 1) { + const idx = raw.indexOf(":") + if (idx === -1) { return { name: `account-${index + 1}`, - github_token: parts[0], - account_type: defaultType, + github_token: raw, } } - if (parts.length === 2) { - return { - name: parts[0], - github_token: parts[1], - account_type: defaultType, - } - } - // 3+ segments: name:type:token (token may contain colons) return { - name: parts[0], - account_type: parts[1], - github_token: parts.slice(2).join(":"), + name: raw.slice(0, idx), + github_token: raw.slice(idx + 1), } } /** * Parse a comma-separated `--github-token` value into multiple account entries. */ -export function parseGithubTokenArgs( - raw: string, - defaultType: string, -): Array { +export function parseGithubTokenArgs(raw: string): Array { return raw .split(",") .map((s) => s.trim()) .filter((s) => s.length > 0) - .map((s, i) => parseGithubTokenArg(s, i, defaultType)) + .map((s, i) => parseGithubTokenArg(s, i)) +} + +/** + * Auto-detect account type and username for entries missing them. + * Mutates entries in-place. Runs detections in parallel. 
+ */ +async function enrichWithDetection( + entries: Array, +): Promise { + const results = await Promise.all( + entries.map((e) => detectAccountInfo(e.github_token)), + ) + for (const [i, entry] of entries.entries()) { + const info = results[i] + + // Auto-fill account type + if (!entry.account_type) { + entry.account_type = info.accountType + } + + // Replace auto-generated name with GitHub username + if (entry.name.startsWith("account-")) { + entry.name = info.login + } + + consola.info(`[${entry.name}] detected as ${entry.account_type} account`) + } + + // Deduplicate names by appending suffix + const seen = new Map() + for (const entry of entries) { + const count = seen.get(entry.name) ?? 0 + if (count > 0) { + entry.name = `${entry.name}-${count + 1}` + } + seen.set(entry.name, count + 1) + } } export async function loadAccounts( @@ -105,6 +129,8 @@ export async function loadAccounts( }) } } else if (options.legacyTokens && options.legacyTokens.length > 0) { + // Auto-detect account type and username for CLI tokens + await enrichWithDetection(options.legacyTokens) for (const entry of options.legacyTokens) { accounts.push({ name: entry.name, diff --git a/src/pricing-sync-cmd.ts b/src/pricing-sync-cmd.ts index 4dd11b576..937be3076 100644 --- a/src/pricing-sync-cmd.ts +++ b/src/pricing-sync-cmd.ts @@ -49,10 +49,7 @@ async function bootstrapServer( let multiTokenEntries: ReturnType | undefined if (options.githubToken && !options.accountsFile) { - multiTokenEntries = parseGithubTokenArgs( - options.githubToken, - options.accountType, - ) + multiTokenEntries = parseGithubTokenArgs(options.githubToken) } if (!multiTokenEntries?.length && !options.accountsFile) { diff --git a/src/services/github/detect-account-info.ts b/src/services/github/detect-account-info.ts new file mode 100644 index 000000000..e2a8674da --- /dev/null +++ b/src/services/github/detect-account-info.ts @@ -0,0 +1,54 @@ +import consola from "consola" + +import { GITHUB_API_BASE_URL, standardHeaders 
} from "~/lib/api-config" + +export interface AccountInfo { + login: string + accountType: string +} + +/** + * Detect GitHub username and Copilot account type from a raw GitHub token. + * Calls `/user` and `/copilot_internal/user` in parallel. + */ +export async function detectAccountInfo( + githubToken: string, +): Promise { + const headers = { + authorization: `token ${githubToken}`, + ...standardHeaders(), + } + + const [userResult, copilotResult] = await Promise.allSettled([ + fetch(`${GITHUB_API_BASE_URL}/user`, { headers }).then(async (r) => { + if (!r.ok) throw new Error(`HTTP ${r.status}`) + return (await r.json()) as { login: string } + }), + fetch(`${GITHUB_API_BASE_URL}/copilot_internal/user`, { headers }).then( + async (r) => { + if (!r.ok) throw new Error(`HTTP ${r.status}`) + return (await r.json()) as { copilot_plan: string } + }, + ), + ]) + + const login = + userResult.status === "fulfilled" ? userResult.value.login : "unknown" + if (userResult.status === "rejected") { + consola.warn("Could not detect GitHub username, using 'unknown'") + } + + let accountType = "individual" + if (copilotResult.status === "fulfilled") { + const plan = copilotResult.value.copilot_plan + if (plan === "business" || plan === "enterprise") { + accountType = plan + } + } else { + consola.warn( + `[${login}] Could not detect Copilot plan, defaulting to individual`, + ) + } + + return { login, accountType } +} diff --git a/src/start.ts b/src/start.ts index 2d34f9e38..1d14daa6a 100644 --- a/src/start.ts +++ b/src/start.ts @@ -123,10 +123,7 @@ export async function runServer(options: RunServerOptions): Promise { let multiTokenEntries: ReturnType | undefined if (options.githubToken && !options.accountsFile) { - multiTokenEntries = parseGithubTokenArgs( - options.githubToken, - options.accountType, - ) + multiTokenEntries = parseGithubTokenArgs(options.githubToken) if (multiTokenEntries.length > 0) { consola.info("Using provided GitHub token(s)") } diff --git 
a/tests/parse-github-token.test.ts b/tests/parse-github-token.test.ts index 1da9a7c3b..9b8f7a0f3 100644 --- a/tests/parse-github-token.test.ts +++ b/tests/parse-github-token.test.ts @@ -7,116 +7,82 @@ import { describe("parseGithubTokenArg", () => { test("1 segment: pure token", () => { - const entry = parseGithubTokenArg("ghu_abc123", 0, "individual") + const entry = parseGithubTokenArg("ghu_abc123", 0) expect(entry).toEqual({ name: "account-1", github_token: "ghu_abc123", - account_type: "individual", }) }) test("1 segment: uses index for name", () => { - const entry = parseGithubTokenArg("ghu_xyz", 2, "individual") + const entry = parseGithubTokenArg("ghu_xyz", 2) expect(entry.name).toBe("account-3") }) test("2 segments: name:token", () => { - const entry = parseGithubTokenArg("personal:ghu_abc", 0, "individual") + const entry = parseGithubTokenArg("personal:ghu_abc", 0) expect(entry).toEqual({ name: "personal", github_token: "ghu_abc", - account_type: "individual", }) }) - test("2 segments: uses defaultType", () => { - const entry = parseGithubTokenArg("work:ghu_abc", 0, "business") - expect(entry.account_type).toBe("business") - }) - - test("3 segments: name:type:token", () => { - const entry = parseGithubTokenArg("work:business:ghu_abc", 0, "individual") + test("token containing colons: first colon splits name", () => { + const entry = parseGithubTokenArg("work:ghu_abc:def:ghi", 0) expect(entry).toEqual({ name: "work", - github_token: "ghu_abc", - account_type: "business", + github_token: "ghu_abc:def:ghi", }) }) - test("3+ segments: token containing colons", () => { - const entry = parseGithubTokenArg( - "work:enterprise:ghu_abc:def:ghi", - 0, - "individual", - ) - expect(entry).toEqual({ - name: "work", - github_token: "ghu_abc:def:ghi", - account_type: "enterprise", - }) + test("no account_type is set (auto-detected later)", () => { + const entry = parseGithubTokenArg("myname:ghu_token", 0) + expect(entry.account_type).toBeUndefined() }) }) 
describe("parseGithubTokenArgs", () => { test("single token", () => { - const entries = parseGithubTokenArgs("ghu_abc", "individual") + const entries = parseGithubTokenArgs("ghu_abc") expect(entries).toHaveLength(1) expect(entries[0].github_token).toBe("ghu_abc") }) test("multiple comma-separated tokens", () => { - const entries = parseGithubTokenArgs( - "personal:individual:ghu_aaa,work:business:ghu_bbb", - "individual", - ) + const entries = parseGithubTokenArgs("personal:ghu_aaa,work:ghu_bbb") expect(entries).toHaveLength(2) expect(entries[0]).toEqual({ name: "personal", github_token: "ghu_aaa", - account_type: "individual", }) expect(entries[1]).toEqual({ name: "work", github_token: "ghu_bbb", - account_type: "business", }) }) test("trims whitespace around entries", () => { - const entries = parseGithubTokenArgs( - " a:ghu_aaa , b:ghu_bbb ", - "individual", - ) + const entries = parseGithubTokenArgs(" a:ghu_aaa , b:ghu_bbb ") expect(entries).toHaveLength(2) expect(entries[0].name).toBe("a") expect(entries[1].name).toBe("b") }) test("ignores empty segments from trailing comma", () => { - const entries = parseGithubTokenArgs("a:ghu_aaa,", "individual") + const entries = parseGithubTokenArgs("a:ghu_aaa,") expect(entries).toHaveLength(1) }) - test("mixed formats", () => { - const entries = parseGithubTokenArgs( - "ghu_bare,named:ghu_two,full:business:ghu_three", - "individual", - ) - expect(entries).toHaveLength(3) + test("mixed formats: bare tokens and named tokens", () => { + const entries = parseGithubTokenArgs("ghu_bare,named:ghu_two") + expect(entries).toHaveLength(2) expect(entries[0]).toEqual({ name: "account-1", github_token: "ghu_bare", - account_type: "individual", }) expect(entries[1]).toEqual({ name: "named", github_token: "ghu_two", - account_type: "individual", - }) - expect(entries[2]).toEqual({ - name: "full", - github_token: "ghu_three", - account_type: "business", }) }) }) From b614d5cefda58f10fbed8b0e54bab3b5fd27860f Mon Sep 17 00:00:00 2001 
From: Bo Lu Date: Sun, 26 Apr 2026 12:37:17 +0800 Subject: [PATCH 27/34] feat: log request headers to file for session analysis Logs method, URL, and all headers of every incoming request to ~/.local/share/copilot-api/headers.log. Authorization values are truncated for safety. Co-Authored-By: Claude Opus 4.6 --- src/lib/header-logger.ts | 38 ++++++++++++++++++++++++++++++++++++++ src/lib/paths.ts | 3 +++ src/server.ts | 2 ++ 3 files changed, 43 insertions(+) create mode 100644 src/lib/header-logger.ts diff --git a/src/lib/header-logger.ts b/src/lib/header-logger.ts new file mode 100644 index 000000000..4d5553299 --- /dev/null +++ b/src/lib/header-logger.ts @@ -0,0 +1,38 @@ +import type { MiddlewareHandler } from "hono" + +import { appendFileSync } from "node:fs" + +import { PATHS } from "./paths" + +/** + * Hono middleware that logs every request's method, URL, and headers + * to ~/.local/share/copilot-api/headers.log in a human-readable format. + */ +export function headerLogger(): MiddlewareHandler { + return async (c, next) => { + const ts = new Date().toISOString() + const method = c.req.method + const url = c.req.url + + const headers: Record = {} + for (const [k, v] of c.req.raw.headers.entries()) { + // Redact authorization tokens for safety + headers[k] = + k.toLowerCase() === "authorization" ? v.slice(0, 20) + "..." 
: v + } + + const line = [ + `\n--- ${ts} ${method} ${url} ---`, + JSON.stringify(headers, null, 2), + "", + ].join("\n") + + try { + appendFileSync(PATHS.HEADER_LOG_PATH, line, "utf8") + } catch { + // Non-critical — don't crash if write fails + } + + await next() + } +} diff --git a/src/lib/paths.ts b/src/lib/paths.ts index f347221d8..00fd4b3f1 100644 --- a/src/lib/paths.ts +++ b/src/lib/paths.ts @@ -8,11 +8,14 @@ const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token") const USAGE_DB_PATH = path.join(APP_DIR, "usage.sqlite") const ACCOUNTS_FILE_PATH = path.join(APP_DIR, "accounts.json") +const HEADER_LOG_PATH = path.join(APP_DIR, "headers.log") + export const PATHS = { APP_DIR, GITHUB_TOKEN_PATH, USAGE_DB_PATH, ACCOUNTS_FILE_PATH, + HEADER_LOG_PATH, } export async function ensurePaths(): Promise { diff --git a/src/server.ts b/src/server.ts index 462a278f3..c1e2eea0e 100644 --- a/src/server.ts +++ b/src/server.ts @@ -2,6 +2,7 @@ import { Hono } from "hono" import { cors } from "hono/cors" import { logger } from "hono/logger" +import { headerLogger } from "./lib/header-logger" import { completionRoutes } from "./routes/chat-completions/route" import { embeddingRoutes } from "./routes/embeddings/route" import { messageRoutes } from "./routes/messages/route" @@ -13,6 +14,7 @@ export const server = new Hono() server.use(logger()) server.use(cors()) +server.use(headerLogger()) server.get("/", (c) => c.text("Server running")) From 28999054a3fe1f08c67c3277a12eba69cb279403 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sun, 26 Apr 2026 13:52:17 +0800 Subject: [PATCH 28/34] feat: replace header logger with configurable request recorder Add --record-requests, --record-dir, --record-parts CLI flags for opt-in HTTP request/response recording to per-request directories. Remove unconditional header-only logging. Disable sourcemaps in build to reduce npm package size. Bump to v0.9.0. 
Co-Authored-By: Claude Opus 4.6 --- package.json | 2 +- src/lib/header-logger.ts | 38 ------- src/lib/paths.ts | 4 +- src/lib/request-recorder.ts | 196 ++++++++++++++++++++++++++++++++++++ src/server.ts | 2 - src/start.ts | 37 +++++++ tsdown.config.ts | 2 +- 7 files changed, 237 insertions(+), 44 deletions(-) delete mode 100644 src/lib/header-logger.ts create mode 100644 src/lib/request-recorder.ts diff --git a/package.json b/package.json index 3124013d9..2ea041199 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@weavejam/copilot-proxy", - "version": "0.8.1", + "version": "0.9.0", "description": "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!", "keywords": [ "proxy", diff --git a/src/lib/header-logger.ts b/src/lib/header-logger.ts deleted file mode 100644 index 4d5553299..000000000 --- a/src/lib/header-logger.ts +++ /dev/null @@ -1,38 +0,0 @@ -import type { MiddlewareHandler } from "hono" - -import { appendFileSync } from "node:fs" - -import { PATHS } from "./paths" - -/** - * Hono middleware that logs every request's method, URL, and headers - * to ~/.local/share/copilot-api/headers.log in a human-readable format. - */ -export function headerLogger(): MiddlewareHandler { - return async (c, next) => { - const ts = new Date().toISOString() - const method = c.req.method - const url = c.req.url - - const headers: Record = {} - for (const [k, v] of c.req.raw.headers.entries()) { - // Redact authorization tokens for safety - headers[k] = - k.toLowerCase() === "authorization" ? v.slice(0, 20) + "..." 
: v - } - - const line = [ - `\n--- ${ts} ${method} ${url} ---`, - JSON.stringify(headers, null, 2), - "", - ].join("\n") - - try { - appendFileSync(PATHS.HEADER_LOG_PATH, line, "utf8") - } catch { - // Non-critical — don't crash if write fails - } - - await next() - } -} diff --git a/src/lib/paths.ts b/src/lib/paths.ts index 00fd4b3f1..e42db9bb9 100644 --- a/src/lib/paths.ts +++ b/src/lib/paths.ts @@ -8,14 +8,14 @@ const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token") const USAGE_DB_PATH = path.join(APP_DIR, "usage.sqlite") const ACCOUNTS_FILE_PATH = path.join(APP_DIR, "accounts.json") -const HEADER_LOG_PATH = path.join(APP_DIR, "headers.log") +const RECORD_DIR = path.join(APP_DIR, "logs") export const PATHS = { APP_DIR, GITHUB_TOKEN_PATH, USAGE_DB_PATH, ACCOUNTS_FILE_PATH, - HEADER_LOG_PATH, + RECORD_DIR, } export async function ensurePaths(): Promise { diff --git a/src/lib/request-recorder.ts b/src/lib/request-recorder.ts new file mode 100644 index 000000000..9c9987861 --- /dev/null +++ b/src/lib/request-recorder.ts @@ -0,0 +1,196 @@ +import type { MiddlewareHandler } from "hono" + +import { mkdirSync, writeFileSync } from "node:fs" +import path from "node:path" + +export interface RecorderOptions { + logDir: string + requestHeaders: boolean + requestBody: boolean + responseHeaders: boolean + responseBody: boolean +} + +/** + * Get the directory for the current minute: {logDir}/YYYYMMDD_HHmm00 + */ +function getMinuteDir(logDir: string): string { + const now = new Date() + const y = now.getFullYear() + const mo = String(now.getMonth() + 1).padStart(2, "0") + const d = String(now.getDate()).padStart(2, "0") + const h = String(now.getHours()).padStart(2, "0") + const mi = String(now.getMinutes()).padStart(2, "0") + return path.join(logDir, `${y}${mo}${d}_${h}${mi}00`) +} + +/** + * Build a per-request directory inside the minute directory. 
+ */ +function getRequestDir( + logDir: string, + method: string, + urlPath: string, +): string { + const ts = Date.now() + const safePath = + encodeURIComponent(urlPath.replace(/^\//, "")).slice(0, 200) || "root" + const dirName = `${ts}_${method}_${safePath}` + const minuteDir = getMinuteDir(logDir) + const requestDir = path.join(minuteDir, dirName) + mkdirSync(requestDir, { recursive: true }) + return requestDir +} + +function getFileExtension(contentType: string | null): string { + if (!contentType) return "bin" + const mime = contentType.toLowerCase().split(";")[0].trim() + const map: Record = { + "application/json": "json", + "text/html": "html", + "text/plain": "txt", + "text/event-stream": "txt", + "text/css": "css", + "text/javascript": "js", + "application/javascript": "js", + "application/xml": "xml", + "text/xml": "xml", + } + return map[mime] ?? "bin" +} + +function redactHeaders(raw: Headers): Record { + const out: Record = {} + for (const [k, v] of raw.entries()) { + out[k] = k.toLowerCase() === "authorization" ? v.slice(0, 20) + "..." : v + } + return out +} + +function saveJson(filePath: string, data: unknown): void { + writeFileSync(filePath, JSON.stringify(data, null, 2), "utf8") +} + +interface SaveBodyOpts { + dir: string + prefix: string + buf: ArrayBuffer + contentType: string | null +} + +function saveBody(opts: SaveBodyOpts): void { + const { dir, prefix, buf, contentType } = opts + if (buf.byteLength === 0) return + const ext = getFileExtension(contentType) + const bytes = Buffer.from(buf) + if (ext === "json") { + try { + const parsed: unknown = JSON.parse(bytes.toString("utf8")) + saveJson(path.join(dir, `${prefix}.json`), parsed) + return + } catch { + // fall through to raw write + } + } + writeFileSync(path.join(dir, `${prefix}.${ext}`), bytes) +} + +/** + * Hono middleware that records request/response data to per-request directories. 
+ */ +export function requestRecorder(opts: RecorderOptions): MiddlewareHandler { + return async (c, next) => { + const method = c.req.method + const url = new URL(c.req.url) + const urlPath = url.pathname + url.search + + let requestDir: string | undefined + + try { + requestDir = getRequestDir(opts.logDir, method, urlPath) + + // Request headers + if (opts.requestHeaders) { + saveJson(path.join(requestDir, "request_headers.json"), { + method, + url: c.req.url, + headers: redactHeaders(c.req.raw.headers), + timestamp: new Date().toISOString(), + }) + } + + // Request body — clone to avoid consuming the original + if (opts.requestBody && method !== "GET" && method !== "HEAD") { + try { + const buf = await c.req.raw.clone().arrayBuffer() + saveBody({ + dir: requestDir, + prefix: "request_body", + buf, + contentType: c.req.header("content-type") ?? null, + }) + } catch { + // body read failure — non-critical + } + } + } catch { + // directory or write failure — non-critical + } + + await next() + + // Response recording + if (requestDir) { + try { + if (opts.responseHeaders) { + const resHeaders: Record = {} + for (const [k, v] of c.res.headers.entries()) { + resHeaders[k] = v + } + saveJson(path.join(requestDir, "response_headers.json"), { + status: c.res.status, + headers: resHeaders, + timestamp: new Date().toISOString(), + }) + } + + if (opts.responseBody) { + try { + const buf = await c.res.clone().arrayBuffer() + saveBody({ + dir: requestDir, + prefix: "response_body", + buf, + contentType: c.res.headers.get("content-type"), + }) + } catch { + // body read failure — non-critical + } + } + } catch { + // non-critical + } + } + } +} + +/** Parse --record-parts comma-separated string into boolean flags. 
*/ +export function parseRecordParts(raw: string): { + requestHeaders: boolean + requestBody: boolean + responseHeaders: boolean + responseBody: boolean +} { + const parts = new Set( + raw + .split(",") + .map((s) => s.trim().toLowerCase()) + .filter(Boolean), + ) + return { + requestHeaders: parts.has("req-header"), + requestBody: parts.has("req-body"), + responseHeaders: parts.has("res-header"), + responseBody: parts.has("res-body"), + } +} diff --git a/src/server.ts b/src/server.ts index c1e2eea0e..462a278f3 100644 --- a/src/server.ts +++ b/src/server.ts @@ -2,7 +2,6 @@ import { Hono } from "hono" import { cors } from "hono/cors" import { logger } from "hono/logger" -import { headerLogger } from "./lib/header-logger" import { completionRoutes } from "./routes/chat-completions/route" import { embeddingRoutes } from "./routes/embeddings/route" import { messageRoutes } from "./routes/messages/route" @@ -14,7 +13,6 @@ export const server = new Hono() server.use(logger()) server.use(cors()) -server.use(headerLogger()) server.get("/", (c) => c.text("Server running")) diff --git a/src/start.ts b/src/start.ts index 1d14daa6a..b65eab810 100644 --- a/src/start.ts +++ b/src/start.ts @@ -16,6 +16,7 @@ import { initDb } from "./lib/db" import { ensurePaths, PATHS } from "./lib/paths" import { schedulePricingSync } from "./lib/pricing-scheduler" import { initProxyFromEnv } from "./lib/proxy" +import { parseRecordParts, requestRecorder } from "./lib/request-recorder" import { generateEnvScript } from "./lib/shell" import { state } from "./lib/state" import { setupCopilotTokenFor, setupGitHubToken } from "./lib/token" @@ -39,6 +40,9 @@ interface RunServerOptions { pricingSyncModel?: string pricingSyncIntervalDays: number pricingSyncDisabled: boolean + recordRequests: boolean + recordDir: string + recordParts: string } /** Citty may return a string or string[] for repeated --github-token flags. Normalize to comma-separated. 
*/ @@ -93,6 +97,7 @@ async function promptClaudeCodeSetup(serverUrl: string): Promise { } } +// eslint-disable-next-line complexity export async function runServer(options: RunServerOptions): Promise { if (options.proxyEnv) { initProxyFromEnv() @@ -177,6 +182,18 @@ export async function runServer(options: RunServerOptions): Promise { `🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage`, ) + // Conditionally enable request recording + if (options.recordRequests) { + const parts = parseRecordParts(options.recordParts) + server.use( + requestRecorder({ + logDir: options.recordDir, + ...parts, + }), + ) + consola.info(`Request recording enabled → ${options.recordDir}`) + } + serve({ fetch: server.fetch as ServerHandler, port: options.port, @@ -288,6 +305,23 @@ export const start = defineCommand({ default: false, description: "Disable automatic background pricing sync", }, + "record-requests": { + type: "boolean", + default: false, + description: "Enable recording of HTTP requests and responses to disk", + }, + "record-dir": { + type: "string", + default: PATHS.RECORD_DIR, + description: + "Directory for recorded request data (default: ~/.local/share/copilot-api/logs)", + }, + "record-parts": { + type: "string", + default: "req-header,req-body,res-header,res-body", + description: + "Comma-separated parts to record: req-header, req-body, res-header, res-body", + }, }, run({ args }) { const rateLimitRaw = args["rate-limit"] @@ -315,6 +349,9 @@ export const start = defineCommand({ 10, ), pricingSyncDisabled: args["pricing-sync-disabled"], + recordRequests: args["record-requests"], + recordDir: args["record-dir"], + recordParts: args["record-parts"], }) }, }) diff --git a/tsdown.config.ts b/tsdown.config.ts index 5d23637e9..b2c7d4e19 100644 --- a/tsdown.config.ts +++ b/tsdown.config.ts @@ -8,7 +8,7 @@ export default defineConfig({ target: "es2022", platform: "node", - sourcemap: true, + sourcemap: false, clean: true, removeNodeProtocol: 
false, From 98af2739377a49bac1a470ba3ed6e5d50445063a Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sun, 26 Apr 2026 22:00:41 +0800 Subject: [PATCH 29/34] docs: add request recording section to README Document --record-requests, --record-dir, --record-parts CLI flags with usage examples, available parts table, and directory structure. Co-Authored-By: Claude Opus 4.6 --- README.md | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/README.md b/README.md index f0de4d0a2..ec707d8e0 100644 --- a/README.md +++ b/README.md @@ -279,6 +279,9 @@ npx @weavejam/copilot-proxy@latest check-usage | `--pricing-sync-model` | Model for LLM pricing extraction | auto | — | | `--pricing-sync-interval-days` | Days between automatic pricing syncs | 7 | — | | `--pricing-sync-disabled` | Disable automatic pricing sync | false | — | +| `--record-requests` | Enable HTTP request/response recording to disk | false | — | +| `--record-dir` | Directory for recorded data | `~/.local/share/copilot-api/logs` | — | +| `--record-parts` | Comma-separated parts to record (see below) | all | — | ### `auth add` Options @@ -313,6 +316,55 @@ npx @weavejam/copilot-proxy@latest check-usage | --- | --- | --- | | `--json` | Output as JSON | false | +## Request Recording + +Record HTTP requests and responses to disk for debugging or analyzing API usage patterns (e.g., studying how Claude Code calls the API). 
+ +Recording is **disabled by default** — enable it with `--record-requests`: + +```sh +# Record everything (request/response headers and bodies) +npx @weavejam/copilot-proxy@latest start --record-requests + +# Record only request headers and bodies +npx @weavejam/copilot-proxy@latest start --record-requests --record-parts req-header,req-body + +# Record only response data +npx @weavejam/copilot-proxy@latest start --record-requests --record-parts res-header,res-body + +# Custom output directory +npx @weavejam/copilot-proxy@latest start --record-requests --record-dir ./my-logs +``` + +**Available parts** for `--record-parts` (comma-separated): + +| Part | Description | +| --- | --- | +| `req-header` | Request method, URL, and headers | +| `req-body` | Request body (JSON pretty-printed) | +| `res-header` | Response status and headers | +| `res-body` | Response body (JSON pretty-printed) | + +**Directory structure**: Recordings are organized by minute, with each request in its own subdirectory: + +``` +logs/ + 20260426_143500/ # minute bucket + 1714142100123_POST_v1%2Fmessages/ + request_headers.json + request_body.json + response_headers.json + response_body.json + 1714142105456_GET_v1%2Fmodels/ + request_headers.json + response_headers.json + response_body.json + 20260426_143600/ + ... +``` + +> **Note**: Authorization headers are automatically redacted in recorded files. + ## Docker ### Build & Run @@ -377,6 +429,7 @@ All data is stored in `~/.local/share/copilot-api/`: | `github_token` | Stored GitHub OAuth token | | `accounts.json` | Multi-account configuration | | `usage.sqlite` | Usage tracking and pricing data | +| `logs/` | Request recordings (when `--record-requests` enabled) | ## Usage Tips From ced475bb901e0721b7ce090a1dbc1659f78b73d5 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sun, 26 Apr 2026 22:16:13 +0800 Subject: [PATCH 30/34] fix: register request recorder middleware before routes Hono middleware added after routes won't execute. 
Refactor server.ts to a createServer() factory that registers the recorder before route registration. Bump to v0.9.1. Co-Authored-By: Claude Opus 4.6 --- package.json | 2 +- src/pricing-sync-cmd.ts | 4 ++-- src/server.ts | 46 +++++++++++++++++++++++++++-------------- src/start.ts | 22 ++++++++++---------- 4 files changed, 45 insertions(+), 29 deletions(-) diff --git a/package.json b/package.json index 2ea041199..eae9076f7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@weavejam/copilot-proxy", - "version": "0.9.0", + "version": "0.9.1", "description": "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!", "keywords": [ "proxy", diff --git a/src/pricing-sync-cmd.ts b/src/pricing-sync-cmd.ts index 937be3076..b72b6f5ba 100644 --- a/src/pricing-sync-cmd.ts +++ b/src/pricing-sync-cmd.ts @@ -10,7 +10,7 @@ import { initProxyFromEnv } from "./lib/proxy" import { state } from "./lib/state" import { setupCopilotTokenFor, setupGitHubToken } from "./lib/token" import { cacheModels, cacheVSCodeVersion } from "./lib/utils" -import { server } from "./server" +import { createServer } from "./server" interface RunPricingSyncCmdOptions { port: number @@ -74,7 +74,7 @@ async function bootstrapServer( function startTempServer(port: number): void { serve({ - fetch: server.fetch as ServerHandler, + fetch: createServer().fetch as ServerHandler, port, }) } diff --git a/src/server.ts b/src/server.ts index 462a278f3..8878709e8 100644 --- a/src/server.ts +++ b/src/server.ts @@ -2,6 +2,9 @@ import { Hono } from "hono" import { cors } from "hono/cors" import { logger } from "hono/logger" +import type { RecorderOptions } from "./lib/request-recorder" + +import { requestRecorder } from "./lib/request-recorder" import { completionRoutes } from "./routes/chat-completions/route" import { embeddingRoutes } from "./routes/embeddings/route" import { messageRoutes } from "./routes/messages/route" @@ -9,23 +12,36 @@ import { modelRoutes } 
from "./routes/models/route" import { tokenRoute } from "./routes/token/route" import { usageRoute } from "./routes/usage/route" -export const server = new Hono() +export interface ServerOptions { + recorder?: RecorderOptions +} + +export function createServer(options: ServerOptions = {}): Hono { + const server = new Hono() + + server.use(logger()) + server.use(cors()) + + // Request recorder must be registered before routes + if (options.recorder) { + server.use(requestRecorder(options.recorder)) + } -server.use(logger()) -server.use(cors()) + server.get("/", (c) => c.text("Server running")) -server.get("/", (c) => c.text("Server running")) + server.route("/chat/completions", completionRoutes) + server.route("/models", modelRoutes) + server.route("/embeddings", embeddingRoutes) + server.route("/usage", usageRoute) + server.route("/token", tokenRoute) -server.route("/chat/completions", completionRoutes) -server.route("/models", modelRoutes) -server.route("/embeddings", embeddingRoutes) -server.route("/usage", usageRoute) -server.route("/token", tokenRoute) + // Compatibility with tools that expect v1/ prefix + server.route("/v1/chat/completions", completionRoutes) + server.route("/v1/models", modelRoutes) + server.route("/v1/embeddings", embeddingRoutes) -// Compatibility with tools that expect v1/ prefix -server.route("/v1/chat/completions", completionRoutes) -server.route("/v1/models", modelRoutes) -server.route("/v1/embeddings", embeddingRoutes) + // Anthropic compatible endpoints + server.route("/v1/messages", messageRoutes) -// Anthropic compatible endpoints -server.route("/v1/messages", messageRoutes) + return server +} diff --git a/src/start.ts b/src/start.ts index b65eab810..00560ce4c 100644 --- a/src/start.ts +++ b/src/start.ts @@ -16,12 +16,12 @@ import { initDb } from "./lib/db" import { ensurePaths, PATHS } from "./lib/paths" import { schedulePricingSync } from "./lib/pricing-scheduler" import { initProxyFromEnv } from "./lib/proxy" -import { 
parseRecordParts, requestRecorder } from "./lib/request-recorder" +import { parseRecordParts } from "./lib/request-recorder" import { generateEnvScript } from "./lib/shell" import { state } from "./lib/state" import { setupCopilotTokenFor, setupGitHubToken } from "./lib/token" import { cacheModels, cacheVSCodeVersion } from "./lib/utils" -import { server } from "./server" +import { createServer } from "./server" interface RunServerOptions { port: number @@ -182,18 +182,18 @@ export async function runServer(options: RunServerOptions): Promise { `🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage`, ) - // Conditionally enable request recording - if (options.recordRequests) { - const parts = parseRecordParts(options.recordParts) - server.use( - requestRecorder({ - logDir: options.recordDir, - ...parts, - }), - ) + // Create server with optional request recording + const recorderOpts = + options.recordRequests ? + { logDir: options.recordDir, ...parseRecordParts(options.recordParts) } + : undefined + + if (recorderOpts) { consola.info(`Request recording enabled → ${options.recordDir}`) } + const server = createServer({ recorder: recorderOpts }) + serve({ fetch: server.fetch as ServerHandler, port: options.port, From 7e6c9eb9ad60c24d7eecbef47e988d2e46947268 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Tue, 5 May 2026 09:54:18 +0800 Subject: [PATCH 31/34] fix(streaming): use stream.aborted instead of req.signal.aborted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Node.js + srvx, the request's AbortSignal is bound to the IncomingMessage 'close' event, which fires as soon as the request body is fully read — not when the client actually disconnects. This caused streaming handlers to break out of the for-await loop on the very first frame, returning a 200 response with an empty SSE body. 
Switch to hono's stream.aborted, which is only set when the response stream is cancelled by the downstream consumer (i.e. when nodeRes.close fires — the real client disconnect signal). Verified node dist/main.js now produces full SSE on both /v1/chat/completions and /v1/messages. Also includes: - tsconfig.json: exclude dist from typecheck - sqlite-adapter.ts: tighten transaction generic so callers can pass typed callbacks without the overload conflict - utils.ts: hoist getBigrams to module scope (lint) - bump to 0.9.2 Co-Authored-By: Claude Opus 4.7 --- package.json | 2 +- src/lib/sqlite-adapter.ts | 10 ++++------ src/lib/utils.ts | 23 ++++++++++++----------- src/routes/chat-completions/handler.ts | 2 +- src/routes/messages/handler.ts | 2 +- tests/model-mapping.test.ts | 22 ++++++---------------- tsconfig.json | 4 +++- 7 files changed, 28 insertions(+), 37 deletions(-) diff --git a/package.json b/package.json index eae9076f7..10c27d7ba 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@weavejam/copilot-proxy", - "version": "0.9.1", + "version": "0.9.2", "description": "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!", "keywords": [ "proxy", diff --git a/src/lib/sqlite-adapter.ts b/src/lib/sqlite-adapter.ts index 937050f81..93f3b71af 100644 --- a/src/lib/sqlite-adapter.ts +++ b/src/lib/sqlite-adapter.ts @@ -1,4 +1,4 @@ -/* eslint-disable @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-return, @typescript-eslint/no-require-imports, unicorn/prefer-module, @typescript-eslint/no-unnecessary-type-parameters */ +/* eslint-disable @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-return, @typescript-eslint/no-require-imports, unicorn/prefer-module */ /** * Runtime-adaptive SQLite adapter. 
* @@ -16,9 +16,7 @@ export interface DbInstance { prepare(sql: string): DbStatement exec(sql: string): void pragma(pragma: string): unknown - transaction( - fn: (...args: Array) => T, - ): (...args: Array) => T + transaction) => unknown>(fn: F): F close(): void } @@ -66,7 +64,7 @@ function createBunDatabase(dbPath: string): DbInstance { } return db.query(`PRAGMA ${pragma}`).get() }, - transaction(fn: (...args: Array) => T) { + transaction) => unknown>(fn: F): F { return db.transaction(fn) }, close() { @@ -100,7 +98,7 @@ function createBetterSqlite3Database(dbPath: string): DbInstance { pragma(pragma: string) { return db.pragma(pragma) }, - transaction(fn: (...args: Array) => T) { + transaction) => unknown>(fn: F): F { return db.transaction(fn) }, close() { diff --git a/src/lib/utils.ts b/src/lib/utils.ts index 1ce123380..e43f5df1f 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -1,12 +1,13 @@ -import consola from "consola" import type { Context } from "hono" +import consola from "consola" + import type { Account } from "~/lib/account-pool" import type { ApiContext } from "~/lib/api-config" +import type { Model } from "~/services/copilot/get-models" import { getModels } from "~/services/copilot/get-models" import { getVSCodeVersion } from "~/services/get-vscode-version" -import type { Model } from "~/services/copilot/get-models" import { state } from "./state" @@ -25,7 +26,7 @@ export function normalizeClaudeModelVersion(model: string): string { // Convert numeric segments from hyphen to dot, e.g. claude-opus-4-6 -> claude-opus-4.6. // Only replace when the next numeric token ends at '-' or end, so suffixes like '-1m' stay unchanged. - return model.replace(/(\d)-(?=\d(?:-|$))/g, "$1.") + return model.replaceAll(/(\d)-(?=\d(?:-|$))/g, "$1.") } /** @@ -56,16 +57,16 @@ export function resolveModelId(model: string, c?: Context): string { /** * Calculate Jaccard similarity between two strings based on character bigrams. 
*/ -export function jaccardSimilarity(str1: string, str2: string): number { - const getBigrams = (str: string): Set => { - const bigrams = new Set() - const normalized = str.toLowerCase().replace(/[^a-z0-9]/g, "") - for (let i = 0; i < normalized.length - 1; i++) { - bigrams.add(normalized.substring(i, i + 2)) - } - return bigrams +function getBigrams(str: string): Set { + const bigrams = new Set() + const normalized = str.toLowerCase().replaceAll(/[^a-z0-9]/g, "") + for (let i = 0; i < normalized.length - 1; i++) { + bigrams.add(normalized.slice(i, i + 2)) } + return bigrams +} +export function jaccardSimilarity(str1: string, str2: string): number { const bigrams1 = getBigrams(str1) const bigrams2 = getBigrams(str2) diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 2038ffca3..f8e825039 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -89,7 +89,7 @@ function streamAndRecord( let lastRequestId: string | undefined try { for await (const rawEvent of response) { - if (c.req.raw.signal.aborted) { + if (stream.aborted) { status = "aborted" break } diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 9c453599a..68b1c22e6 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -109,7 +109,7 @@ function streamAndRecord( try { for await (const rawEvent of response) { - if (c.req.raw.signal.aborted) { + if (stream.aborted) { status = "aborted" break } diff --git a/tests/model-mapping.test.ts b/tests/model-mapping.test.ts index 94bc9993c..64900fcd8 100644 --- a/tests/model-mapping.test.ts +++ b/tests/model-mapping.test.ts @@ -1,5 +1,7 @@ -import { describe, expect, test } from "bun:test" import type { Context } from "hono" + +import { describe, expect, test } from "bun:test" + import type { Model } from "~/services/copilot/get-models" import { @@ -53,33 +55,21 @@ describe("model mapping", () => { }) test("uses exact match 
before fuzzy matching", () => { - const models = makeModels([ - "claude-opus-4.6", - "claude-sonnet-4.5", - "auto", - ]) + const models = makeModels(["claude-opus-4.6", "claude-sonnet-4.5", "auto"]) expect(mapModelIdToAvailableModels("claude-opus-4.6", models)).toBe( "claude-opus-4.6", ) }) test("uses fuzzy match when exact model is missing", () => { - const models = makeModels([ - "claude-opus-4.6", - "claude-sonnet-4.5", - "auto", - ]) + const models = makeModels(["claude-opus-4.6", "claude-sonnet-4.5", "auto"]) expect(mapModelIdToAvailableModels("claude-opus-4-6", models)).toBe( "claude-opus-4.6", ) }) test("falls back to auto-version model when no fuzzy match", () => { - const models = makeModels([ - "claude-opus-4.6", - "auto", - "gpt-4o", - ]) + const models = makeModels(["claude-opus-4.6", "auto", "gpt-4o"]) expect(mapModelIdToAvailableModels("nonexistent-model", models)).toBe( "auto", ) diff --git a/tsconfig.json b/tsconfig.json index 8ff821caf..0eeccf51f 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -21,5 +21,7 @@ "paths": { "~/*": ["./src/*"] } - } + }, + "include": ["src/**/*", "tests/**/*", "*.config.ts"], + "exclude": ["node_modules", "dist"] } From ec76bfac45623c1b70ec4810824731efb77ead8d Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Tue, 5 May 2026 09:58:59 +0800 Subject: [PATCH 32/34] chore(ci): include eslint config in tsconfig, allow manual publish - tsconfig.json: include *.config.js so typescript-eslint's project service can lint eslint.config.js (CI was failing for this reason). - publish.yml: add workflow_dispatch trigger so a tag can be published even when the release event was created by another workflow (changelogithub-created releases use GITHUB_TOKEN, which by GitHub design does not chain-trigger other workflows). 
Co-Authored-By: Claude Opus 4.7 --- .github/workflows/publish.yml | 8 ++++++++ tsconfig.json | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3075c90f1..93fc00d34 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -3,6 +3,12 @@ name: Publish to npm on: release: types: [published] + workflow_dispatch: + inputs: + ref: + description: "Tag or branch to publish (e.g. v0.9.2)" + required: true + type: string permissions: contents: read @@ -13,6 +19,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + ref: ${{ inputs.ref || github.ref }} - uses: oven-sh/setup-bun@v2 with: diff --git a/tsconfig.json b/tsconfig.json index 0eeccf51f..7b561e815 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -22,6 +22,6 @@ "~/*": ["./src/*"] } }, - "include": ["src/**/*", "tests/**/*", "*.config.ts"], + "include": ["src/**/*", "tests/**/*", "*.config.ts", "*.config.js"], "exclude": ["node_modules", "dist"] } From f6779c8184f2e69adbf274715f15e93f3f87c52d Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Thu, 14 May 2026 01:36:05 +0800 Subject: [PATCH 33/34] feat: route gpt-5* models to Copilot /responses endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GitHub Copilot's gpt-5 family (gpt-5, gpt-5-mini, gpt-5.4, gpt-5.4-mini, gpt-5-codex, ...) only works on the /responses endpoint and rejects /chat/completions with `unsupported_api_for_model`. This caused both /v1/chat/completions and /v1/messages to 400 for any gpt-5 model. - Add a per-process model→endpoint memory in state, defaulted by name prefix (gpt-5* → responses, else chat) and updated on each success. - Add upstream-router.dispatchChatCompletion() that picks the endpoint, then on `unsupported_api_for_model` flips once and retries. 
- Translate OpenAI chat payloads to Copilot's Responses protocol on the way out, and translate streaming + non-streaming Responses output back into chat.completion shape so Anthropic translation, usage normalizer, and recorder need no changes. - Add native /v1/responses passthrough endpoint with usage recording. --- src/lib/state.ts | 12 + src/lib/usage-normalizer.ts | 44 ++ src/lib/usage-recorder.ts | 6 +- src/routes/chat-completions/handler.ts | 8 +- src/routes/messages/handler.ts | 8 +- src/routes/responses/handler.ts | 191 +++++++ src/routes/responses/route.ts | 15 + src/server.ts | 3 + src/services/copilot/create-responses.ts | 99 ++++ src/services/copilot/responses-translation.ts | 520 ++++++++++++++++++ src/services/copilot/upstream-router.ts | 94 ++++ 11 files changed, 991 insertions(+), 9 deletions(-) create mode 100644 src/routes/responses/handler.ts create mode 100644 src/routes/responses/route.ts create mode 100644 src/services/copilot/create-responses.ts create mode 100644 src/services/copilot/responses-translation.ts create mode 100644 src/services/copilot/upstream-router.ts diff --git a/src/lib/state.ts b/src/lib/state.ts index 72bb388de..74739c1a1 100644 --- a/src/lib/state.ts +++ b/src/lib/state.ts @@ -2,6 +2,8 @@ import type { ModelsResponse } from "~/services/copilot/get-models" import type { Account, AccountPool, Strategy } from "./account-pool" +export type CopilotUpstreamEndpoint = "chat" | "responses" + export interface State { pool?: AccountPool strategy: Strategy @@ -17,6 +19,15 @@ export interface State { // Rate limiting configuration rateLimitSeconds?: number lastRequestTimestamp?: number + + /** + * Per-model memory of which Copilot upstream endpoint last succeeded. + * Some models (gpt-5*) only work on /responses; others only on + * /chat/completions. Once a model is observed to succeed on one endpoint, + * we keep using that endpoint for the rest of the process lifetime so + * we don't pay an extra failed round trip on every call. 
+ */ + modelEndpointRoute: Map } export const state: State = { @@ -25,6 +36,7 @@ export const state: State = { manualApprove: false, rateLimitWait: false, showToken: false, + modelEndpointRoute: new Map(), } /** Convenience: the first usable account. */ diff --git a/src/lib/usage-normalizer.ts b/src/lib/usage-normalizer.ts index e041878c1..9fb4237ef 100644 --- a/src/lib/usage-normalizer.ts +++ b/src/lib/usage-normalizer.ts @@ -97,6 +97,50 @@ export interface StreamUsageAccumulator { finalize(): NormalizedUsage } +interface ResponsesUsageShape { + input_tokens?: number + output_tokens?: number + total_tokens?: number + input_tokens_details?: { cached_tokens?: number } + output_tokens_details?: { reasoning_tokens?: number } +} + +export function normalizeResponsesFinal(usage: unknown): NormalizedUsage { + const u = (usage ?? {}) as ResponsesUsageShape + const inputTokens = numOr0(u.input_tokens) + const cachedInputTokens = numOr0(u.input_tokens_details?.cached_tokens) + const outputTokens = numOr0(u.output_tokens) + const reasoningTokens = numOr0(u.output_tokens_details?.reasoning_tokens) + const totalTokens = numOr0(u.total_tokens) || inputTokens + outputTokens + return { + inputTokens, + cachedInputTokens, + outputTokens, + reasoningTokens, + totalTokens, + } +} + +export function createResponsesAccumulator(): StreamUsageAccumulator { + let saved: ResponsesUsageShape | undefined + return { + feed(chunk) { + const c = chunk as + | { type?: string; response?: { usage?: ResponsesUsageShape } } + | null + | undefined + if (!c) return + if (c.type === "response.completed" && c.response?.usage) { + saved = c.response.usage + } + }, + finalize() { + if (!saved) throw new UsageMissingError() + return normalizeResponsesFinal(saved) + }, + } +} + export function createOpenAIAccumulator(): StreamUsageAccumulator { let saved: OpenAIUsageShape | undefined diff --git a/src/lib/usage-recorder.ts b/src/lib/usage-recorder.ts index a3395ffcd..969f180d6 100644 --- 
a/src/lib/usage-recorder.ts +++ b/src/lib/usage-recorder.ts @@ -5,7 +5,11 @@ import type { NormalizedUsage } from "./usage-normalizer" import { getDb } from "./db" -export type UsageEndpoint = "chat.completions" | "messages" | "embeddings" +export type UsageEndpoint = + | "chat.completions" + | "messages" + | "embeddings" + | "responses" export type UpstreamFormat = "openai" | "anthropic" export type UsageStatus = "ok" | "error" | "aborted" diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index f8e825039..391cc0227 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -20,10 +20,10 @@ import { recordUsage } from "~/lib/usage-recorder" import { isNullish, makeApiContext, resolveAndMapModelId } from "~/lib/utils" import { withAccount } from "~/lib/with-account" import { - createChatCompletions, type ChatCompletionResponse, type ChatCompletionsPayload, } from "~/services/copilot/create-chat-completions" +import { dispatchChatCompletion } from "~/services/copilot/upstream-router" const ZERO_USAGE: NormalizedUsage = { inputTokens: 0, @@ -158,11 +158,11 @@ export async function handleCompletion(c: Context) { const tStart = Date.now() let usedAccount: Account | undefined - let response: Awaited> + let response: Awaited> try { response = await withAccount(c, (account) => { usedAccount = account - return createChatCompletions(makeApiContext(account), payload) + return dispatchChatCompletion(makeApiContext(account), payload) }) } catch (err) { if (usedAccount) { @@ -218,5 +218,5 @@ export async function handleCompletion(c: Context) { } const isNonStreaming = ( - response: Awaited>, + response: Awaited>, ): response is ChatCompletionResponse => Object.hasOwn(response, "choices") diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 68b1c22e6..2ad1b0e68 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -18,10 +18,10 @@ import 
{ recordUsage } from "~/lib/usage-recorder" import { makeApiContext, resolveAndMapModelId } from "~/lib/utils" import { withAccount } from "~/lib/with-account" import { - createChatCompletions, type ChatCompletionChunk, type ChatCompletionResponse, } from "~/services/copilot/create-chat-completions" +import { dispatchChatCompletion } from "~/services/copilot/upstream-router" import { type AnthropicMessagesPayload, @@ -184,11 +184,11 @@ export async function handleCompletion(c: Context) { const tStart = Date.now() let usedAccount: Account | undefined - let response: Awaited> + let response: Awaited> try { response = await withAccount(c, (account) => { usedAccount = account - return createChatCompletions(makeApiContext(account), openAIPayload) + return dispatchChatCompletion(makeApiContext(account), openAIPayload) }) } catch (err) { if (usedAccount) { @@ -262,5 +262,5 @@ export async function handleCompletion(c: Context) { } const isNonStreaming = ( - response: Awaited>, + response: Awaited>, ): response is ChatCompletionResponse => Object.hasOwn(response, "choices") diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts new file mode 100644 index 000000000..513cb3631 --- /dev/null +++ b/src/routes/responses/handler.ts @@ -0,0 +1,191 @@ +import type { Context } from "hono" + +import consola from "consola" +import { events } from "fetch-event-stream" +import { streamSSE } from "hono/streaming" + +import type { Account } from "~/lib/account-pool" + +import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config" +import { HTTPError } from "~/lib/error" +import { checkRateLimit } from "~/lib/rate-limit" +import { state } from "~/lib/state" +import { + createResponsesAccumulator, + normalizeResponsesFinal, + UsageMissingError, + type NormalizedUsage, +} from "~/lib/usage-normalizer" +import { recordUsage } from "~/lib/usage-recorder" +import { makeApiContext, resolveAndMapModelId } from "~/lib/utils" +import { withAccount } from 
"~/lib/with-account" + +const ZERO_USAGE: NormalizedUsage = { + inputTokens: 0, + cachedInputTokens: 0, + outputTokens: 0, + reasoningTokens: 0, + totalTokens: 0, +} + +interface ResponsesRequestBody { + model?: string + stream?: boolean + [key: string]: unknown +} + +interface CallContext { + account: Account + modelId: string + isInternal: boolean + tStart: number +} + +async function fetchResponses( + account: Account, + payload: ResponsesRequestBody, + modelId: string, +): Promise { + const ctx = makeApiContext(account) + const headers: Record = { + ...copilotHeaders(ctx, false), + } + const res = await fetch(`${copilotBaseUrl(ctx)}/responses`, { + method: "POST", + headers, + body: JSON.stringify(payload), + }) + if (!res.ok) { + consola.error("Failed to call /responses", res) + throw new HTTPError("Failed to call /responses", res) + } + state.modelEndpointRoute.set(modelId, "responses") + return res +} + +async function handleNonStream( + c: Context, + upstream: Response, + cc: CallContext, +) { + const text = await upstream.text() + let parsed: { id?: string; usage?: unknown } | undefined + try { + parsed = JSON.parse(text) as { id?: string; usage?: unknown } + } catch { + parsed = undefined + } + const usage = + parsed === undefined ? 
ZERO_USAGE : normalizeResponsesFinal(parsed.usage) + recordUsage({ + account: cc.account, + modelId: cc.modelId, + endpoint: "responses", + upstreamFormat: "openai", + isStreaming: false, + usage, + durationMs: Date.now() - cc.tStart, + status: "ok", + requestId: parsed?.id, + isInternal: cc.isInternal, + }) + c.header("content-type", "application/json") + return c.body(text) +} + +function handleStream(c: Context, upstream: Response, cc: CallContext) { + return streamSSE(c, async (stream) => { + const accumulator = createResponsesAccumulator() + let status: "ok" | "error" | "aborted" = "ok" + let lastRequestId: string | undefined + try { + for await (const ev of events(upstream)) { + if (stream.aborted) { + status = "aborted" + break + } + if (!ev.data) continue + if (ev.data === "[DONE]") { + await stream.writeSSE({ data: "[DONE]" }) + break + } + try { + const parsed = JSON.parse(ev.data) as { + type?: string + response?: { id?: string } + } + accumulator.feed(parsed) + if (parsed.response?.id) lastRequestId = parsed.response.id + } catch { + // ignore non-JSON frames + } + await stream.writeSSE({ event: ev.event, data: ev.data }) + } + } catch (err) { + status = "error" + consola.error("Streaming /responses error:", err) + } + + let usage: NormalizedUsage + try { + usage = accumulator.finalize() + } catch (err) { + if (!(err instanceof UsageMissingError)) { + consola.error("Failed to finalize Responses stream usage:", err) + } + usage = ZERO_USAGE + if (status === "ok") status = "error" + } + + recordUsage({ + account: cc.account, + modelId: cc.modelId, + endpoint: "responses", + upstreamFormat: "openai", + isStreaming: true, + usage, + durationMs: Date.now() - cc.tStart, + status, + requestId: lastRequestId, + isInternal: cc.isInternal, + }) + }) +} + +export async function handleResponses(c: Context) { + await checkRateLimit(state) + + const incoming = await c.req.json() + const requestedModel = + typeof incoming.model === "string" ? 
incoming.model : "" + const mappedModel = resolveAndMapModelId( + requestedModel, + c, + state.models?.data ?? [], + ) + const payload: ResponsesRequestBody = { ...incoming, model: mappedModel } + const isStreaming = Boolean(payload.stream) + const isInternal = c.req.header("x-internal-pricing-sync") === "1" + const tStart = Date.now() + + let usedAccount: Account | undefined + const upstream = await withAccount(c, async (account) => { + usedAccount = account + return await fetchResponses(account, payload, mappedModel) + }) + + if (!usedAccount) { + // Should never happen — withAccount always invokes the callback. + throw new Error("No account selected for /responses request") + } + + const cc: CallContext = { + account: usedAccount, + modelId: mappedModel, + isInternal, + tStart, + } + + if (!isStreaming) return await handleNonStream(c, upstream, cc) + return handleStream(c, upstream, cc) +} diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts new file mode 100644 index 000000000..af2423427 --- /dev/null +++ b/src/routes/responses/route.ts @@ -0,0 +1,15 @@ +import { Hono } from "hono" + +import { forwardError } from "~/lib/error" + +import { handleResponses } from "./handler" + +export const responsesRoutes = new Hono() + +responsesRoutes.post("/", async (c) => { + try { + return await handleResponses(c) + } catch (error) { + return await forwardError(c, error) + } +}) diff --git a/src/server.ts b/src/server.ts index 8878709e8..46d7e92a8 100644 --- a/src/server.ts +++ b/src/server.ts @@ -9,6 +9,7 @@ import { completionRoutes } from "./routes/chat-completions/route" import { embeddingRoutes } from "./routes/embeddings/route" import { messageRoutes } from "./routes/messages/route" import { modelRoutes } from "./routes/models/route" +import { responsesRoutes } from "./routes/responses/route" import { tokenRoute } from "./routes/token/route" import { usageRoute } from "./routes/usage/route" @@ -32,6 +33,7 @@ export function createServer(options: 
ServerOptions = {}): Hono { server.route("/chat/completions", completionRoutes) server.route("/models", modelRoutes) server.route("/embeddings", embeddingRoutes) + server.route("/responses", responsesRoutes) server.route("/usage", usageRoute) server.route("/token", tokenRoute) @@ -39,6 +41,7 @@ export function createServer(options: ServerOptions = {}): Hono { server.route("/v1/chat/completions", completionRoutes) server.route("/v1/models", modelRoutes) server.route("/v1/embeddings", embeddingRoutes) + server.route("/v1/responses", responsesRoutes) // Anthropic compatible endpoints server.route("/v1/messages", messageRoutes) diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts new file mode 100644 index 000000000..4aff8b535 --- /dev/null +++ b/src/services/copilot/create-responses.ts @@ -0,0 +1,99 @@ +import consola from "consola" +import { events } from "fetch-event-stream" + +import type { ApiContext } from "~/lib/api-config" + +import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config" +import { HTTPError } from "~/lib/error" + +import { + type ChatCompletionChunk, + type ChatCompletionResponse, + type ChatCompletionsPayload, +} from "./create-chat-completions" +import { + makeResponsesStreamState, + translateChatToResponses, + translateResponsesEventToChatChunks, + translateResponsesToChat, + type ResponsesFinalResponse, +} from "./responses-translation" + +/** + * Call Copilot's `/responses` endpoint while accepting and emitting the + * OpenAI chat.completions wire format used everywhere else in this proxy. + * + * The shape of the return value mirrors `createChatCompletions` exactly: + * - non-streaming → `ChatCompletionResponse` + * - streaming → `AsyncIterable<{ data?: string }>` whose `data` payloads + * are stringified `ChatCompletionChunk` objects. + * + * This means upstream handlers (chat-completions, messages) and the usage + * accumulator do not need to learn about the Responses protocol. 
+ */ +export const createResponses = async ( + ctx: ApiContext, + payload: ChatCompletionsPayload, +): Promise> => { + if (!ctx.account.copilotToken) throw new Error("Copilot token not found") + + const responsesPayload = translateChatToResponses(payload) + + const enableVision = payload.messages.some( + (m) => + Array.isArray(m.content) && m.content.some((p) => p.type === "image_url"), + ) + + const isAgentCall = payload.messages.some((m) => + ["assistant", "tool"].includes(m.role), + ) + + const headers: Record = { + ...copilotHeaders(ctx, enableVision), + "X-Initiator": isAgentCall ? "agent" : "user", + } + + const response = await fetch(`${copilotBaseUrl(ctx)}/responses`, { + method: "POST", + headers, + body: JSON.stringify(responsesPayload), + }) + + if (!response.ok) { + consola.error("Failed to create responses", response) + throw new HTTPError("Failed to create responses", response) + } + + if (payload.stream) { + const upstream = events(response) + return translateStreamToChatChunks(upstream) + } + + const raw = (await response.json()) as ResponsesFinalResponse + return translateResponsesToChat(raw) +} + +async function* translateStreamToChatChunks( + upstream: AsyncIterable<{ data?: string; event?: string }>, +): AsyncIterable<{ data?: string }> { + const state = makeResponsesStreamState() + for await (const ev of upstream) { + if (!ev.data) continue + if (ev.data === "[DONE]") { + yield { data: "[DONE]" } + return + } + let parsed: unknown + try { + parsed = JSON.parse(ev.data) + } catch { + continue + } + const chunks: Array = + translateResponsesEventToChatChunks(parsed, state) + for (const c of chunks) { + yield { data: JSON.stringify(c) } + } + } + yield { data: "[DONE]" } +} diff --git a/src/services/copilot/responses-translation.ts b/src/services/copilot/responses-translation.ts new file mode 100644 index 000000000..ca6d8103a --- /dev/null +++ b/src/services/copilot/responses-translation.ts @@ -0,0 +1,520 @@ +/** + * Translate between the OpenAI 
chat.completions wire format used internally by + * this proxy and GitHub Copilot's `/responses` wire format. + * + * gpt-5* family models on GitHub Copilot are ONLY accessible through the + * `/responses` endpoint — the same payload sent to `/chat/completions` returns + * `unsupported_api_for_model`. To avoid touching every upstream handler we + * translate the request on the way out and translate the response back into + * `ChatCompletion` shape so the rest of the pipeline (Anthropic translation, + * usage normalization, recording) doesn't need to know the difference. + */ + +import type { + ChatCompletionChunk, + ChatCompletionResponse, + ChatCompletionsPayload, + ContentPart, + Message, + ToolCall, +} from "./create-chat-completions" + +// ---------- Outgoing payload translation ---------- + +interface ResponsesInputContentPart { + type: "input_text" | "output_text" | "input_image" + text?: string + image_url?: string +} + +interface ResponsesMessageItem { + type: "message" + role: "user" | "assistant" | "system" | "developer" + content: Array +} + +interface ResponsesFunctionCallItem { + type: "function_call" + call_id: string + name: string + arguments: string +} + +interface ResponsesFunctionCallOutputItem { + type: "function_call_output" + call_id: string + output: string +} + +type ResponsesInputItem = + | ResponsesMessageItem + | ResponsesFunctionCallItem + | ResponsesFunctionCallOutputItem + +export interface ResponsesPayload { + model: string + instructions?: string + input: Array + tools?: Array<{ + type: "function" + name: string + description?: string + parameters: Record + }> + tool_choice?: + | "auto" + | "required" + | "none" + | { type: "function"; name: string } + stream?: boolean | null + temperature?: number | null + top_p?: number | null + max_output_tokens?: number | null + user?: string | null + metadata?: Record | null +} + +function partsToInputContent( + parts: string | Array | null, + role: "user" | "assistant" | "system" | "developer", 
+): Array { + if (parts === null) return [] + const textType: "input_text" | "output_text" = + role === "assistant" ? "output_text" : "input_text" + if (typeof parts === "string") { + return parts ? [{ type: textType, text: parts }] : [] + } + const out: Array = [] + for (const p of parts) { + if (p.type === "text") { + out.push({ type: textType, text: p.text }) + } else { + out.push({ type: "input_image", image_url: p.image_url.url }) + } + } + return out +} + +function toolMessageText(content: Message["content"]): string { + if (typeof content === "string") return content + if (!Array.isArray(content)) return "" + return content + .filter( + (p): p is Extract => p.type === "text", + ) + .map((p) => p.text) + .join("\n") +} + +function pushSystemChunks(m: Message, chunks: Array): void { + if (typeof m.content === "string") { + chunks.push(m.content) + return + } + if (Array.isArray(m.content)) { + for (const p of m.content) { + if (p.type === "text") chunks.push(p.text) + } + } +} + +function pushAssistant(m: Message, items: Array): void { + const content = partsToInputContent(m.content, "assistant") + if (content.length > 0) { + items.push({ type: "message", role: "assistant", content }) + } + if (m.tool_calls) { + for (const tc of m.tool_calls) { + items.push({ + type: "function_call", + call_id: tc.id, + name: tc.function.name, + arguments: tc.function.arguments, + }) + } + } +} + +function mapMessage( + m: Message, + instructionsChunks: Array, + items: Array, +): void { + if (m.role === "system" || m.role === "developer") { + pushSystemChunks(m, instructionsChunks) + return + } + if (m.role === "tool") { + items.push({ + type: "function_call_output", + call_id: m.tool_call_id ?? 
"", + output: toolMessageText(m.content), + }) + return + } + if (m.role === "assistant") { + pushAssistant(m, items) + return + } + items.push({ + type: "message", + role: "user", + content: partsToInputContent(m.content, "user"), + }) +} + +function mapToolChoice( + tc: ChatCompletionsPayload["tool_choice"], +): ResponsesPayload["tool_choice"] { + if (typeof tc === "string") return tc + if (tc && typeof tc === "object") { + return { type: "function", name: tc.function.name } + } + return undefined +} + +export function translateChatToResponses( + payload: ChatCompletionsPayload, +): ResponsesPayload { + const instructionsChunks: Array = [] + const items: Array = [] + + for (const m of payload.messages) { + mapMessage(m, instructionsChunks, items) + } + + const tools = payload.tools?.map((t) => ({ + type: "function" as const, + name: t.function.name, + description: t.function.description, + parameters: t.function.parameters, + })) + + const instructions = + instructionsChunks.length > 0 ? 
instructionsChunks.join("\n\n") : undefined + + return { + model: payload.model, + instructions, + input: items, + tools, + tool_choice: mapToolChoice(payload.tool_choice), + stream: payload.stream, + temperature: payload.temperature, + top_p: payload.top_p, + max_output_tokens: payload.max_tokens, + user: payload.user, + } +} + +// ---------- Non-streaming response translation ---------- + +interface ResponsesOutputText { + type: "output_text" + text: string +} + +interface ResponsesOutputMessage { + type: "message" + id?: string + role: "assistant" + content: Array +} + +interface ResponsesOutputFunctionCall { + type: "function_call" + id?: string + call_id: string + name: string + arguments: string +} + +type ResponsesOutputItem = + | ResponsesOutputMessage + | ResponsesOutputFunctionCall + | { type: "reasoning"; summary?: unknown } + | { type: string; [key: string]: unknown } + +export interface ResponsesUsage { + input_tokens?: number + output_tokens?: number + total_tokens?: number + input_tokens_details?: { cached_tokens?: number } + output_tokens_details?: { reasoning_tokens?: number } +} + +export interface ResponsesFinalResponse { + id: string + object?: string + created_at?: number + status?: string + model: string + output: Array + usage?: ResponsesUsage +} + +function extractText(item: ResponsesOutputMessage): string { + let buf = "" + for (const c of item.content) { + if (c.type === "output_text" && typeof c.text === "string") { + buf += c.text + } + } + return buf +} + +function makeUsage(usage: ResponsesUsage | undefined) { + if (!usage) return undefined + const inputTokens = usage.input_tokens ?? 0 + const outputTokens = usage.output_tokens ?? 0 + const cached = usage.input_tokens_details?.cached_tokens + return { + prompt_tokens: inputTokens, + completion_tokens: outputTokens, + total_tokens: usage.total_tokens ?? inputTokens + outputTokens, + prompt_tokens_details: + cached === undefined ? 
undefined : { cached_tokens: cached }, + } +} + +export function translateResponsesToChat( + res: ResponsesFinalResponse, +): ChatCompletionResponse { + let textBuf = "" + const toolCalls: Array = [] + for (const item of res.output) { + if (item.type === "message") { + textBuf += extractText(item as ResponsesOutputMessage) + } else if (item.type === "function_call") { + const fc = item as ResponsesOutputFunctionCall + toolCalls.push({ + id: fc.call_id || fc.id || "", + type: "function", + function: { name: fc.name, arguments: fc.arguments }, + }) + } + } + + const finish_reason: ChatCompletionResponse["choices"][number]["finish_reason"] = + toolCalls.length > 0 ? "tool_calls" : "stop" + + return { + id: res.id, + object: "chat.completion", + created: res.created_at ?? Math.floor(Date.now() / 1000), + model: res.model, + choices: [ + { + index: 0, + message: { + role: "assistant", + content: textBuf || null, + ...(toolCalls.length > 0 ? { tool_calls: toolCalls } : {}), + }, + logprobs: null, + finish_reason, + }, + ], + usage: makeUsage(res.usage), + } +} + +// ---------- Streaming translation ---------- + +export interface ResponsesStreamState { + id?: string + model?: string + toolCalls: Map + toolCallOrder: Array +} + +export function makeResponsesStreamState(): ResponsesStreamState { + return { toolCalls: new Map(), toolCallOrder: [] } +} + +interface ParsedEvent { + type?: string + response?: { + id?: string + model?: string + usage?: ResponsesUsage + output?: Array + } + item?: ResponsesOutputItem & { + output_index?: number + id?: string + call_id?: string + name?: string + } + output_index?: number + delta?: string + arguments?: string +} + +function makeBase(state: ResponsesStreamState) { + return { + id: state.id ?? "responses-stream", + object: "chat.completion.chunk" as const, + created: Math.floor(Date.now() / 1000), + model: state.model ?? 
"", + } +} + +function handleItemAdded( + e: ParsedEvent, + state: ResponsesStreamState, +): Array { + const item = e.item + if (item?.type !== "function_call") return [] + const callId = item.call_id ?? item.id ?? "" + const idx = item.output_index ?? state.toolCallOrder.length + state.toolCalls.set(idx, { id: callId, name: item.name ?? "", index: idx }) + state.toolCallOrder.push(callId) + return [ + { + ...makeBase(state), + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: idx, + id: callId, + type: "function", + function: { name: item.name ?? "", arguments: "" }, + }, + ], + }, + finish_reason: null, + logprobs: null, + }, + ], + }, + ] +} + +function handleTextDelta( + e: ParsedEvent, + state: ResponsesStreamState, +): Array { + if (typeof e.delta !== "string" || e.delta.length === 0) return [] + return [ + { + ...makeBase(state), + choices: [ + { + index: 0, + delta: { content: e.delta }, + finish_reason: null, + logprobs: null, + }, + ], + }, + ] +} + +function handleArgsDelta( + e: ParsedEvent, + state: ResponsesStreamState, +): Array { + const idx = e.output_index ?? 0 + const argsDelta = e.delta ?? "" + if (!argsDelta) return [] + return [ + { + ...makeBase(state), + choices: [ + { + index: 0, + delta: { + tool_calls: [{ index: idx, function: { arguments: argsDelta } }], + }, + finish_reason: null, + logprobs: null, + }, + ], + }, + ] +} + +function usageChunk( + state: ResponsesStreamState, + u: ResponsesUsage, +): ChatCompletionChunk { + const cached = u.input_tokens_details?.cached_tokens + const reasoning = u.output_tokens_details?.reasoning_tokens + return { + ...makeBase(state), + choices: [], + usage: { + prompt_tokens: u.input_tokens ?? 0, + completion_tokens: u.output_tokens ?? 0, + total_tokens: + u.total_tokens ?? (u.input_tokens ?? 0) + (u.output_tokens ?? 0), + prompt_tokens_details: + cached === undefined ? undefined : { cached_tokens: cached }, + completion_tokens_details: + reasoning === undefined ? 
undefined : ( + { accepted_prediction_tokens: 0, rejected_prediction_tokens: 0 } + ), + }, + } +} + +function handleCompleted( + e: ParsedEvent, + state: ResponsesStreamState, +): Array { + const resp = e.response + if (resp?.id) state.id = resp.id + if (resp?.model) state.model = resp.model + const finish_reason: NonNullable< + ChatCompletionChunk["choices"][number]["finish_reason"] + > = state.toolCallOrder.length > 0 ? "tool_calls" : "stop" + + const chunks: Array = [ + { + ...makeBase(state), + choices: [{ index: 0, delta: {}, finish_reason, logprobs: null }], + }, + ] + if (resp?.usage) chunks.push(usageChunk(state, resp.usage)) + return chunks +} + +function emptyStop(state: ResponsesStreamState): Array { + return [ + { + ...makeBase(state), + choices: [{ index: 0, delta: {}, finish_reason: "stop", logprobs: null }], + }, + ] +} + +/** + * Translate one parsed Responses-API SSE event into zero or more OpenAI + * chat.completion.chunk objects (already in object form — caller serializes). + */ +export function translateResponsesEventToChatChunks( + event: unknown, + state: ResponsesStreamState, +): Array { + const e = event as ParsedEvent + const type = e.type ?? 
"" + if (type === "response.created" || type === "response.in_progress") { + if (e.response?.id) state.id = e.response.id + if (e.response?.model) state.model = e.response.model + return [] + } + if (type === "response.output_item.added") return handleItemAdded(e, state) + if (type === "response.output_text.delta") return handleTextDelta(e, state) + if (type === "response.function_call_arguments.delta") { + return handleArgsDelta(e, state) + } + if (type === "response.completed") return handleCompleted(e, state) + if (type === "response.failed" || type === "response.incomplete") { + return emptyStop(state) + } + return [] +} diff --git a/src/services/copilot/upstream-router.ts b/src/services/copilot/upstream-router.ts new file mode 100644 index 000000000..e623076ea --- /dev/null +++ b/src/services/copilot/upstream-router.ts @@ -0,0 +1,94 @@ +import consola from "consola" + +import type { ApiContext } from "~/lib/api-config" + +import { HTTPError } from "~/lib/error" +import { state, type CopilotUpstreamEndpoint } from "~/lib/state" + +import { + createChatCompletions, + type ChatCompletionResponse, + type ChatCompletionsPayload, +} from "./create-chat-completions" +import { createResponses } from "./create-responses" + +type Result = ChatCompletionResponse | AsyncIterable<{ data?: string }> + +/** + * Pick the default upstream endpoint for a model. + * + * GitHub Copilot's gpt-5* family (gpt-5, gpt-5-mini, gpt-5.4, gpt-5.4-mini, + * gpt-5-codex, …) is only reachable through `/responses`. Everything else + * defaults to `/chat/completions`. + */ +function defaultEndpointFor(modelId: string): CopilotUpstreamEndpoint { + const id = modelId.toLowerCase() + if (id.startsWith("gpt-5")) return "responses" + return "chat" +} + +/** + * Returns true when the upstream error is the well-known + * `unsupported_api_for_model` 400 — the signal that we picked the wrong + * endpoint and should flip. 
+ */ +async function isUnsupportedApiError(err: unknown): Promise { + if (!(err instanceof HTTPError)) return false + if (err.response.status !== 400) return false + try { + const text = await err.response.clone().text() + return text.includes("unsupported_api_for_model") + } catch { + return false + } +} + +async function call( + endpoint: CopilotUpstreamEndpoint, + ctx: ApiContext, + payload: ChatCompletionsPayload, +): Promise { + return endpoint === "responses" ? + createResponses(ctx, payload) + : createChatCompletions(ctx, payload) +} + +/** + * Dispatch a chat-completions style request to whichever Copilot endpoint + * (`/chat/completions` or `/responses`) the given model actually supports. + * + * Decision order: + * 1. If we've previously observed this model succeed on an endpoint, use it. + * 2. Otherwise pick by name (gpt-5* → responses, else chat). + * 3. On `unsupported_api_for_model` 400, flip endpoints once and retry. + * Successful endpoint is memoized in `state.modelEndpointRoute` for the rest + * of the process lifetime. + */ +export async function dispatchChatCompletion( + ctx: ApiContext, + payload: ChatCompletionsPayload, +): Promise { + const remembered = state.modelEndpointRoute.get(payload.model) + const primary = remembered ?? defaultEndpointFor(payload.model) + + try { + const result = await call(primary, ctx, payload) + state.modelEndpointRoute.set(payload.model, primary) + return result + } catch (err) { + if (!(await isUnsupportedApiError(err))) throw err + + const fallback: CopilotUpstreamEndpoint = + primary === "responses" ? "chat" : "responses" + consola.warn( + `Model "${payload.model}" rejected on /${ + primary === "chat" ? "chat/completions" : "responses" + } with unsupported_api_for_model; retrying on /${ + fallback === "chat" ? 
"chat/completions" : "responses" + }`, + ) + const result = await call(fallback, ctx, payload) + state.modelEndpointRoute.set(payload.model, fallback) + return result + } +} From 8fa3db490bd1e1d56c22483f8b5327ea71bc6851 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Thu, 14 May 2026 01:37:28 +0800 Subject: [PATCH 34/34] chore: release v0.9.3 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 10c27d7ba..6edbdb089 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@weavejam/copilot-proxy", - "version": "0.9.2", + "version": "0.9.3", "description": "Turn GitHub Copilot into OpenAI/Anthropic API compatible server. Usable with Claude Code!", "keywords": [ "proxy",