feat: Add model cache and auto max_tokens based on model selection

ericc-ch · ericc-ch · commit 3c446341ba51 · 2025-02-28T08:51:42.000+07:00
diff --git a/src/lib/is-nullish.ts b/src/lib/is-nullish.ts
@@ -0,0 +1,2 @@
+export const isNullish = (value: unknown): value is null | undefined =>
+  value === null || value === undefined
diff --git a/src/lib/models.ts b/src/lib/models.ts
@@ -0,0 +1,13 @@
+import type { GetModelsResponse } from "~/services/copilot/get-models/types"
+
+export const modelsCache = {
+  _models: null as GetModelsResponse | null,
+
+  setModels(models: GetModelsResponse) {
+    this._models = models
+  },
+
+  getModels() {
+    return this._models
+  },
+}
diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts
@@ -6,7 +6,9 @@ import { streamSSE, type SSEMessage } from "hono/streaming"
 import type { ChatCompletionsPayload } from "~/services/copilot/chat-completions/types"
 import type { ChatCompletionChunk } from "~/services/copilot/chat-completions/types-streaming"
 
+import { isNullish } from "~/lib/is-nullish"
 import { logger } from "~/lib/logger"
+import { modelsCache } from "~/lib/models"
 import { chatCompletions } from "~/services/copilot/chat-completions/service"
 import { chatCompletionsStream } from "~/services/copilot/chat-completions/service-streaming"
 
@@ -36,7 +38,19 @@ function createCondensedStreamingResponse(
 }
 
 export async function handlerStreaming(c: Context) {
-  const payload = await c.req.json<ChatCompletionsPayload>()
+  const models = modelsCache.getModels()
+  let payload = await c.req.json<ChatCompletionsPayload>()
+
+  if (isNullish(payload.max_tokens)) {
+    const selectedModel = models?.data.find(
+      (model) => model.id === payload.model,
+    )
+
+    payload = {
+      ...payload,
+      max_tokens: selectedModel?.capabilities.limits.max_output_tokens,
+    }
+  }
 
   // Convert request headers to a regular object from Headers
   const requestHeaders = c.req.header()
diff --git a/src/services/api-instance.ts b/src/services/api-instance.ts
@@ -6,6 +6,7 @@ import {
   GITHUB_API_CONFIG,
   GITHUB_WEB_API_CONFIG,
 } from "~/lib/constants"
+import { modelsCache } from "~/lib/models"
 import { tokenService } from "~/lib/token"
 
 export const copilot = ofetch.create({
@@ -28,6 +29,13 @@ export const copilot = ofetch.create({
     }
   },
 
+  onResponse({ response }) {
+    if (response.url.endsWith("/models") && response._data) {
+      // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
+      modelsCache.setModels(response._data)
+    }
+  },
+
   onResponseError({ error, response, options }) {
     if (error instanceof FetchError) {
       consola.error(

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -0,0 +1,2 @@` |
| | `1` | `+export const isNullish = (value: unknown): value is null \| undefined =>` |
| | `2` | `+  value === null \|\| value === undefined` |