Skip to content

Commit e3b8b9e

Browse files
committed
feat: standardize handling of incomplete streams and improve finish_reason logic
- Added `handleIncompleteStream` function to synthesize appropriate SSE events when streams terminate abruptly without proper completion markers. - Introduced `messageStopSent` state for clearer tracking of stream termination. - Corrected handling of `finish_reason` for scenarios where tool calls are present, ensuring proper continuation of pending operations. - Updated tests and state interfaces to reflect new logic.
1 parent c8057c6 commit e3b8b9e

6 files changed

Lines changed: 120 additions & 31 deletions

File tree

src/lib/request-logger.ts

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -111,7 +111,7 @@ export const requestLogger: MiddlewareHandler = async (c, next) => {
111111
const prefix = ok ? `${CYAN}${R}` : `${RED}${R}`
112112
const methodStr = pad(`${DIM}${method}${R}`, 4)
113113
const pathStr = pad(`${CYAN}${path}${R}`, 27)
114-
const modelStr = pad(model ? `${YELLOW}${model}${R}` : "", 18)
114+
const modelStr = pad(model ? `${YELLOW}${model}${R}` : "", 26)
115115
const sessionStr = pad(sessionId ? `${DIM}${sessionId}${R}` : "", 8)
116116
const streamStr = pad(stream === true ? `${BLUE}stream${R}` : "", 6)
117117
const statusStr = pad(colorStatus(status), 3)

src/routes/messages/anthropic-types.ts

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -276,6 +276,8 @@ export type AnthropicStreamEventData =
276276
// State for streaming translation
277277
export interface AnthropicStreamState {
278278
messageStartSent: boolean
279+
/** Whether the terminal message_stop event has been emitted. */
280+
messageStopSent: boolean
279281
contentBlockIndex: number
280282
contentBlockOpen: boolean
281283
/** Whether the currently open content block is a thinking block. */

src/routes/messages/handler.ts

Lines changed: 80 additions & 17 deletions
Original file line number · Diff line number · Diff line change
@@ -392,6 +392,7 @@ async function pipeStreamToClient(
392392
const { thinkingEnabled, imageTokenOverhead = 0 } = options
393393
const streamState: AnthropicStreamState = {
394394
messageStartSent: false,
395+
messageStopSent: false,
395396
contentBlockIndex: 0,
396397
contentBlockOpen: false,
397398
thinkingBlockOpen: false,
@@ -436,23 +437,7 @@ async function pipeStreamToClient(
436437
}
437438
}
438439

439-
// If the upstream stream ended without ever sending a usable chunk
440-
// (e.g. the model returned an empty response or only unsupported
441-
// event types), no message_start was emitted and the client sees an
442-
// empty SSE connection with no indication of what went wrong.
443-
// Emit a synthetic Anthropic error so the UI can surface the problem.
444-
if (!streamState.messageStartSent) {
445-
consola.warn(
446-
"Copilot stream ended without producing any content — emitting error event",
447-
)
448-
const errorEvent = translateErrorToAnthropicErrorEvent(
449-
"The model returned an empty response. This may indicate the model is unavailable or does not support this request.",
450-
)
451-
await stream.writeSSE({
452-
event: errorEvent.type,
453-
data: JSON.stringify(errorEvent),
454-
})
455-
}
440+
await handleIncompleteStream(stream, streamState)
456441
} catch (error) {
457442
consola.error("Stream error from Copilot:", error)
458443

@@ -480,6 +465,84 @@ async function pipeStreamToClient(
480465
}
481466
}
482467

468+
/**
469+
* Handles the case where the upstream stream ended without a proper Anthropic
470+
* termination sequence (message_delta + message_stop).
471+
*
472+
* Two scenarios:
473+
* 1. Stream never produced any content → emit a synthetic error event.
474+
* 2. Stream started (message_start sent) but ended without finish_reason →
475+
* synthesize the missing termination events so Claude Code can proceed.
476+
*/
477+
async function handleIncompleteStream(
478+
stream: SSEStreamingApi,
479+
state: AnthropicStreamState,
480+
): Promise<void> {
481+
if (!state.messageStartSent) {
482+
// No usable chunks arrived at all.
483+
consola.warn(
484+
"Copilot stream ended without producing any content — emitting error event",
485+
)
486+
const errorEvent = translateErrorToAnthropicErrorEvent(
487+
"The model returned an empty response. This may indicate the model is unavailable or does not support this request.",
488+
)
489+
await stream.writeSSE({
490+
event: errorEvent.type,
491+
data: JSON.stringify(errorEvent),
492+
})
493+
return
494+
}
495+
496+
if (state.messageStopSent) {
497+
return // Stream ended normally, nothing to do.
498+
}
499+
500+
// The upstream stream started but ended without a chunk containing
501+
// finish_reason — no message_delta / message_stop was ever sent.
502+
// Some models (notably Gemini) can terminate the stream abruptly after
503+
// emitting content or tool-call chunks. Without a proper termination
504+
// sequence Claude Code sees the SSE connection close with no indication
505+
// of completion and treats the turn as abandoned / silently dead.
506+
consola.warn(
507+
"Copilot stream ended without finish_reason — synthesizing message_delta/message_stop",
508+
)
509+
510+
if (state.contentBlockOpen) {
511+
await stream.writeSSE({
512+
event: "content_block_stop",
513+
data: JSON.stringify({
514+
type: "content_block_stop",
515+
index: state.contentBlockIndex,
516+
}),
517+
})
518+
}
519+
520+
// Determine the correct stop_reason: if tool calls were emitted
521+
// during the stream, the model intended "tool_use"; otherwise
522+
// default to "end_turn".
523+
const hasToolCalls = Object.keys(state.toolCalls).length > 0
524+
const stopReason = hasToolCalls ? "tool_use" : "end_turn"
525+
526+
await stream.writeSSE({
527+
event: "message_delta",
528+
data: JSON.stringify({
529+
type: "message_delta",
530+
delta: {
531+
stop_reason: stopReason,
532+
stop_sequence: null,
533+
},
534+
usage: {
535+
input_tokens: 0,
536+
output_tokens: 0,
537+
},
538+
}),
539+
})
540+
await stream.writeSSE({
541+
event: "message_stop",
542+
data: JSON.stringify({ type: "message_stop" }),
543+
})
544+
}
545+
483546
const isNonStreaming = (
484547
response: Awaited<ReturnType<typeof createChatCompletions>>,
485548
): response is ChatCompletionResponse => Object.hasOwn(response, "choices")

src/routes/messages/non-stream-translation.ts

Lines changed: 22 additions & 12 deletions
Original file line number · Diff line number · Diff line change
@@ -387,25 +387,35 @@ export function translateToAnthropic(
387387

388388
// Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses
389389

390+
// Some models (notably Gemini) intermittently return finish_reason "stop"
391+
// even when they emitted tool calls. Correct this to "tool_calls" so Claude
392+
// Code executes the pending tool calls instead of treating the turn as done.
393+
const correctedStopReason =
394+
allToolUseBlocks.length > 0 && stopReason === "stop" ?
395+
"tool_calls"
396+
: stopReason
397+
390398
return {
391399
id: toAnthropicMessageId(response.id),
392400
type: "message",
393401
role: "assistant",
394402
model: response.model,
395403
content: [...allTextBlocks, ...allToolUseBlocks],
396-
stop_reason: mapOpenAIStopReasonToAnthropic(stopReason),
404+
stop_reason: mapOpenAIStopReasonToAnthropic(correctedStopReason),
397405
stop_sequence: null,
398-
usage: {
399-
input_tokens:
400-
(response.usage?.prompt_tokens ?? 0)
401-
- (response.usage?.prompt_tokens_details?.cached_tokens ?? 0),
402-
output_tokens: response.usage?.completion_tokens ?? 0,
403-
...(response.usage?.prompt_tokens_details?.cached_tokens
404-
!== undefined && {
405-
cache_read_input_tokens:
406-
response.usage.prompt_tokens_details.cached_tokens,
407-
}),
408-
},
406+
usage: buildAnthropicUsage(response.usage),
407+
}
408+
}
409+
410+
function buildAnthropicUsage(usage: ChatCompletionResponse["usage"]) {
411+
return {
412+
input_tokens:
413+
(usage?.prompt_tokens ?? 0)
414+
- (usage?.prompt_tokens_details?.cached_tokens ?? 0),
415+
output_tokens: usage?.completion_tokens ?? 0,
416+
...(usage?.prompt_tokens_details?.cached_tokens !== undefined && {
417+
cache_read_input_tokens: usage.prompt_tokens_details.cached_tokens,
418+
}),
409419
}
410420
}
411421

src/routes/messages/stream-translation.ts

Lines changed: 13 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -236,11 +236,22 @@ export function translateChunkToAnthropicEvents(
236236
state.contentBlockOpen = false
237237
}
238238

239+
// Some models (notably Gemini) intermittently return finish_reason "stop"
240+
// even when they emitted tool calls in the response. Claude Code interprets
241+
// stop_reason "end_turn" as "model is done" and skips pending tool
242+
// executions, causing the session to stall after a few rounds.
243+
// Detect this mismatch and correct finish_reason to "tool_calls".
244+
const hasToolCalls = Object.keys(state.toolCalls).length > 0
245+
const correctedFinishReason =
246+
hasToolCalls && choice.finish_reason === "stop" ?
247+
"tool_calls"
248+
: choice.finish_reason
249+
239250
events.push(
240251
{
241252
type: "message_delta",
242253
delta: {
243-
stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason),
254+
stop_reason: mapOpenAIStopReasonToAnthropic(correctedFinishReason),
244255
stop_sequence: null,
245256
},
246257
usage: {
@@ -259,6 +270,7 @@ export function translateChunkToAnthropicEvents(
259270
type: "message_stop",
260271
},
261272
)
273+
state.messageStopSent = true
262274
}
263275

264276
return events

tests/anthropic-response.test.ts

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -249,6 +249,7 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => {
249249

250250
const streamState: AnthropicStreamState = {
251251
messageStartSent: false,
252+
messageStopSent: false,
252253
contentBlockIndex: 0,
253254
contentBlockOpen: false,
254255
thinkingBlockOpen: false,
@@ -351,6 +352,7 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => {
351352
// Streaming translation requires state
352353
const streamState: AnthropicStreamState = {
353354
messageStartSent: false,
355+
messageStopSent: false,
354356
contentBlockIndex: 0,
355357
contentBlockOpen: false,
356358
thinkingBlockOpen: false,

0 commit comments

Comments (0)