From 9ae4d087d98e434935f323caa1edba5219a9ea7d Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 26 Jun 2026 23:11:23 -0700 Subject: [PATCH 1/5] feat(fixtures): add blocks type, validator, and loader/factory normalization (#274) Introduce the fixture blocks array type with validation, and normalize loader/factory paths so block-ordered fixtures flow through consistently. --- src/fixture-loader.ts | 109 +++++++++++++++++++++ src/helpers.ts | 220 ++++++++++++++++++++++++++++++++++++++++++ src/index.ts | 2 + src/types.ts | 46 +++++++++ 4 files changed, 377 insertions(+) diff --git a/src/fixture-loader.ts b/src/fixture-loader.ts index 5caa8d10..498749e6 100644 --- a/src/fixture-loader.ts +++ b/src/fixture-loader.ts @@ -46,6 +46,25 @@ export function normalizeResponse(raw: FixtureFileResponse): FixtureResponse { }); } + // Carry the optional ordered `blocks` array through, mirroring the + // toolCalls[].arguments idiom above: auto-stringify object `arguments` on + // each `toolCall` block. Gated on Array.isArray so a malformed (non-array) + // `blocks` value passes through untouched rather than crashing — downstream + // validation/builders own shape rejection. Absent `blocks` → key absent. 
+ if (Array.isArray(response.blocks)) { + response.blocks = (response.blocks as Array>).map((block) => { + if ( + block != null && + block.type === "toolCall" && + typeof block.arguments === "object" && + block.arguments !== null + ) { + return { ...block, arguments: JSON.stringify(block.arguments) }; + } + return block; + }); + } + return response as unknown as FixtureResponse; } @@ -257,6 +276,91 @@ function validateWebSearches( } } +function validateBlocks( + response: { blocks?: unknown }, + fixtureIndex: number, + results: ValidationResult[], +): void { + if (response.blocks === undefined) return; + + // Mirrors the toolCalls checks: reject malformed `blocks` at LOAD time so a + // bad blocks array never reaches the dispatch/builder (where + // resolveFixtureBlocks throws AFTER the journal has already recorded + // status:200, yielding a journal-200/client-500 mismatch). #274 F3+F8. + if (!Array.isArray(response.blocks)) { + results.push({ + severity: "error", + fixtureIndex, + message: `blocks must be an array, got ${typeof response.blocks}`, + }); + return; + } + + for (let j = 0; j < response.blocks.length; j++) { + const block = response.blocks[j] as Record | null | undefined; + if (typeof block !== "object" || block === null) { + results.push({ + severity: "error", + fixtureIndex, + message: `blocks[${j}] must be an object`, + }); + continue; + } + if (block.type !== "text" && block.type !== "toolCall") { + results.push({ + severity: "error", + fixtureIndex, + message: `blocks[${j}].type must be "text" or "toolCall", got ${JSON.stringify(block.type)}`, + }); + continue; + } + if (block.type === "text") { + if (typeof block.text !== "string") { + results.push({ + severity: "error", + fixtureIndex, + message: `blocks[${j}].text must be a string, got ${typeof block.text}`, + }); + } + } else { + // toolCall block — mirror toolCalls[] name + arguments checks. 
+ if (typeof block.name !== "string" || block.name === "") { + results.push({ + severity: "error", + fixtureIndex, + message: `blocks[${j}].name must be a non-empty string`, + }); + } + // `arguments` is JSON-string in runtime form (normalizeResponse already + // stringified object/array args); accept a valid-JSON string or an object. + if (typeof block.arguments === "string") { + try { + JSON.parse(block.arguments); + } catch { + results.push({ + severity: "error", + fixtureIndex, + message: `blocks[${j}].arguments is not valid JSON: ${block.arguments}`, + }); + } + } else if (typeof block.arguments !== "object" || block.arguments === null) { + results.push({ + severity: "error", + fixtureIndex, + message: `blocks[${j}].arguments must be a JSON string or object, got ${typeof block.arguments}`, + }); + } + if (block.id !== undefined && typeof block.id !== "string") { + results.push({ + severity: "error", + fixtureIndex, + message: `blocks[${j}].id must be a string, got ${typeof block.id}`, + }); + } + } + } +} + export function validateFixtures(fixtures: Fixture[]): ValidationResult[] { const results: ValidationResult[] = []; @@ -348,6 +452,11 @@ export function validateFixtures(fixtures: Fixture[]): ValidationResult[] { validateWebSearches(response, i, results); } + // Optional ordered `blocks` checks — validated whenever present on the + // response, regardless of which content/toolCalls guard matched, so a + // malformed blocks array is rejected at LOAD rather than at dispatch. 
+ validateBlocks(response as { blocks?: unknown }, i, results); + // Tool call response checks if (isToolCallResponse(response)) { if (response.toolCalls.length === 0) { diff --git a/src/helpers.ts b/src/helpers.ts index 2bb47b32..40af7402 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -21,6 +21,7 @@ import type { RawJSONResponse, SSEChunk, ToolCall, + FixtureBlock, ChatCompletion, ResponseOverrides, } from "./types.js"; @@ -236,6 +237,24 @@ function normalizeFactoryResponse(raw: FixtureResponse): FixtureResponse { return { ...tc }; }); } + // Mirror the toolCalls[].arguments idiom for the optional ordered `blocks` + // array: auto-stringify object `arguments` on each `toolCall` block so a + // programmatic ResponseFactory may return objects (resolveFixtureBlocks + // requires string `arguments`). Text blocks and string arguments pass + // through unchanged. Matches the loader's block handling. + if (Array.isArray(r.blocks)) { + r.blocks = (r.blocks as Array>).map((block) => { + if ( + block != null && + block.type === "toolCall" && + typeof block.arguments === "object" && + block.arguments !== null + ) { + return { ...block, arguments: JSON.stringify(block.arguments) }; + } + return { ...block }; + }); + } return r as unknown as FixtureResponse; } @@ -278,6 +297,61 @@ export function isContentWithToolCallsResponse( ); } +/** + * Validate and pass through the ordered `blocks` field of a combined + * content+toolCalls fixture. Used ONLY on the new block-iteration path (when a + * fixture explicitly sets `blocks`); it is NOT a legacy-order reconstructor — + * fixtures without `blocks` never reach this function and keep their unchanged + * text-first path. + * + * An EMPTY `blocks` array is treated as "no blocks" by every builder's + * streaming gate (`blocks && blocks.length > 0`), so it falls back to the + * legacy `{content, toolCalls}` path and never reaches this function — the gate + * is the single source of truth for "has blocks". 
This validator therefore only + * ever runs on a non-empty array. + * + * Returns the blocks in array order. Each entry must be a valid + * {@link FixtureBlock}: a `text` block with a string `text`, or a `toolCall` + * block with string `name` + `arguments` (and an optional string `id`). + * Throws on a malformed array or entry — same fail-fast idiom as the other + * fixture validators in this module (see e.g. the factory guard at + * {@link resolveResponse}). + */ +export function resolveFixtureBlocks(blocks: FixtureBlock[]): FixtureBlock[] { + if (!Array.isArray(blocks)) { + throw new Error(`Invalid fixture blocks: expected an array, got ${typeof blocks}`); + } + blocks.forEach((block, i) => { + if (block === null || typeof block !== "object") { + throw new Error(`Invalid fixture block at index ${i}: expected an object`); + } + const b = block as Record; + if (b.type === "text") { + if (typeof b.text !== "string") { + throw new Error( + `Invalid fixture block at index ${i}: "text" block requires a string "text" field`, + ); + } + } else if (b.type === "toolCall") { + if (typeof b.name !== "string" || typeof b.arguments !== "string") { + throw new Error( + `Invalid fixture block at index ${i}: "toolCall" block requires string "name" and "arguments" fields`, + ); + } + if (b.id !== undefined && typeof b.id !== "string") { + throw new Error( + `Invalid fixture block at index ${i}: "toolCall" block "id" must be a string when present`, + ); + } + } else { + throw new Error( + `Invalid fixture block at index ${i}: unknown type ${JSON.stringify(b.type)} (expected "text" or "toolCall")`, + ); + } + }); + return blocks; +} + export function isErrorResponse(r: FixtureResponse): r is ErrorResponse { return ( "error" in r && @@ -752,6 +826,7 @@ export function buildContentWithToolCallsChunks( chunkSize: number, reasoning?: string, overrides?: ResponseOverrides, + blocks?: FixtureBlock[], ): SSEChunk[] { const id = overrides?.id ?? 
generateId(); const created = overrides?.created ?? Math.floor(Date.now() / 1000); @@ -759,6 +834,143 @@ export function buildContentWithToolCallsChunks( const chunks: SSEChunk[] = []; const fingerprint = overrides?.systemFingerprint; + if (blocks && blocks.length > 0) { + // NEW: emit chunks in fixture block array order. + // + // DEGENERATE PROVIDER NOTE: in OpenAI chat-completions, `delta.content` and + // `delta.tool_calls` are SEPARATE channels that the client merges with no + // positional interleaving. So "tool-call-before-text" is NOT semantically + // observable to a real client — it reassembles content and tool calls into + // their own buckets regardless of chunk order. We still emit honest + // array-order chunks (the SSE chunk SEQUENCE is the contract this path + // asserts), but we do NOT fake interleaving the channel cannot express. + const ordered = resolveFixtureBlocks(blocks); + + // Reasoning chunks (emitted first, OpenRouter format) — unchanged from legacy. + if (reasoning) { + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + chunks.push({ + id, + object: "chat.completion.chunk", + created, + model: effectiveModel, + choices: [ + { index: 0, delta: { reasoning_content: slice }, logprobs: null, finish_reason: null }, + ], + ...(fingerprint !== undefined && { system_fingerprint: fingerprint }), + }); + } + } + + // Role chunk — preserved exactly as the legacy path. + chunks.push({ + id, + object: "chat.completion.chunk", + created, + model: effectiveModel, + choices: [ + { + index: 0, + delta: { role: overrides?.role ?? "assistant", content: "" }, + logprobs: null, + finish_reason: null, + }, + ], + ...(fingerprint !== undefined && { system_fingerprint: fingerprint }), + }); + + // Tool-call `index` is assigned in encounter order across the block array. 
+ let tcIdx = 0; + for (const block of ordered) { + if (block.type === "text") { + for (let i = 0; i < block.text.length; i += chunkSize) { + const slice = block.text.slice(i, i + chunkSize); + chunks.push({ + id, + object: "chat.completion.chunk", + created, + model: effectiveModel, + choices: [{ index: 0, delta: { content: slice }, logprobs: null, finish_reason: null }], + ...(fingerprint !== undefined && { system_fingerprint: fingerprint }), + }); + } + } else { + const tcId = block.id || generateToolCallId(); + + // Initial tool call chunk (id + function name) + chunks.push({ + id, + object: "chat.completion.chunk", + created, + model: effectiveModel, + choices: [ + { + index: 0, + delta: { + tool_calls: [ + { + index: tcIdx, + id: tcId, + type: "function", + function: { name: block.name, arguments: "" }, + }, + ], + }, + logprobs: null, + finish_reason: null, + }, + ], + ...(fingerprint !== undefined && { system_fingerprint: fingerprint }), + }); + + // Argument streaming chunks + const args = block.arguments; + for (let i = 0; i < args.length; i += chunkSize) { + const slice = args.slice(i, i + chunkSize); + chunks.push({ + id, + object: "chat.completion.chunk", + created, + model: effectiveModel, + choices: [ + { + index: 0, + delta: { + tool_calls: [{ index: tcIdx, function: { arguments: slice } }], + }, + logprobs: null, + finish_reason: null, + }, + ], + ...(fingerprint !== undefined && { system_fingerprint: fingerprint }), + }); + } + tcIdx++; + } + } + + // Finish chunk — preserved exactly as the legacy path. + chunks.push({ + id, + object: "chat.completion.chunk", + created, + model: effectiveModel, + choices: [ + { + index: 0, + delta: {}, + logprobs: null, + finish_reason: overrides?.finishReason ?? "tool_calls", + }, + ], + ...(fingerprint !== undefined && { system_fingerprint: fingerprint }), + }); + + return chunks; + } + + // EXISTING legacy code, byte-for-byte UNCHANGED. 
// Reasoning chunks (emitted before content, OpenRouter format) if (reasoning) { for (let i = 0; i < reasoning.length; i += chunkSize) { @@ -881,6 +1093,14 @@ export function buildContentWithToolCallsChunks( return chunks; } +// NOTE (#274): this NON-streaming OpenAI chat-completions builder is +// intentionally degenerate w.r.t. `blocks` ordering. A chat.completion puts +// `message.content` and `message.tool_calls` in SEPARATE fields on a single +// message object — they are NOT a positionally-observable array, so a +// tool-first `blocks` fixture cannot be expressed in the wire shape. Honoring +// block order here would be a no-op, so the legacy content+tool_calls fields +// are unchanged. (Order-observable surfaces — Claude `content[]`, Gemini +// `parts[]`, Responses `output[]` — DO honor block order; see those builders.) export function buildContentWithToolCallsCompletion( content: string, toolCalls: ToolCall[], diff --git a/src/index.ts b/src/index.ts index 838f3cd3..b93c3030 100644 --- a/src/index.ts +++ b/src/index.ts @@ -133,6 +133,7 @@ export { isTextResponse, isToolCallResponse, isContentWithToolCallsResponse, + resolveFixtureBlocks, isErrorResponse, isEmbeddingResponse, isImageResponse, @@ -334,6 +335,7 @@ export type { FixtureMatch, TextResponse, ToolCall, + FixtureBlock, ToolCallResponse, ErrorResponse, EmbeddingResponse, diff --git a/src/types.ts b/src/types.ts index dceaca5b..51fae37e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -186,6 +186,20 @@ export interface ToolCall { id?: string; } +/** + * A single ordered streaming block for a {@link ContentWithToolCallsResponse}. + * + * When a combined content+toolCalls fixture sets the optional `blocks` field, + * builders stream the blocks in array order — enabling tool-call-before-text + * and interleaved orderings that the legacy `{ content, toolCalls }` shape + * (always text-first) cannot express. 
A `text` block carries a text segment; a + * `toolCall` block mirrors {@link ToolCall} (`name` + JSON-string `arguments`, + * optional `id`). + */ +export type FixtureBlock = + | { type: "text"; text: string } + | { type: "toolCall"; name: string; arguments: string; id?: string }; + export interface ToolCallResponse extends ResponseOverrides { toolCalls: ToolCall[]; reasoning?: string; @@ -199,6 +213,13 @@ export interface ToolCallResponse extends ResponseOverrides { export interface ContentWithToolCallsResponse extends ResponseOverrides { content: string; toolCalls: ToolCall[]; + /** + * Optional ordered streaming blocks. When present, builders stream these in + * array order (tool-first / interleaved); when absent, the legacy + * `{ content, toolCalls }` text-first path runs unchanged. Purely additive — + * `isContentWithToolCallsResponse` still requires `content` + `toolCalls`. + */ + blocks?: FixtureBlock[]; reasoning?: string; /** Real Anthropic thinking-block signature; see {@link TextResponse.reasoningSignature}. */ reasoningSignature?: string; @@ -416,6 +437,22 @@ export interface FixtureFileToolCall { id?: string; } +/** + * On-disk counterpart of {@link FixtureBlock}. A `toolCall` block's + * `arguments` is relaxed exactly like {@link FixtureFileToolCall} so authors + * may write a JSON object/array; the loader JSON.stringifies it into the + * runtime string form. Normalizes to a {@link FixtureBlock}. + */ +export type FixtureFileBlock = + | { type: "text"; text: string } + | { + type: "toolCall"; + name: string; + /** Accepts a JSON object or array for convenience — the loader will JSON.stringify it. 
*/ + arguments: string | Record | unknown[]; + id?: string; + }; + export interface FixtureFileToolCallResponse extends ResponseOverrides { toolCalls: FixtureFileToolCall[]; reasoning?: string; @@ -441,6 +478,15 @@ export interface FixtureFileContentWithToolCallsResponse extends ResponseOverrid /** Accepts a JSON object or array (structured output) — the loader will JSON.stringify it. */ content: string | Record | unknown[]; toolCalls: FixtureFileToolCall[]; + /** + * Optional ordered streaming blocks (mirrors the in-memory + * {@link ContentWithToolCallsResponse.blocks}). When present, builders stream + * these in array order (tool-first / interleaved); a `toolCall` block's + * object `arguments` is auto-stringified just like `toolCalls[].arguments`. + * Absent → legacy text-first path runs unchanged. Purely additive. Uses the + * on-disk {@link FixtureFileBlock} shape with relaxed `arguments`. + */ + blocks?: FixtureFileBlock[]; reasoning?: string; /** Real Anthropic thinking-block signature; see {@link TextResponse.reasoningSignature}. */ reasoningSignature?: string; From fd7cb17276a5dfca5324b33734ae6916746daee1 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 26 Jun 2026 23:11:34 -0700 Subject: [PATCH 2/5] feat(fixtures): stream and non-stream fixture blocks in array order across providers (#274) Emit fixture blocks in their declared array order for both streaming and non-streaming paths across the Anthropic, OpenAI, Gemini, Ollama, Responses, and WebSocket providers. 
--- src/gemini.ts | 92 +++++++++++++++- src/messages.ts | 139 +++++++++++++++++++++++-- src/ollama.ts | 91 ++++++++++++++++ src/responses.ts | 248 +++++++++++++++++++++++++++++++++----------- src/server.ts | 1 + src/ws-responses.ts | 1 + 6 files changed, 502 insertions(+), 70 deletions(-) diff --git a/src/gemini.ts b/src/gemini.ts index f2e8a9f7..b7b349b0 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -12,6 +12,7 @@ import type { ChatCompletionRequest, ChatMessage, Fixture, + FixtureBlock, HandlerDefaults, RecordedTimings, RecordProviderKey, @@ -31,6 +32,7 @@ import { flattenHeaders, getContext, getTestId, + resolveFixtureBlocks, resolveResponse, resolveStrictMode, resolveReasoningForModel, @@ -442,6 +444,7 @@ function buildGeminiContentWithToolCallsStreamChunks( logger: Logger, reasoning?: string, overrides?: ResponseOverrides, + blocks?: FixtureBlock[], ): GeminiResponseChunk[] { const chunks: GeminiResponseChunk[] = []; @@ -460,6 +463,69 @@ function buildGeminiContentWithToolCallsStreamChunks( } } + if (blocks && blocks.length > 0) { + // NEW path (#274): stream chunks whose parts follow the blocks' ARRAY ORDER, + // so a tool-first / interleaved fixture emits its functionCall part before + // its text part. Gemini's ordered `parts` make this fully expressible. The + // terminal block carries the finishReason regardless of its type. Legacy + // fixtures (no `blocks`) never enter here — see the else branch below. + const resolved = resolveFixtureBlocks(blocks); + resolved.forEach((block, i) => { + const isLast = i === resolved.length - 1; + const finishReason = isLast + ? geminiFinishReason(overrides?.finishReason, "FUNCTION_CALL") + : undefined; + if (block.type === "toolCall") { + const part = parseToolCallPart( + { name: block.name, arguments: block.arguments, id: block.id }, + logger, + ); + chunks.push({ + candidates: [ + { + content: { role: "model", parts: [part] }, + ...(finishReason ? { finishReason } : {}), + index: 0, + }, + ], + ...(isLast ? 
{ usageMetadata: geminiUsageMetadata(overrides) } : {}), + }); + } else { + const text = block.text; + if (text.length === 0) { + chunks.push({ + candidates: [ + { + content: { role: "model", parts: [{ text: "" }] }, + ...(finishReason ? { finishReason } : {}), + index: 0, + }, + ], + ...(isLast ? { usageMetadata: geminiUsageMetadata(overrides) } : {}), + }); + } else { + for (let j = 0; j < text.length; j += chunkSize) { + const slice = text.slice(j, j + chunkSize); + const lastSlice = j + chunkSize >= text.length; + const sliceFinish = isLast && lastSlice ? finishReason : undefined; + chunks.push({ + candidates: [ + { + content: { role: "model", parts: [{ text: slice }] }, + ...(sliceFinish ? { finishReason: sliceFinish } : {}), + index: 0, + }, + ], + ...(isLast && lastSlice ? { usageMetadata: geminiUsageMetadata(overrides) } : {}), + }); + } + } + } + }); + + return chunks; + } + if (content.length === 0) { chunks.push({ candidates: [ @@ -505,13 +571,33 @@ function buildGeminiContentWithToolCallsResponse( logger: Logger, reasoning?: string, overrides?: ResponseOverrides, + blocks?: FixtureBlock[], ): GeminiResponseChunk { const parts: GeminiPart[] = []; if (reasoning) { parts.push({ text: reasoning, thought: true }); } - parts.push({ text: content }); - parts.push(...toolCalls.map((tc) => parseToolCallPart(tc, logger))); + + if (blocks && blocks.length > 0) { + // NEW PATH: the non-streaming `parts[]` array is positionally observable, so + // emit parts in the fixture's ARRAY ORDER (after any leading thought part). + // A toolCall block before a text block therefore yields a functionCall part + // ahead of the text — matching the streaming path for the same `blocks`. 
+ const resolved = resolveFixtureBlocks(blocks); + for (const block of resolved) { + if (block.type === "toolCall") { + parts.push( + parseToolCallPart({ name: block.name, arguments: block.arguments, id: block.id }, logger), + ); + } else { + parts.push({ text: block.text }); + } + } + } else { + // LEGACY PATH (unchanged): text part first, then functionCall parts. + parts.push({ text: content }); + parts.push(...toolCalls.map((tc) => parseToolCallPart(tc, logger))); + } return { candidates: [ @@ -920,6 +1006,7 @@ export async function handleGemini( logger, effReasoning, overrides, + response.blocks, ); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); @@ -931,6 +1018,7 @@ export async function handleGemini( logger, effReasoning, overrides, + response.blocks, ); const interruption = createInterruptionSignal(fixture); const completed = await writeGeminiSSEStream(res, chunks, { diff --git a/src/messages.ts b/src/messages.ts index a1a8c4b0..c0fbc7bb 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -13,6 +13,7 @@ import type { Fixture, HandlerDefaults, RecordedTimings, + FixtureBlock, ResponseOverrides, StreamingProfile, ToolCall, @@ -26,6 +27,7 @@ import { isToolCallResponse, isContentWithToolCallsResponse, isErrorResponse, + resolveFixtureBlocks, flattenHeaders, getTestId, resolveResponse, @@ -816,6 +818,7 @@ function buildClaudeContentWithToolCallsStreamEvents( overrides?: ResponseOverrides, reasoningSignature?: string, redactedThinking?: string[], + blocks?: FixtureBlock[], ): ClaudeSSEEvent[] { const msgId = overrides?.id ?? generateMessageId(); const effectiveModel = overrides?.model ?? model; @@ -842,10 +845,11 @@ function buildClaudeContentWithToolCallsStreamEvents( let blockIndex = 0; // Redacted-thinking blocks lead the turn (before thinking / text / tool_use); - // see the helper for the ordering caveat. + // see the helper for the ordering caveat. Applies to both the legacy and the + // ordered-`blocks` paths. 
blockIndex = pushRedactedThinkingStreamEvents(events, blockIndex, redactedThinking); - // Optional thinking block + // Optional thinking block — also shared by both paths. if (reasoning) { // Real Anthropic emits an empty `signature` on the thinking // `content_block_start`; the cryptographic signature arrives only via the @@ -879,6 +883,99 @@ function buildClaudeContentWithToolCallsStreamEvents( blockIndex++; } + if (blocks && blocks.length > 0) { + // NEW PATH: stream `text`/`tool_use` content blocks in the fixture's array + // order. Anthropic is fully tool-first capable — a `toolCall` block can take + // a lower `index` than a `text` block. Content-block indices are assigned in + // encounter order, continuing from any leading thinking/redacted blocks. + const ordered = resolveFixtureBlocks(blocks); + + for (const block of ordered) { + if (block.type === "text") { + events.push({ + type: "content_block_start", + index: blockIndex, + content_block: { type: "text", text: "" }, + }); + + for (let i = 0; i < block.text.length; i += chunkSize) { + const slice = block.text.slice(i, i + chunkSize); + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { type: "text_delta", text: slice }, + }); + } + + events.push({ + type: "content_block_stop", + index: blockIndex, + }); + + blockIndex++; + } else { + const toolUseId = block.id || generateToolUseId(); + + let argsObj: unknown; + try { + argsObj = JSON.parse(block.arguments || "{}"); + } catch { + logger.warn( + `Malformed JSON in fixture tool call arguments for "${block.name}": ${block.arguments}`, + ); + argsObj = {}; + } + const argsJson = JSON.stringify(argsObj); + + events.push({ + type: "content_block_start", + index: blockIndex, + content_block: { + type: "tool_use", + id: toolUseId, + name: block.name, + input: {}, + }, + }); + + for (let i = 0; i < argsJson.length; i += chunkSize) { + const slice = argsJson.slice(i, i + chunkSize); + events.push({ + type: "content_block_delta", + index: 
blockIndex, + delta: { type: "input_json_delta", partial_json: slice }, + }); + } + + events.push({ + type: "content_block_stop", + index: blockIndex, + }); + + blockIndex++; + } + } + + // message_delta + events.push({ + type: "message_delta", + delta: { + stop_reason: claudeStopReason(overrides?.finishReason, "tool_use"), + stop_sequence: null, + }, + usage: { output_tokens: claudeUsage(overrides).output_tokens }, + }); + + // message_stop + events.push({ type: "message_stop" }); + + return events; + } + + // LEGACY PATH (byte-for-byte unchanged): text content block, then tool_use + // blocks in `toolCalls` order. Reached only when `blocks` is absent; the + // leading redacted-thinking/thinking blocks above are shared with the new + // path and produce identical wire output here. // Text content block events.push({ type: "content_block_start", @@ -970,6 +1067,7 @@ function buildClaudeContentWithToolCallsResponse( overrides?: ResponseOverrides, reasoningSignature?: string, redactedThinking?: string[], + blocks?: FixtureBlock[], ): object { const contentBlocks: object[] = []; @@ -986,9 +1084,10 @@ function buildClaudeContentWithToolCallsResponse( }); } - contentBlocks.push({ type: "text", text: content }); - - for (const tc of toolCalls) { + // Build a tool_use content block from a fixture tool call, parsing its + // string `arguments` into the object `input` Anthropic emits (warning on + // malformed JSON, same idiom as the streaming/legacy paths). 
+ const toolUseBlock = (tc: { name: string; arguments: string; id?: string }): object => { let argsObj: unknown; try { argsObj = JSON.parse(tc.arguments || "{}"); @@ -998,12 +1097,36 @@ function buildClaudeContentWithToolCallsResponse( ); argsObj = {}; } - contentBlocks.push({ + return { type: "tool_use", id: tc.id || generateToolUseId(), name: tc.name, input: argsObj, - }); + }; + }; + + if (blocks && blocks.length > 0) { + // NEW PATH: the non-streaming `content[]` array is positionally observable, + // so emit `text`/`tool_use` content blocks in the fixture's ARRAY ORDER + // (after any leading redacted/thinking blocks). A toolCall block before a + // text block therefore yields a tool_use ahead of the text — matching the + // streaming path for the same `blocks` fixture. + const ordered = resolveFixtureBlocks(blocks); + for (const block of ordered) { + if (block.type === "text") { + contentBlocks.push({ type: "text", text: block.text }); + } else { + contentBlocks.push( + toolUseBlock({ name: block.name, arguments: block.arguments, id: block.id }), + ); + } + } + } else { + // LEGACY PATH (unchanged): text content block, then tool_use blocks. 
+ contentBlocks.push({ type: "text", text: content }); + for (const tc of toolCalls) { + contentBlocks.push(toolUseBlock(tc)); + } } return { @@ -1340,6 +1463,7 @@ export async function handleMessages( overrides, effReasoningSignature, effRedactedThinking, + response.blocks, ); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); @@ -1354,6 +1478,7 @@ export async function handleMessages( overrides, effReasoningSignature, effRedactedThinking, + response.blocks, ); const interruption = createInterruptionSignal(fixture); const completed = await writeClaudeSSEStream(res, events, { diff --git a/src/ollama.ts b/src/ollama.ts index acba505d..88a6b5c3 100644 --- a/src/ollama.ts +++ b/src/ollama.ts @@ -18,6 +18,7 @@ import type { ChatCompletionRequest, ChatMessage, Fixture, + FixtureBlock, HandlerDefaults, ToolCall, ToolDefinition, @@ -28,6 +29,7 @@ import { isContentWithToolCallsResponse, isErrorResponse, isEmbeddingResponse, + resolveFixtureBlocks, serializeErrorResponse, generateDeterministicEmbedding, flattenHeaders, @@ -332,6 +334,21 @@ function buildOllamaChatToolCallResponse( // ─── Response builders: /api/chat — content + tool calls ──────────────────── +// Map a fixture tool call into Ollama's wire shape (object arguments, no id). 
+function toOllamaToolCall( + tc: ToolCall, + logger: Logger, +): { function: { name: string; arguments: unknown } } { + let argsObj: unknown; + try { + argsObj = JSON.parse(tc.arguments || "{}"); + } catch { + logger.warn(`Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`); + argsObj = {}; + } + return { function: { name: tc.name, arguments: argsObj } }; +} + function buildOllamaChatContentWithToolCallsChunks( content: string, toolCalls: ToolCall[], @@ -339,10 +356,75 @@ function buildOllamaChatContentWithToolCallsChunks( chunkSize: number, logger: Logger, reasoning?: string, + blocks?: FixtureBlock[], ): object[] { const chunks: object[] = []; const createdAt = new Date().toISOString(); + // ── Ordered-blocks path ────────────────────────────────────────────────── + // When the fixture declares explicit `blocks`, stream NDJSON message chunks + // following the blocks' ARRAY ORDER: a text block emits a `message.content` + // delta chunk; a toolCall block emits a chunk carrying `message.tool_calls`. + // So [toolCall, text] puts the tool_call-bearing chunk before the content + // chunk. Ollama tool-first ordering is PARTIALLY observable: the chunk order + // on the wire is honored, but some Ollama clients reassemble content and + // tool_calls positionally (text first regardless), so downstream order is + // best-effort. Reasoning chunks (if any) still lead, matching legacy. The + // legacy single-chunk-all-tools path stays untouched on the else branch. + if (blocks && blocks.length > 0) { + const ordered = resolveFixtureBlocks(blocks); + + // Reasoning chunks (before everything else), identical to legacy. 
+ if (reasoning) { + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + chunks.push({ + model, + created_at: createdAt, + message: { role: "assistant", content: "", reasoning_content: slice }, + done: false, + }); + } + } + + for (const block of ordered) { + if (block.type === "text") { + for (let i = 0; i < block.text.length; i += chunkSize) { + const slice = block.text.slice(i, i + chunkSize); + chunks.push({ + model, + created_at: createdAt, + message: { role: "assistant", content: slice }, + done: false, + }); + } + } else { + chunks.push({ + model, + created_at: createdAt, + message: { + role: "assistant", + content: "", + tool_calls: [toOllamaToolCall(block, logger)], + }, + done: false, + }); + } + } + + // Final chunk — preserved exactly as legacy (done + timing fields). + chunks.push({ + model, + created_at: createdAt, + message: { role: "assistant", content: "" }, + done: true, + ...DURATION_FIELDS, + }); + + return chunks; + } + + // ── Legacy path (UNCHANGED) ────────────────────────────────────────────── // Reasoning chunks (before content) if (reasoning) { for (let i = 0; i < reasoning.length; i += chunkSize) { @@ -409,6 +491,14 @@ function buildOllamaChatContentWithToolCallsChunks( return chunks; } +// NOTE (#274): this NON-streaming Ollama builder is intentionally degenerate +// w.r.t. `blocks` ordering. Ollama's non-streaming chat response puts `content` +// and `tool_calls` in SEPARATE fields on a single `message` object — they are +// NOT a positionally-observable array, so a tool-first `blocks` fixture cannot +// be expressed in the wire shape. Honoring block order here would be a no-op, +// so we keep the legacy text+tool_calls fields unchanged. (Order-observable +// surfaces — Claude `content[]`, Gemini `parts[]`, Responses `output[]` — DO +// honor block order; see those builders.) 
function buildOllamaChatContentWithToolCallsResponse( content: string, toolCalls: ToolCall[], @@ -755,6 +845,7 @@ export async function handleOllama( chunkSize, logger, effReasoning, + response.blocks, ); const interruption = createInterruptionSignal(fixture); const completed = await writeNDJSONStream(res, chunks, { diff --git a/src/responses.ts b/src/responses.ts index c61ab47a..592f9ba5 100644 --- a/src/responses.ts +++ b/src/responses.ts @@ -11,6 +11,7 @@ import type { ChatCompletionRequest, ChatMessage, Fixture, + FixtureBlock, HandlerDefaults, ResponseOverrides, StreamingProfile, @@ -20,6 +21,7 @@ import type { import { generateId, generateToolCallId, + resolveFixtureBlocks, extractOverrides, isTextResponse, isToolCallResponse, @@ -656,6 +658,71 @@ function buildMessageOutputEvents( return { events, msgItem }; } +interface FunctionCallBlockResult { + events: ResponsesSSEEvent[]; + fcItem: object; +} + +/** + * Emit the output_item.added → arguments deltas → arguments.done → + * output_item.done events for a single function_call at `outputIndex`, + * returning the completed item for the final `output` array. Behavior is + * identical to the inline per-tool-call loop in the legacy path; both the + * legacy branch and the ordered-blocks branch share this so wire output stays + * byte-identical for a given (tool, outputIndex). 
+ */ +function buildFunctionCallOutputEvents( + toolCall: ToolCall, + chunkSize: number, + outputIndex: number, +): FunctionCallBlockResult { + const callId = toolCall.id || generateToolCallId(); + const fcId = generateId("fc"); + const args = toolCall.arguments; + const events: ResponsesSSEEvent[] = []; + + events.push({ + type: "response.output_item.added", + output_index: outputIndex, + item: { + type: "function_call", + id: fcId, + call_id: callId, + name: toolCall.name, + arguments: "", + status: "in_progress", + }, + }); + + for (let i = 0; i < args.length; i += chunkSize) { + events.push({ + type: "response.function_call_arguments.delta", + item_id: fcId, + output_index: outputIndex, + delta: args.slice(i, i + chunkSize), + }); + } + + events.push({ + type: "response.function_call_arguments.done", + item_id: fcId, + output_index: outputIndex, + arguments: args, + }); + + const fcItem = { + type: "function_call", + id: fcId, + call_id: callId, + name: toolCall.name, + arguments: args, + status: "completed", + }; + events.push({ type: "response.output_item.done", output_index: outputIndex, item: fcItem }); + + return { events, fcItem }; +} + // ─── Non-streaming response builders ──────────────────────────────────────── function buildOutputPrefix(content: string, reasoning?: string, webSearches?: string[]): object[] { @@ -767,6 +834,7 @@ export function buildContentWithToolCallsStreamEvents( reasoning?: string, webSearches?: string[], overrides?: ResponseOverrides, + blocks?: FixtureBlock[], ): ResponsesSSEEvent[] { const { respId, created, events, prefixOutputItems, nextOutputIndex } = buildResponsePreamble( model, @@ -776,60 +844,59 @@ export function buildContentWithToolCallsStreamEvents( overrides, ); - const { events: msgEvents, msgItem } = buildMessageOutputEvents( - content, - chunkSize, - nextOutputIndex, - ); - events.push(...msgEvents); - - const fcOutputItems: object[] = []; - for (let idx = 0; idx < toolCalls.length; idx++) { - const tc = 
toolCalls[idx]; - const callId = tc.id || generateToolCallId(); - const fcId = generateId("fc"); - const fcOutputIndex = nextOutputIndex + 1 + idx; - const args = tc.arguments; - - events.push({ - type: "response.output_item.added", - output_index: fcOutputIndex, - item: { - type: "function_call", - id: fcId, - call_id: callId, - name: tc.name, - arguments: "", - status: "in_progress", - }, - }); - - for (let i = 0; i < args.length; i += chunkSize) { - events.push({ - type: "response.function_call_arguments.delta", - item_id: fcId, - output_index: fcOutputIndex, - delta: args.slice(i, i + chunkSize), - }); + // The output items assembled in emission order (after any reasoning / + // web-search prefix items). Each output_index is assigned sequentially as we + // walk the chosen item order, so the `output_index` on every emitted event + // matches that item's slot in the final `response.completed.output` array. + const orderedOutputItems: object[] = []; + + if (blocks && blocks.length > 0) { + // NEW PATH: stream items in the fixture's block ARRAY ORDER. A toolCall + // block placed before a text block therefore yields a function_call item at + // a LOWER output_index than the message — it leads the output array. + const ordered = resolveFixtureBlocks(blocks); + let outputIndex = nextOutputIndex; + for (const block of ordered) { + if (block.type === "text") { + const { events: msgEvents, msgItem } = buildMessageOutputEvents( + block.text, + chunkSize, + outputIndex, + ); + events.push(...msgEvents); + orderedOutputItems.push(msgItem); + } else { + const { events: fcEvents, fcItem } = buildFunctionCallOutputEvents( + { name: block.name, arguments: block.arguments, id: block.id }, + chunkSize, + outputIndex, + ); + events.push(...fcEvents); + orderedOutputItems.push(fcItem); + } + outputIndex += 1; } + } else { + // LEGACY PATH: message item first, then function_call items — byte-for-byte + // unchanged from the pre-blocks behavior (message always leads the output). 
+ const { events: msgEvents, msgItem } = buildMessageOutputEvents( + content, + chunkSize, + nextOutputIndex, + ); + events.push(...msgEvents); + orderedOutputItems.push(msgItem); - events.push({ - type: "response.function_call_arguments.done", - item_id: fcId, - output_index: fcOutputIndex, - arguments: args, - }); - - const doneItem = { - type: "function_call", - id: fcId, - call_id: callId, - name: tc.name, - arguments: args, - status: "completed", - }; - events.push({ type: "response.output_item.done", output_index: fcOutputIndex, item: doneItem }); - fcOutputItems.push(doneItem); + for (let idx = 0; idx < toolCalls.length; idx++) { + const fcOutputIndex = nextOutputIndex + 1 + idx; + const { events: fcEvents, fcItem } = buildFunctionCallOutputEvents( + toolCalls[idx], + chunkSize, + fcOutputIndex, + ); + events.push(...fcEvents); + orderedOutputItems.push(fcItem); + } } events.push({ @@ -840,7 +907,7 @@ export function buildContentWithToolCallsStreamEvents( created_at: created, model: overrides?.model ?? 
model, status: responsesStatus(overrides?.finishReason, "completed"), - output: [...prefixOutputItems, msgItem, ...fcOutputItems], + output: [...prefixOutputItems, ...orderedOutputItems], usage: responsesUsage(overrides), }, }); @@ -848,6 +915,27 @@ export function buildContentWithToolCallsStreamEvents( return events; } +function buildFunctionCallOutputItem(tc: { name: string; arguments: string; id?: string }): object { + return { + type: "function_call", + id: generateId("fc"), + call_id: tc.id || generateToolCallId(), + name: tc.name, + arguments: tc.arguments, + status: "completed", + }; +} + +function buildMessageOutputItem(content: string): object { + return { + type: "message", + id: itemId(), + status: "completed", + role: "assistant", + content: [{ type: "output_text", text: content, annotations: [] }], + }; +} + function buildContentWithToolCallsResponse( content: string, toolCalls: ToolCall[], @@ -855,17 +943,53 @@ function buildContentWithToolCallsResponse( reasoning?: string, webSearches?: string[], overrides?: ResponseOverrides, + blocks?: FixtureBlock[], ): object { + if (blocks && blocks.length > 0) { + // NEW PATH: the non-streaming `output[]` array is positionally observable, + // so emit the prefix (reasoning / web_search_call), then the blocks in + // fixture ARRAY ORDER. A toolCall block before a text block therefore + // yields a function_call item ahead of the message — matching the streaming + // path's ordering for the same `blocks` fixture. 
+ const ordered = resolveFixtureBlocks(blocks); + const output: object[] = []; + if (reasoning) { + output.push({ + type: "reasoning", + id: generateId("rs"), + summary: [{ type: "summary_text", text: reasoning }], + }); + } + if (webSearches && webSearches.length > 0) { + for (const query of webSearches) { + output.push({ + type: "web_search_call", + id: generateId("ws"), + status: "completed", + action: { type: "search", query }, + }); + } + } + for (const block of ordered) { + if (block.type === "text") { + output.push(buildMessageOutputItem(block.text)); + } else { + output.push( + buildFunctionCallOutputItem({ + name: block.name, + arguments: block.arguments, + id: block.id, + }), + ); + } + } + return buildResponseEnvelope(model, output, overrides); + } + + // LEGACY PATH: message item first, then function_call items — unchanged. const output = buildOutputPrefix(content, reasoning, webSearches); for (const tc of toolCalls) { - output.push({ - type: "function_call", - id: generateId("fc"), - call_id: tc.id || generateToolCallId(), - name: tc.name, - arguments: tc.arguments, - status: "completed", - }); + output.push(buildFunctionCallOutputItem(tc)); } return buildResponseEnvelope(model, output, overrides); } @@ -1136,6 +1260,7 @@ export async function handleResponses( effReasoning, response.webSearches, overrides, + response.blocks, ); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); @@ -1148,6 +1273,7 @@ export async function handleResponses( effReasoning, response.webSearches, overrides, + response.blocks, ); const interruption = createInterruptionSignal(fixture); const completed = await writeResponsesSSEStream(res, events, { diff --git a/src/server.ts b/src/server.ts index 31f57e80..31da8692 100644 --- a/src/server.ts +++ b/src/server.ts @@ -865,6 +865,7 @@ async function handleCompletions( chunkSize, effReasoning, overrides, + response.blocks, ); // Build usage chunk for stream_options.include_usage const 
completionText = diff --git a/src/ws-responses.ts b/src/ws-responses.ts index 62fc92e8..c1e0f965 100644 --- a/src/ws-responses.ts +++ b/src/ws-responses.ts @@ -282,6 +282,7 @@ async function processMessage( ), response.webSearches, extractOverrides(response), + response.blocks, ); const interruption = createInterruptionSignal(fixture); From 5f2623ad3522ddf203cabd6e1344b7dc571378f3 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 26 Jun 2026 23:11:39 -0700 Subject: [PATCH 3/5] feat(record): capture stream block order so the recorder persists tool-first fixtures (#274) Preserve the observed streaming block order during collapse so the recorder writes tool-first fixtures faithfully. --- src/recorder.ts | 13 +- src/stream-collapse.ts | 279 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 264 insertions(+), 28 deletions(-) diff --git a/src/recorder.ts b/src/recorder.ts index 2d0a172d..d98598e7 100644 --- a/src/recorder.ts +++ b/src/recorder.ts @@ -699,10 +699,21 @@ export async function proxyAndRecord( arguments: tc.arguments ?? "{}", })); if (collapsed.content) { - // Both content and toolCalls present — save as ContentWithToolCallsResponse + // Both content and toolCalls present — save as ContentWithToolCallsResponse. + // + // Ordered `blocks` (#274) is persisted ONLY when the collapser + // classified the stream as interleaved — a tool-call delta appeared + // strictly before the first content delta, OR a content delta + // appeared after any tool-call delta. The collapser encodes exactly + // that rule: it sets `collapsed.blocks` only in those cases and + // leaves it undefined otherwise. So the recorder simply spreads it + // when present; an ordinary text-then-tools (or text-only) stream has + // no `blocks` and persists the legacy shape byte-identically. + const blocksSpread = collapsed.blocks?.length ? 
{ blocks: collapsed.blocks } : {}; fixtureResponse = { content: collapsed.content, toolCalls: sanitizedToolCalls, + ...blocksSpread, ...reasoningSpread, ...reasoningSignatureSpread, ...redactedThinkingSpread, diff --git a/src/stream-collapse.ts b/src/stream-collapse.ts index 7b3486bd..1fc007c5 100644 --- a/src/stream-collapse.ts +++ b/src/stream-collapse.ts @@ -8,7 +8,7 @@ */ import { crc32 } from "node:zlib"; -import type { RecordProviderKey, ToolCall } from "./types.js"; +import type { FixtureBlock, RecordProviderKey, ToolCall } from "./types.js"; import type { Logger } from "./logger.js"; import { isHarmonyContent, parseHarmonyContent } from "./harmony.js"; @@ -55,6 +55,131 @@ export interface CollapseResult { harmonyUnparsed?: true; /** Short human-readable note accompanying {@link harmonyUnparsed}. */ harmonyNote?: string; + /** + * Ordered cross-channel block list, in STREAM order, populated ONLY when the + * stream is "interleaved" — i.e. a tool-call delta appeared STRICTLY BEFORE + * the first content delta, OR a content delta appeared AFTER any tool-call + * delta. The flat `content` / `toolCalls` fields stay populated UNCHANGED for + * replay back-compat and non-block consumers; `blocks` is purely additive + * positional instrumentation the recorder consults to decide whether to + * persist the ordered shape. Absent (undefined) for text-first, text-only, + * and tool-only streams — i.e. anything NOT interleaved — so the recorder + * keeps the legacy `{ content, toolCalls }` shape byte-identical. + * + * Each text block coalesces all contiguous content deltas between tool + * atoms; each toolCall block carries the fully-assembled name/arguments/id + * for one tool call in the position its FIRST delta arrived. 
+ */ + blocks?: FixtureBlock[]; +} + +// --------------------------------------------------------------------------- +// Cross-channel block-order instrumentation (#274) +// --------------------------------------------------------------------------- + +/** + * Atom recorded during a collapse pass, in stream arrival order. A `text` atom + * carries one content delta's text (contiguous text atoms are coalesced when + * building blocks); a `toolCall` atom is a stable reference to a tool-call + * accumulator whose name/arguments/id are filled in across later deltas. The + * `ref` is the SAME object stored in the collapser's `toolCallMap` (or pushed + * to a flat `toolCalls` array), so block identity is reconciled with the flat + * representation at finalize time — see {@link buildOrderedBlocks}. + */ +type OrderAtom = + | { kind: "text"; text: string } + | { kind: "toolCall"; ref: { name: string; arguments: string; id?: string } }; + +/** + * Normalize a tool call's accumulated `arguments` into valid JSON exactly like + * the flat-`toolCalls` recorder path: an empty / whitespace-only / missing + * value becomes `"{}"`, never `""`. Mirrors `recorder.ts` `toToolCallArguments` + * so a `blocks[].arguments` value is always parseable JSON and never disagrees + * with the flat `toolCalls` entry for the same call. + */ +function normalizeToolArguments(args: string | undefined): string { + if (args === undefined || args.trim() === "") return "{}"; + return args; +} + +/** + * Build a finalized {@link FixtureBlock.toolCall} from a tool-call accumulator, + * normalizing `arguments` so the block agrees byte-for-byte with the flat + * `toolCalls` entry built from the SAME accumulator object. + */ +function toToolCallBlock(ref: { name: string; arguments: string; id?: string }): FixtureBlock { + return { + type: "toolCall", + name: ref.name, + arguments: normalizeToolArguments(ref.arguments), + ...(ref.id ? 
{ id: ref.id } : {}), + }; +} + +/** + * Decide whether a recorded atom sequence is "interleaved" and, if so, build + * the ordered {@link FixtureBlock} list. Returns `undefined` when NOT + * interleaved (text-first, text-only, or tool-only) so callers leave + * `CollapseResult.blocks` unset and the recorder keeps the legacy shape. + * + * Interleaved ⇔ (a tool atom appears strictly before the first text atom) OR + * (a text atom appears after any tool atom). A stream with no tool atoms, or + * with no text atoms, is never interleaved. Text-first-then-tools is the common + * legacy case and is explicitly NOT interleaved. + * + * CONSISTENCY (#274): each toolCall block is derived from the SAME accumulator + * object referenced by its atom and normalized identically to the flat + * `toolCalls` path ({@link toToolCallBlock} / {@link normalizeToolArguments}). + * Because the atom `ref` is the very object the flat list is built from, the + * block and its flat counterpart describe the same call by identity — even when + * upstream tool-call indices do not match stream-arrival order. Empty/missing + * arguments normalize to `"{}"` in BOTH representations, never `""`. + */ +function buildOrderedBlocks(atoms: OrderAtom[]): FixtureBlock[] | undefined { + let firstTextIndex = -1; + let firstToolIndex = -1; + let textAfterTool = false; + let sawTool = false; + let sawText = false; + for (let i = 0; i < atoms.length; i++) { + const a = atoms[i]; + if (a.kind === "text") { + sawText = true; + if (firstTextIndex === -1) firstTextIndex = i; + if (sawTool) textAfterTool = true; + } else { + sawTool = true; + if (firstToolIndex === -1) firstToolIndex = i; + } + } + // No cross-channel ordering to express unless BOTH channels appear. 
+ if (!sawTool || !sawText) return undefined; + const toolBeforeText = firstToolIndex < firstTextIndex; + if (!toolBeforeText && !textAfterTool) return undefined; + + // Coalesce contiguous text atoms into one text block; emit each tool atom as + // a toolCall block reflecting its fully-assembled, normalized accumulator. + const blocks: FixtureBlock[] = []; + let pendingText = ""; + let hasPendingText = false; + const flushText = () => { + if (hasPendingText) { + blocks.push({ type: "text", text: pendingText }); + pendingText = ""; + hasPendingText = false; + } + }; + for (const a of atoms) { + if (a.kind === "text") { + pendingText += a.text; + hasPendingText = true; + } else { + flushText(); + blocks.push(toToolCallBlock(a.ref)); + } + } + flushText(); + return blocks; } /** @@ -171,6 +296,10 @@ export function collapseOpenAISSE(body: string): CollapseResult { // it (they are small per-stream counters), so synthetic keys never collide. let nextSyntheticIndex = 1_000_000; const idKeyMap = new Map(); + // Cross-channel order atoms (#274), in stream arrival order. A toolCall atom + // references the same accumulator object stored in toolCallMap, so later arg + // deltas mutate the block in place. + const orderAtoms: OrderAtom[] = []; for (const line of lines) { const data = extractSSEData(splitSSELines(line)); @@ -237,6 +366,9 @@ export function collapseOpenAISSE(body: string): CollapseResult { // Text content if (typeof delta.content === "string") { content += delta.content; + if (delta.content.length > 0) { + orderAtoms.push({ kind: "text", text: delta.content }); + } } // Tool calls @@ -265,11 +397,15 @@ export function collapseOpenAISSE(body: string): CollapseResult { } if (!toolCallMap.has(index)) { - toolCallMap.set(index, { + const created = { id: rawId ?? "", name: (fn?.name as string) ?? 
"", arguments: "", - }); + }; + toolCallMap.set(index, created); + // Record the tool atom at the position its FIRST delta arrived; it + // references `created` so later name/arg deltas fill it in place. + orderAtoms.push({ kind: "toolCall", ref: created }); } const entry = toolCallMap.get(index)!; @@ -311,19 +447,36 @@ export function collapseOpenAISSE(body: string): CollapseResult { } if (toolCallMap.size > 0 || harmonyToolCalls.length > 0) { - const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b); + const blocks = buildOrderedBlocks(orderAtoms); + // When the stream is interleaved we persist ordered `blocks`; the flat + // `toolCalls` MUST then describe the same calls in the same order so the two + // representations never disagree (#274). The toolCall atoms reference the + // same accumulator objects as `toolCallMap`, so derive the flat list from + // those atoms (stream-arrival order, matching blocks) when blocks exist; + // otherwise keep the legacy index-sorted order for byte-identical fixtures. + const orderedToolCalls = orderAtoms + .filter( + (a): a is { kind: "toolCall"; ref: { name: string; arguments: string; id?: string } } => + a.kind === "toolCall", + ) + .map((a) => ({ + name: a.ref.name, + arguments: normalizeToolArguments(a.ref.arguments), + ...(a.ref.id ? { id: a.ref.id } : {}), + })); + const indexSortedToolCalls = Array.from(toolCallMap.entries()) + .sort(([a], [b]) => a - b) + .map(([, tc]) => ({ + name: tc.name, + arguments: normalizeToolArguments(tc.arguments), + ...(tc.id ? { id: tc.id } : {}), + })); return { + ...(blocks ? { blocks } : {}), ...(content ? { content } : {}), // Fallback-only: harmonyToolCalls are populated ONLY in the // no-structured-calls branch, so this is never a merge of both sources. - toolCalls: [ - ...sorted.map(([, tc]) => ({ - name: tc.name, - arguments: tc.arguments, - ...(tc.id ? { id: tc.id } : {}), - })), - ...harmonyToolCalls, - ], + toolCalls: [...(blocks ? 
orderedToolCalls : indexSortedToolCalls), ...harmonyToolCalls], // Reasoning is preserved alongside tool calls for ALL structured streams // (DeepSeek/OpenRouter reasoning_content, harmony analysis channel), at // parity with every other collapser and the non-streaming path. @@ -388,6 +541,8 @@ export function collapseAnthropicSSE(body: string): CollapseResult { // below it. let nextSyntheticIndex = 1_000_000; let lastSyntheticIndex: number | undefined; + // Cross-channel order atoms (#274), in stream arrival order. + const orderAtoms: OrderAtom[] = []; for (const block of blocks) { const lines = splitSSELines(block); @@ -430,11 +585,15 @@ export function collapseAnthropicSSE(body: string): CollapseResult { index = nextSyntheticIndex++; } lastSyntheticIndex = index; - toolCallMap.set(index, { + const created = { id: (contentBlock.id as string) ?? "", name: (contentBlock.name as string) ?? "", arguments: "", - }); + }; + toolCallMap.set(index, created); + // Record the tool atom at the position the tool_use block opened; it + // references `created` so later input_json_delta fragments fill it in. + orderAtoms.push({ kind: "toolCall", ref: created }); } } @@ -444,6 +603,9 @@ export function collapseAnthropicSSE(body: string): CollapseResult { if (delta.type === "text_delta" && typeof delta.text === "string") { content += delta.text; + if (delta.text.length > 0) { + orderAtoms.push({ kind: "text", text: delta.text }); + } } if (delta.type === "thinking_delta" && typeof delta.thinking === "string") { @@ -485,14 +647,33 @@ export function collapseAnthropicSSE(body: string): CollapseResult { } if (toolCallMap.size > 0) { - const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b); - return { - ...(content ? { content } : {}), - toolCalls: sorted.map(([, tc]) => ({ + const orderedBlocks = buildOrderedBlocks(orderAtoms); + // When interleaved (`blocks` present) the flat `toolCalls` MUST match the + // blocks' order/identity (#274). 
The toolCall atoms reference the same + // accumulator objects as `toolCallMap`, so derive the flat list from those + // atoms (stream-arrival order) when blocks exist; otherwise keep the legacy + // index-sorted order for byte-identical fixtures. + const orderedToolCalls = orderAtoms + .filter( + (a): a is { kind: "toolCall"; ref: { name: string; arguments: string; id?: string } } => + a.kind === "toolCall", + ) + .map((a) => ({ + name: a.ref.name, + arguments: normalizeToolArguments(a.ref.arguments), + ...(a.ref.id ? { id: a.ref.id } : {}), + })); + const indexSortedToolCalls = Array.from(toolCallMap.entries()) + .sort(([a], [b]) => a - b) + .map(([, tc]) => ({ name: tc.name, - arguments: tc.arguments, + arguments: normalizeToolArguments(tc.arguments), ...(tc.id ? { id: tc.id } : {}), - })), + })); + return { + ...(orderedBlocks ? { blocks: orderedBlocks } : {}), + ...(content ? { content } : {}), + toolCalls: orderedBlocks ? orderedToolCalls : indexSortedToolCalls, ...(reasoning ? { reasoning } : {}), ...(reasoningSignature ? { reasoningSignature } : {}), ...(redactedThinking.length > 0 ? { redactedThinking } : {}), @@ -530,6 +711,8 @@ export function collapseGeminiSSE(body: string): CollapseResult { let audioB64 = ""; let audioMimeType: string | undefined; const toolCalls: ToolCall[] = []; + // Cross-channel order atoms (#274), in stream arrival order. + const orderAtoms: OrderAtom[] = []; for (const line of lines) { const data = extractSSEData(splitSSELines(line)); @@ -561,7 +744,7 @@ export function collapseGeminiSSE(body: string): CollapseResult { for (const part of parts) { if (part.functionCall) { const fc = part.functionCall as Record; - toolCalls.push({ + const created: ToolCall = { name: String(fc.name ?? ""), // Default undefined/object args to a JSON object string (matches // collapseGeminiInteractionsSSE / Ollama). 
JSON.stringify(undefined) @@ -569,7 +752,10 @@ export function collapseGeminiSSE(body: string): CollapseResult { // ToolCall.arguments:string contract. arguments: typeof fc.args === "string" ? (fc.args as string) : JSON.stringify(fc.args ?? {}), - }); + }; + toolCalls.push(created); + // Record the tool atom at the position this functionCall part arrived. + orderAtoms.push({ kind: "toolCall", ref: created }); } else if ( part.inlineData && typeof (part.inlineData as Record).mimeType === "string" && @@ -587,30 +773,51 @@ export function collapseGeminiSSE(body: string): CollapseResult { reasoning += part.text; } else { content += part.text; + if (part.text.length > 0) { + orderAtoms.push({ kind: "text", text: part.text }); + } } } } } + // Normalize the flat tool calls' arguments identically to the block path so + // the two representations never disagree (#274). The toolCall atoms reference + // the same `created` objects pushed here, so blocks and flat describe the same + // calls in the same order; this only reconciles empty/missing → "{}". + const normalizedToolCalls = toolCalls.map((tc) => ({ + ...tc, + arguments: normalizeToolArguments(tc.arguments), + })); + if (audioB64) { // Preserve any content / reasoning / tool calls accumulated in the same // stream — a Gemini turn can interleave audio with text and functionCall // parts, and the early return must not silently drop them. + // + // Deliberately do NOT build ordered `blocks` here (#274, R2-N2): the audio + // collapse shape maps to AudioResponse, which has no `blocks` slot, and the + // recorder's audio branch never persists `collapsed.blocks`. Producing block + // ordering on this path would be silently produced-then-dropped, advertising + // a field this result shape can't carry. Block ordering is built only on the + // content+toolCalls path below, which can actually carry it. return { audioB64, audioMimeType, ...(content ? { content } : {}), ...(reasoning ? 
{ reasoning } : {}), - ...(toolCalls.length > 0 ? { toolCalls } : {}), + ...(normalizedToolCalls.length > 0 ? { toolCalls: normalizedToolCalls } : {}), ...(droppedChunks > 0 ? { droppedChunks } : {}), ...(firstDroppedSample ? { firstDroppedSample } : {}), }; } if (toolCalls.length > 0) { + const blocks = buildOrderedBlocks(orderAtoms); return { + ...(blocks ? { blocks } : {}), ...(content ? { content } : {}), - toolCalls, + toolCalls: normalizedToolCalls, ...(reasoning ? { reasoning } : {}), ...(droppedChunks > 0 ? { droppedChunks } : {}), ...(firstDroppedSample ? { firstDroppedSample } : {}), @@ -652,6 +859,8 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { let harmonyUnparsed = false; let harmonyNote: string | undefined; const toolCalls: ToolCall[] = []; + // Cross-channel order atoms (#274), in stream arrival order. + const orderAtoms: OrderAtom[] = []; for (const line of lines) { let parsed: Record; @@ -671,6 +880,9 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { if (message) { if (typeof message.content === "string") { content += message.content; + if (message.content.length > 0) { + orderAtoms.push({ kind: "text", text: message.content }); + } } // Tool calls @@ -678,7 +890,7 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { for (const tc of message.tool_calls as Array>) { const fn = tc.function as Record | undefined; if (fn) { - toolCalls.push({ + const created: ToolCall = { name: String(fn.name ?? ""), // Default undefined/object args to a JSON object (matching // collapseGeminiInteractionsSSE) — JSON.stringify(undefined) @@ -687,7 +899,9 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { typeof fn.arguments === "string" ? fn.arguments : JSON.stringify(fn.arguments ?? 
{}), - }); + }; + toolCalls.push(created); + orderAtoms.push({ kind: "toolCall", ref: created }); } } } @@ -696,6 +910,9 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { // /api/generate format else if (typeof parsed.response === "string") { content += parsed.response; + if (parsed.response.length > 0) { + orderAtoms.push({ kind: "text", text: parsed.response }); + } } } @@ -721,9 +938,17 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { } if (toolCalls.length > 0) { + const blocks = buildOrderedBlocks(orderAtoms); + // Normalize flat arguments identically to the block path so the two + // representations never disagree (#274); same `created` refs, same order. + const normalizedToolCalls = toolCalls.map((tc) => ({ + ...tc, + arguments: normalizeToolArguments(tc.arguments), + })); return { + ...(blocks ? { blocks } : {}), ...(content ? { content } : {}), - toolCalls, + toolCalls: normalizedToolCalls, ...(reasoning ? { reasoning } : {}), ...(droppedChunks > 0 ? { droppedChunks } : {}), ...(firstDroppedSample ? { firstDroppedSample } : {}), From 6174c97d1c3d92a45a467bbf2b0b3003a979e12d Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 26 Jun 2026 23:11:44 -0700 Subject: [PATCH 4/5] test(fixtures): block-ordering coverage across replay, record, e2e, and back-compat (#274) Add and extend tests covering block-ordered replay per provider, recorder capture, end-to-end flow, and backward compatibility with legacy fixtures. 
--- src/__tests__/async-fixture-response.test.ts | 37 ++ src/__tests__/content-with-toolcalls.test.ts | 66 +++- .../fixture-blocks-anthropic.test.ts | 135 +++++++ src/__tests__/fixture-blocks-e2e.test.ts | 242 ++++++++++++ src/__tests__/fixture-blocks-gemini.test.ts | 157 ++++++++ src/__tests__/fixture-blocks-loader.test.ts | 134 +++++++ .../fixture-blocks-nonstreaming.test.ts | 218 +++++++++++ src/__tests__/fixture-blocks-ollama.test.ts | 137 +++++++ src/__tests__/fixture-blocks-openai.test.ts | 159 ++++++++ .../fixture-blocks-responses.test.ts | 201 ++++++++++ .../fixture-blocks-scoped-out.test.ts | 143 ++++++++ src/__tests__/fixture-loader.test.ts | 154 ++++++++ src/__tests__/recorder.test.ts | 114 ++++++ src/__tests__/stream-collapse.test.ts | 346 ++++++++++++++++++ src/__tests__/ws-responses.test.ts | 79 +++- 15 files changed, 2319 insertions(+), 3 deletions(-) create mode 100644 src/__tests__/fixture-blocks-anthropic.test.ts create mode 100644 src/__tests__/fixture-blocks-e2e.test.ts create mode 100644 src/__tests__/fixture-blocks-gemini.test.ts create mode 100644 src/__tests__/fixture-blocks-loader.test.ts create mode 100644 src/__tests__/fixture-blocks-nonstreaming.test.ts create mode 100644 src/__tests__/fixture-blocks-ollama.test.ts create mode 100644 src/__tests__/fixture-blocks-openai.test.ts create mode 100644 src/__tests__/fixture-blocks-responses.test.ts create mode 100644 src/__tests__/fixture-blocks-scoped-out.test.ts diff --git a/src/__tests__/async-fixture-response.test.ts b/src/__tests__/async-fixture-response.test.ts index f4aa0e67..0ed69fd9 100644 --- a/src/__tests__/async-fixture-response.test.ts +++ b/src/__tests__/async-fixture-response.test.ts @@ -225,6 +225,43 @@ describe("async fixture response (function responses)", () => { expect(res.status).toBe(500); }); + it("stringifies object arguments on a factory-returned toolCall block", async () => { + mock = new LLMock({ port: 0 }); + mock.on( + { userMessage: "blocks-fn" }, + () => + ({ + 
content: "Here you go.", + toolCalls: [{ name: "get_weather", arguments: { city: "NYC" } }], + blocks: [ + // OBJECT arguments — must be auto-stringified like toolCalls[].arguments, + // otherwise resolveFixtureBlocks throws (FixtureBlock requires string args). + // eslint-disable-next-line @typescript-eslint/no-explicit-any + { type: "toolCall", name: "get_weather", arguments: { city: "NYC" } } as any, + ], + // eslint-disable-next-line @typescript-eslint/no-explicit-any + }) as any, + ); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ + model: "gpt-4o", + messages: [{ role: "user", content: "blocks-fn" }], + stream: true, + }), + }); + + expect(res.status).toBe(200); + const chunks = parseSSEChunks(await res.text()); + const args = chunks + .map((c) => c.choices?.[0]?.delta?.tool_calls?.[0]?.function?.arguments ?? "") + .join(""); + expect(args).toBe('{"city":"NYC"}'); + }); + it("works with async factory and streaming", async () => { mock = new LLMock({ port: 0 }); mock.on({ userMessage: "async-stream" }, async () => { diff --git a/src/__tests__/content-with-toolcalls.test.ts b/src/__tests__/content-with-toolcalls.test.ts index f10d92e8..a35bcc6a 100644 --- a/src/__tests__/content-with-toolcalls.test.ts +++ b/src/__tests__/content-with-toolcalls.test.ts @@ -1,7 +1,12 @@ import { describe, it, expect, afterEach } from "vitest"; -import { isContentWithToolCallsResponse, isTextResponse, isToolCallResponse } from "../helpers.js"; +import { + isContentWithToolCallsResponse, + isTextResponse, + isToolCallResponse, + resolveFixtureBlocks, +} from "../helpers.js"; import { LLMock } from "../llmock.js"; -import type { SSEChunk } from "../types.js"; +import type { FixtureBlock, SSEChunk } from "../types.js"; describe("isContentWithToolCallsResponse", () => { it("returns true when both content and toolCalls are 
present", () => { @@ -39,6 +44,63 @@ describe("isContentWithToolCallsResponse", () => { }); }); +describe("resolveFixtureBlocks", () => { + it("passes a valid mixed blocks array through in order", () => { + const blocks: FixtureBlock[] = [ + { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' }, + { type: "text", text: "Here you go" }, + { type: "toolCall", name: "get_time", arguments: "{}", id: "call_1" }, + ]; + const result = resolveFixtureBlocks(blocks); + // Same reference, same order — passthrough, not reconstruction. + expect(result).toBe(blocks); + expect(result.map((b) => b.type)).toEqual(["toolCall", "text", "toolCall"]); + }); + + it("accepts a text block with a string text field", () => { + const blocks: FixtureBlock[] = [{ type: "text", text: "hi" }]; + expect(resolveFixtureBlocks(blocks)).toEqual(blocks); + }); + + it("accepts a toolCall block without an optional id", () => { + const blocks: FixtureBlock[] = [{ type: "toolCall", name: "f", arguments: "{}" }]; + expect(resolveFixtureBlocks(blocks)).toEqual(blocks); + }); + + it("rejects a non-array argument", () => { + expect(() => resolveFixtureBlocks({} as unknown as FixtureBlock[])).toThrow( + /expected an array/, + ); + }); + + it("rejects a text block with a non-string text field", () => { + const blocks = [{ type: "text", text: 42 }] as unknown as FixtureBlock[]; + expect(() => resolveFixtureBlocks(blocks)).toThrow(/index 0.*string "text"/); + }); + + it("rejects a toolCall block missing arguments", () => { + const blocks = [{ type: "toolCall", name: "f" }] as unknown as FixtureBlock[]; + expect(() => resolveFixtureBlocks(blocks)).toThrow(/index 0.*"name" and "arguments"/); + }); + + it("rejects a toolCall block with a non-string id", () => { + const blocks = [ + { type: "toolCall", name: "f", arguments: "{}", id: 1 }, + ] as unknown as FixtureBlock[]; + expect(() => resolveFixtureBlocks(blocks)).toThrow(/index 0.*"id" must be a string/); + }); + + it("rejects a block with an 
unknown type", () => { + const blocks = [{ type: "image" }] as unknown as FixtureBlock[]; + expect(() => resolveFixtureBlocks(blocks)).toThrow(/unknown type/); + }); + + it("rejects a null entry", () => { + const blocks = [null] as unknown as FixtureBlock[]; + expect(() => resolveFixtureBlocks(blocks)).toThrow(/index 0.*expected an object/); + }); +}); + function parseSSEChunks(body: string): SSEChunk[] { return body .split("\n\n") diff --git a/src/__tests__/fixture-blocks-anthropic.test.ts b/src/__tests__/fixture-blocks-anthropic.test.ts new file mode 100644 index 00000000..495c5078 --- /dev/null +++ b/src/__tests__/fixture-blocks-anthropic.test.ts @@ -0,0 +1,135 @@ +import { describe, it, expect, afterEach } from "vitest"; +import { LLMock } from "../llmock.js"; + +interface AnthropicSSEEvent { + type: string; + index?: number; + content_block?: { type: string; name?: string; input?: unknown }; + delta?: Record; + [key: string]: unknown; +} + +function parseAnthropicSSEEvents(body: string): AnthropicSSEEvent[] { + return body + .split("\n\n") + .filter((block) => block.trim().length > 0) + .map((block) => { + const dataLine = block.split("\n").find((l) => l.startsWith("data: ")); + if (!dataLine) return null; + return JSON.parse(dataLine.slice(6)) as AnthropicSSEEvent; + }) + .filter(Boolean) as AnthropicSSEEvent[]; +} + +async function postAnthropicStream( + mock: LLMock, + userMessage: string, +): Promise { + const res = await fetch(`${mock.url}/v1/messages`, { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": "test-key", + "anthropic-version": "2023-06-01", + }, + body: JSON.stringify({ + model: "claude-sonnet-4-6", + max_tokens: 1024, + messages: [{ role: "user", content: userMessage }], + stream: true, + }), + }); + return parseAnthropicSSEEvents(await res.text()); +} + +describe("Anthropic Messages — ordered fixture blocks (tool-first)", () => { + let mock: LLMock | null = null; + + afterEach(async () => { + if (mock) { + 
await mock.stop(); + mock = null; + } + }); + + it("emits tool_use content block at index 0 and text block at index 1 for [toolCall, text]", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "test anthropic blocks tool-first" }, + response: { + content: "Checking.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks: [ + { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' }, + { type: "text", text: "Here you go." }, + ], + }, + }); + await mock.start(); + + const events = await postAnthropicStream(mock, "test anthropic blocks tool-first"); + + const starts = events.filter((e) => e.type === "content_block_start"); + // First content block must be the tool_use (index 0), then text (index 1). + expect(starts.length).toBe(2); + expect(starts[0].index).toBe(0); + expect(starts[0].content_block?.type).toBe("tool_use"); + expect(starts[0].content_block?.name).toBe("get_weather"); + expect(starts[1].index).toBe(1); + expect(starts[1].content_block?.type).toBe("text"); + + // The tool_use start must precede the text start on the wire. + const toolIdx = events.findIndex( + (e) => e.type === "content_block_start" && e.content_block?.type === "tool_use", + ); + const textIdx = events.findIndex( + (e) => e.type === "content_block_start" && e.content_block?.type === "text", + ); + expect(toolIdx).toBeLessThan(textIdx); + + // The tool_use input arrives via input_json_delta on index 0. + const toolDelta = events.find( + (e) => + e.type === "content_block_delta" && e.index === 0 && e.delta?.type === "input_json_delta", + ); + expect(toolDelta).toBeDefined(); + expect(toolDelta!.delta!.partial_json).toBe('{"city":"NYC"}'); + + // The text arrives via text_delta on index 1. 
+ const textDelta = events.find( + (e) => e.type === "content_block_delta" && e.index === 1 && e.delta?.type === "text_delta", + ); + expect(textDelta).toBeDefined(); + expect(textDelta!.delta!.text).toBe("Here you go."); + + // message envelope preserved. + const messageStart = events.find((e) => e.type === "message_start"); + const messageDelta = events.find((e) => e.type === "message_delta"); + const messageStop = events.find((e) => e.type === "message_stop"); + expect(messageStart).toBeDefined(); + expect(messageStop).toBeDefined(); + expect((messageDelta!.delta as { stop_reason: string }).stop_reason).toBe("tool_use"); + }); + + it("back-compat: a fixture without blocks emits the legacy text-first block at index 0", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "test anthropic blocks legacy" }, + response: { + content: "Checking.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + }, + }); + await mock.start(); + + const events = await postAnthropicStream(mock, "test anthropic blocks legacy"); + + const starts = events.filter((e) => e.type === "content_block_start"); + // Legacy always emits the text block first (index 0) then tool_use (index 1). + expect(starts.length).toBe(2); + expect(starts[0].index).toBe(0); + expect(starts[0].content_block?.type).toBe("text"); + expect(starts[1].index).toBe(1); + expect(starts[1].content_block?.type).toBe("tool_use"); + }); +}); diff --git a/src/__tests__/fixture-blocks-e2e.test.ts b/src/__tests__/fixture-blocks-e2e.test.ts new file mode 100644 index 00000000..5dc3186c --- /dev/null +++ b/src/__tests__/fixture-blocks-e2e.test.ts @@ -0,0 +1,242 @@ +/** + * #274 slot T3 — END-TO-END integration for ordered `blocks`. 
+ * + * Unlike the per-provider builder tests (which construct an in-memory fixture + * and call `mock.addFixture(...)`), this suite proves the FULL pipeline works + * for a REAL on-disk JSON fixture: a `.json` file is written to a temp dir, + * loaded THROUGH THE REAL LOADER via `mock.loadFixtureFile(...)`, served by a + * live `LLMock` HTTP server, and the wire bytes are asserted. + * + * This closes the loader→builder→dispatch loop for the two providers whose + * wire format can FULLY express tool-first ordering (Anthropic typed content + * blocks; OpenAI Responses output_index sequencing). A blocks-bearing fixture + * with `[toolCall, text]` must stream the tool BEFORE the text on both. + * + * A back-compat guard rounds out the suite: a legacy `{content, toolCalls}` + * fixture (no `blocks`) must still stream message/text FIRST — confirming the + * branch-not-replace design leaves the legacy path untouched end-to-end. + */ +import { describe, it, expect, afterEach, beforeEach } from "vitest"; +import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { LLMock } from "../llmock.js"; + +// ─── SSE parsers (mirror content-with-toolcalls.test.ts) ───────────────────── + +function parseAnthropicSSEEvents(body: string): Array<{ type: string; [key: string]: unknown }> { + return body + .split("\n\n") + .filter((block) => block.trim().length > 0) + .map((block) => { + const dataLine = block.split("\n").find((l) => l.startsWith("data: ")); + if (!dataLine) return null; + return JSON.parse(dataLine.slice(6)) as { type: string; [key: string]: unknown }; + }) + .filter(Boolean) as Array<{ type: string; [key: string]: unknown }>; +} + +function parseResponsesSSEEvents(body: string): Array<{ type: string; [key: string]: unknown }> { + return body + .split("\n\n") + .filter((block) => block.trim().length > 0) + .map((block) => { + const dataLine = block.split("\n").find((l) => 
l.startsWith("data: ")); + if (!dataLine) return null; + return JSON.parse(dataLine.slice(6)) as { type: string; [key: string]: unknown }; + }) + .filter(Boolean) as Array<{ type: string; [key: string]: unknown }>; +} + +// ─── Tmp dir + real-loader fixture file ────────────────────────────────────── + +let tmpDir: string; +let mock: LLMock | null = null; + +beforeEach(() => { + tmpDir = mkdtempSync(join(tmpdir(), "fixture-blocks-e2e-")); +}); + +afterEach(async () => { + if (mock) { + await mock.stop(); + mock = null; + } + rmSync(tmpDir, { recursive: true, force: true }); +}); + +/** Write a fixtures JSON file to the temp dir and return its path. */ +function writeFixtureFile(name: string, content: unknown): string { + const filePath = join(tmpDir, name); + writeFileSync(filePath, JSON.stringify(content), "utf-8"); + return filePath; +} + +describe("#274 e2e: ordered blocks loaded through the REAL JSON loader", () => { + it("Anthropic streams tool_use BEFORE text for a tool-first blocks .json fixture", async () => { + // A real on-disk JSON fixture with tool-first `blocks`. The legacy + // text-first {content, toolCalls} shape cannot express this ordering. + const filePath = writeFixtureFile("anthropic-tool-first.json", { + fixtures: [ + { + match: { userMessage: "e2e anthropic tool-first" }, + response: { + content: "Here you go.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks: [ + { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' }, + { type: "text", text: "Here you go." }, + ], + }, + }, + ], + }); + + mock = new LLMock({ port: 0 }); + // THE REAL LOADER: reads + parses + normalizes the .json from disk. 
+ mock.loadFixtureFile(filePath); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/messages`, { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": "test-key", + "anthropic-version": "2023-06-01", + }, + body: JSON.stringify({ + model: "claude-sonnet-4-6", + max_tokens: 1024, + messages: [{ role: "user", content: "e2e anthropic tool-first" }], + stream: true, + }), + }); + + const events = parseAnthropicSSEEvents(await res.text()); + + const textBlockStart = events.find( + (e) => + e.type === "content_block_start" && (e.content_block as { type: string })?.type === "text", + ); + const toolBlockStart = events.find( + (e) => + e.type === "content_block_start" && + (e.content_block as { type: string })?.type === "tool_use", + ); + expect(textBlockStart).toBeDefined(); + expect(toolBlockStart).toBeDefined(); + + // Tool-first: the tool_use content block precedes the text content block. + const toolIdx = events.indexOf(toolBlockStart!); + const textIdx = events.indexOf(textBlockStart!); + expect(toolIdx).toBeLessThan(textIdx); + + const messageDelta = events.find((e) => e.type === "message_delta"); + expect((messageDelta!.delta as { stop_reason: string }).stop_reason).toBe("tool_use"); + }); + + it("Responses assigns function_call output_index 0 for a tool-first blocks .json fixture", async () => { + const filePath = writeFixtureFile("responses-tool-first.json", { + fixtures: [ + { + match: { userMessage: "e2e responses tool-first" }, + response: { + content: "Here you go.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks: [ + { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' }, + { type: "text", text: "Here you go." 
}, + ], + }, + }, + ], + }); + + mock = new LLMock({ port: 0 }); + mock.loadFixtureFile(filePath); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/responses`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ + model: "gpt-4o", + input: [{ role: "user", content: "e2e responses tool-first" }], + stream: true, + }), + }); + + const events = parseResponsesSSEEvents(await res.text()); + + const fcAdded = events.find( + (e) => + e.type === "response.output_item.added" && + (e.item as { type: string })?.type === "function_call", + ); + const msgAdded = events.find( + (e) => + e.type === "response.output_item.added" && (e.item as { type: string })?.type === "message", + ); + expect(fcAdded).toBeDefined(); + expect(msgAdded).toBeDefined(); + expect((fcAdded as unknown as { output_index: number }).output_index).toBe(0); + expect((msgAdded as unknown as { output_index: number }).output_index).toBe(1); + + const completed = events.find((e) => e.type === "response.completed"); + const output = (completed!.response as { output: Array<{ type: string }> }).output; + const types = output.map((o) => o.type); + expect(types[0]).toBe("function_call"); + expect(types.indexOf("function_call")).toBeLessThan(types.indexOf("message")); + }); + + // ── BACK-COMPAT guard: a legacy {content, toolCalls} fixture (NO blocks) + // loaded through the real loader must still stream message/text FIRST. + // Proves branch-not-replace leaves the legacy path untouched end-to-end. 
── + it("legacy .json fixture WITHOUT blocks keeps message-first ordering on Responses", async () => { + const filePath = writeFixtureFile("responses-legacy.json", { + fixtures: [ + { + match: { userMessage: "e2e responses legacy" }, + response: { + content: "Sure.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + }, + }, + ], + }); + + mock = new LLMock({ port: 0 }); + mock.loadFixtureFile(filePath); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/responses`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ + model: "gpt-4o", + input: [{ role: "user", content: "e2e responses legacy" }], + stream: true, + }), + }); + + const events = parseResponsesSSEEvents(await res.text()); + + const msgAdded = events.find( + (e) => + e.type === "response.output_item.added" && (e.item as { type: string })?.type === "message", + ); + const fcAdded = events.find( + (e) => + e.type === "response.output_item.added" && + (e.item as { type: string })?.type === "function_call", + ); + // Legacy hardcoding: message at index 0, function_call at index 1. 
+ expect((msgAdded as unknown as { output_index: number }).output_index).toBe(0); + expect((fcAdded as unknown as { output_index: number }).output_index).toBe(1); + + const completed = events.find((e) => e.type === "response.completed"); + const output = (completed!.response as { output: Array<{ type: string }> }).output; + const types = output.map((o) => o.type); + expect(types.indexOf("message")).toBeLessThan(types.indexOf("function_call")); + }); +}); diff --git a/src/__tests__/fixture-blocks-gemini.test.ts b/src/__tests__/fixture-blocks-gemini.test.ts new file mode 100644 index 00000000..5353aee1 --- /dev/null +++ b/src/__tests__/fixture-blocks-gemini.test.ts @@ -0,0 +1,157 @@ +import { describe, it, expect, afterEach } from "vitest"; +import { LLMock } from "../llmock.js"; + +type GeminiStreamChunk = { + candidates: Array<{ + content: { parts: Array<{ text?: string; functionCall?: { name: string } }> }; + finishReason?: string; + }>; +}; + +function parseGeminiSSE(body: string): GeminiStreamChunk[] { + return body + .split("\n\n") + .filter((block) => block.trim().length > 0) + .map((block) => { + const dataLine = block.split("\n").find((l) => l.startsWith("data: ")); + return dataLine ? 
(JSON.parse(dataLine.slice(6)) as GeminiStreamChunk) : null; + }) + .filter(Boolean) as GeminiStreamChunk[]; +} + +async function streamGemini(mock: LLMock, userMessage: string): Promise { + const res = await fetch( + `${mock.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent?alt=sse`, + { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + contents: [{ role: "user", parts: [{ text: userMessage }] }], + }), + }, + ); + return parseGeminiSSE(await res.text()); +} + +describe("Gemini — fixture block ordering (tool-first)", () => { + let mock: LLMock | null = null; + + afterEach(async () => { + if (mock) { + await mock.stop(); + mock = null; + } + }); + + it("emits the functionCall part BEFORE the text part when blocks lead with a toolCall", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "gemini tool-first blocks" }, + response: { + content: "Here you go", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks: [ + { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' }, + { type: "text", text: "Here you go" }, + ], + }, + }); + await mock.start(); + + const chunks = await streamGemini(mock, "gemini tool-first blocks"); + + const fcChunks = chunks.filter((c) => + c.candidates[0].content.parts.some((p) => p.functionCall !== undefined), + ); + const textChunks = chunks.filter((c) => + c.candidates[0].content.parts.some((p) => p.text !== undefined), + ); + + expect(fcChunks.length).toBeGreaterThan(0); + expect(textChunks.length).toBeGreaterThan(0); + + // The functionCall part must be emitted before the text part (tool-first order). + const firstFcIdx = chunks.indexOf(fcChunks[0]); + const firstTextIdx = chunks.indexOf(textChunks[0]); + expect(firstFcIdx).toBeLessThan(firstTextIdx); + + // finishReason still lands on the terminal chunk regardless of last block type. 
+ const lastChunk = chunks[chunks.length - 1]; + expect(lastChunk.candidates[0].finishReason).toBe("FUNCTION_CALL"); + + const fcPart = fcChunks[0].candidates[0].content.parts.find((p) => p.functionCall); + expect(fcPart!.functionCall!.name).toBe("get_weather"); + }); + + it("back-compat: a fixture with no blocks streams identically to the legacy text-first path", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "gemini no blocks" }, + response: { + content: "Sure.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + }, + }); + await mock.start(); + + const chunks = await streamGemini(mock, "gemini no blocks"); + + const textChunks = chunks.filter((c) => + c.candidates[0].content.parts.some((p) => p.text !== undefined), + ); + const fcChunks = chunks.filter((c) => + c.candidates[0].content.parts.some((p) => p.functionCall !== undefined), + ); + + expect(textChunks.length).toBeGreaterThan(0); + expect(fcChunks.length).toBeGreaterThan(0); + + // Legacy: text-first, functionCall last, FUNCTION_CALL on terminal chunk. + const lastTextIdx = chunks.lastIndexOf(textChunks.at(-1)!); + const firstFcIdx = chunks.indexOf(fcChunks[0]); + expect(lastTextIdx).toBeLessThan(firstFcIdx); + + const lastChunk = chunks[chunks.length - 1]; + expect(lastChunk.candidates[0].finishReason).toBe("FUNCTION_CALL"); + + const fullText = textChunks + .flatMap((c) => c.candidates[0].content.parts.map((p) => p.text ?? 
"")) + .join(""); + expect(fullText).toBe("Sure."); + }); + + it("empty blocks array falls back to the legacy path (does not drop content/toolCalls)", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "gemini empty blocks" }, + response: { + content: "Sure.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks: [], + }, + }); + await mock.start(); + + const chunks = await streamGemini(mock, "gemini empty blocks"); + + const textChunks = chunks.filter((c) => + c.candidates[0].content.parts.some((p) => p.text !== undefined), + ); + const fcChunks = chunks.filter((c) => + c.candidates[0].content.parts.some((p) => p.functionCall !== undefined), + ); + + // Empty blocks must NOT silently drop content/toolCalls — legacy output emits both. + expect(textChunks.length).toBeGreaterThan(0); + expect(fcChunks.length).toBeGreaterThan(0); + + const fullText = textChunks + .flatMap((c) => c.candidates[0].content.parts.map((p) => p.text ?? "")) + .join(""); + expect(fullText).toBe("Sure."); + + // Terminal finishReason still present (not a malformed, finish-less stream). + const lastChunk = chunks[chunks.length - 1]; + expect(lastChunk.candidates[0].finishReason).toBe("FUNCTION_CALL"); + }); +}); diff --git a/src/__tests__/fixture-blocks-loader.test.ts b/src/__tests__/fixture-blocks-loader.test.ts new file mode 100644 index 00000000..b41c967c --- /dev/null +++ b/src/__tests__/fixture-blocks-loader.test.ts @@ -0,0 +1,134 @@ +import { describe, it, expect } from "vitest"; +import { entryToFixture } from "../fixture-loader.js"; +import type { FixtureFileEntry, ContentWithToolCallsResponse, FixtureBlock } from "../types.js"; + +/* ------------------------------------------------------------------ * + * #274 slot T1f — JSON fixture loader carries `blocks`. * + * * + * T0 added the optional `blocks?: FixtureBlock[]` to the IN-MEMORY * + * ContentWithToolCallsResponse. 
These tests pin that an ON-DISK JSON * + * fixture carrying `blocks` survives the loader normalization, that * + * a toolCall block's object `arguments` is auto-stringified just * + * like the sibling top-level `toolCalls[].arguments`, and that a * + * fixture with no `blocks` key loads byte-identically to before. * + * ------------------------------------------------------------------ */ + +describe("#274 fixture loader carries blocks", () => { + it("carries a tool-first blocks array through into the in-memory response, in order", () => { + const entry: FixtureFileEntry = { + match: { userMessage: "do it" }, + response: { + content: "Done.", + toolCalls: [{ name: "search", arguments: '{"q":"weather"}' }], + // tool-first ordering that the legacy text-first shape cannot express + blocks: [ + { type: "toolCall", name: "search", arguments: '{"q":"weather"}' }, + { type: "text", text: "Done." }, + ], + } as FixtureFileEntry["response"], + }; + + const fixture = entryToFixture(entry); + const resp = fixture.response as ContentWithToolCallsResponse; + + expect(resp.blocks).toBeDefined(); + expect(resp.blocks).toHaveLength(2); + expect(resp.blocks?.[0]).toEqual({ + type: "toolCall", + name: "search", + arguments: '{"q":"weather"}', + }); + expect(resp.blocks?.[1]).toEqual({ type: "text", text: "Done." }); + // Order preserved exactly as authored. 
+ expect((resp.blocks as FixtureBlock[]).map((b) => b.type)).toEqual(["toolCall", "text"]); + }); + + it("auto-stringifies object arguments inside a toolCall block (mirrors toolCalls[].arguments)", () => { + const entry: FixtureFileEntry = { + match: { userMessage: "do it" }, + response: { + content: "ok", + toolCalls: [{ name: "save", arguments: { id: 1 } }], + blocks: [ + { type: "toolCall", name: "save", arguments: { id: 1, nested: { a: [1, 2] } } }, + { type: "text", text: "ok" }, + ], + } as unknown as FixtureFileEntry["response"], + }; + + const fixture = entryToFixture(entry); + const resp = fixture.response as ContentWithToolCallsResponse; + const block = resp.blocks?.[0] as { type: "toolCall"; arguments: string }; + + expect(typeof block.arguments).toBe("string"); + expect(block.arguments).toBe('{"id":1,"nested":{"a":[1,2]}}'); + }); + + it("leaves string arguments inside a toolCall block unchanged", () => { + const entry: FixtureFileEntry = { + match: { userMessage: "do it" }, + response: { + content: "ok", + toolCalls: [{ name: "save", arguments: '{"id":1}' }], + blocks: [{ type: "toolCall", name: "save", arguments: '{"id":1}' }], + } as FixtureFileEntry["response"], + }; + + const fixture = entryToFixture(entry); + const resp = fixture.response as ContentWithToolCallsResponse; + const block = resp.blocks?.[0] as { type: "toolCall"; arguments: string }; + expect(block.arguments).toBe('{"id":1}'); + }); + + it("leaves text blocks unchanged", () => { + const entry: FixtureFileEntry = { + match: { userMessage: "do it" }, + response: { + content: "Hello", + toolCalls: [{ name: "noop", arguments: "{}" }], + blocks: [{ type: "text", text: "Hello" }], + } as FixtureFileEntry["response"], + }; + + const fixture = entryToFixture(entry); + const resp = fixture.response as ContentWithToolCallsResponse; + expect(resp.blocks?.[0]).toEqual({ type: "text", text: "Hello" }); + }); + + it("back-compat: a fixture WITHOUT blocks loads identically (blocks stays undefined)", 
() => { + const entry: FixtureFileEntry = { + match: { userMessage: "legacy" }, + response: { + content: "Legacy answer.", + toolCalls: [{ name: "search", arguments: '{"q":"x"}' }], + }, + }; + + const fixture = entryToFixture(entry); + const resp = fixture.response as ContentWithToolCallsResponse; + + expect(resp.blocks).toBeUndefined(); + expect("blocks" in resp).toBe(false); + expect(resp.content).toBe("Legacy answer."); + expect(resp.toolCalls).toEqual([{ name: "search", arguments: '{"q":"x"}' }]); + }); + + it("ignores a non-array blocks value rather than corrupting the response", () => { + const entry: FixtureFileEntry = { + match: { userMessage: "bad" }, + response: { + content: "ok", + toolCalls: [{ name: "noop", arguments: "{}" }], + // malformed: blocks is not an array — loader leaves it as-is (no normalization), + // mirroring how toolCalls normalization is gated on Array.isArray. + blocks: "not-an-array", + } as unknown as FixtureFileEntry["response"], + }; + + const fixture = entryToFixture(entry); + const resp = fixture.response as ContentWithToolCallsResponse & { blocks?: unknown }; + // Non-array blocks pass through untouched (no stringify attempt, no crash); + // downstream validation/builders own shape rejection. + expect(resp.blocks).toBe("not-an-array"); + }); +}); diff --git a/src/__tests__/fixture-blocks-nonstreaming.test.ts b/src/__tests__/fixture-blocks-nonstreaming.test.ts new file mode 100644 index 00000000..520910ee --- /dev/null +++ b/src/__tests__/fixture-blocks-nonstreaming.test.ts @@ -0,0 +1,218 @@ +/** + * F2 (#274) — NON-streaming `blocks` ordering for order-observable surfaces. 
+ * + * When a combined content+toolCalls fixture sets the optional `blocks` field, + * the non-streaming builders for the three surfaces whose response body is a + * positionally-observable ORDERED array MUST emit that array in block order: + * + * - Claude /v1/messages → `content[]` (text / tool_use) + * - Gemini :generateContent → `candidates[0].content.parts[]` + * - Responses /v1/responses → `output[]` (message / function_call) + * + * A `[toolCall, text]` fixture must therefore put the tool entry BEFORE the + * text entry in each non-streaming array (the opposite of the legacy + * text-first hardcoding). A fixture WITHOUT `blocks` must stay legacy + * text-first (back-compat). + * + * Real mock-server surface (mirrors the streaming per-provider tests): an + * actual `LLMock` listens, a real non-streaming HTTP request is made, and + * assertions read the wire JSON body. + */ +import { describe, it, expect, afterEach } from "vitest"; +import { LLMock } from "../llmock.js"; +import type { FixtureBlock } from "../types.js"; + +const TOOL_FIRST_BLOCKS: FixtureBlock[] = [ + { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' }, + { type: "text", text: "Here you go." 
}, +]; + +describe("Non-streaming fixture block ordering (#274)", () => { + let mock: LLMock | null = null; + + afterEach(async () => { + if (mock) { + await mock.stop(); + mock = null; + } + }); + + // ── Claude /v1/messages — content[] is order-observable ─────────────────── + describe("Claude /v1/messages", () => { + async function postClaude(userMessage: string): Promise<{ + content: Array<{ type: string; text?: string; name?: string }>; + }> { + const res = await fetch(`${mock!.url}/v1/messages`, { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": "test-key", + "anthropic-version": "2023-06-01", + }, + body: JSON.stringify({ + model: "claude-sonnet-4-6", + max_tokens: 1024, + messages: [{ role: "user", content: userMessage }], + stream: false, + }), + }); + return res.json() as Promise<{ + content: Array<{ type: string; text?: string; name?: string }>; + }>; + } + + it("tool-first blocks: content[] leads with tool_use, then text", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "claude nonstream tool-first" }, + response: { + content: "Here you go.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks: TOOL_FIRST_BLOCKS, + }, + }); + await mock.start(); + + const body = await postClaude("claude nonstream tool-first"); + const types = body.content.map((b) => b.type); + expect(types.indexOf("tool_use")).toBeLessThan(types.indexOf("text")); + expect(types[0]).toBe("tool_use"); + expect(body.content[0].name).toBe("get_weather"); + expect(body.content[1].text).toBe("Here you go."); + }); + + it("back-compat: no blocks keeps legacy text-first content[]", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "claude nonstream no-blocks" }, + response: { + content: "Sure.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + }, + }); + await mock.start(); + + const body = await postClaude("claude nonstream 
no-blocks"); + const types = body.content.map((b) => b.type); + expect(types.indexOf("text")).toBeLessThan(types.indexOf("tool_use")); + expect(types[0]).toBe("text"); + }); + }); + + // ── Gemini :generateContent — parts[] is order-observable ───────────────── + describe("Gemini :generateContent", () => { + async function postGemini(userMessage: string): Promise<{ + candidates: Array<{ + content: { parts: Array<{ text?: string; functionCall?: { name: string } }> }; + }>; + }> { + const res = await fetch(`${mock!.url}/v1beta/models/gemini-2.0-flash:generateContent`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + contents: [{ role: "user", parts: [{ text: userMessage }] }], + }), + }); + return res.json() as Promise<{ + candidates: Array<{ + content: { parts: Array<{ text?: string; functionCall?: { name: string } }> }; + }>; + }>; + } + + it("tool-first blocks: parts[] leads with functionCall, then text", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "gemini nonstream tool-first" }, + response: { + content: "Here you go.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks: TOOL_FIRST_BLOCKS, + }, + }); + await mock.start(); + + const body = await postGemini("gemini nonstream tool-first"); + const parts = body.candidates[0].content.parts; + const fcIdx = parts.findIndex((p) => p.functionCall); + const textIdx = parts.findIndex((p) => typeof p.text === "string" && !("thought" in p)); + expect(fcIdx).toBeGreaterThanOrEqual(0); + expect(textIdx).toBeGreaterThanOrEqual(0); + expect(fcIdx).toBeLessThan(textIdx); + expect(parts[0].functionCall?.name).toBe("get_weather"); + }); + + it("back-compat: no blocks keeps legacy text-first parts[]", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "gemini nonstream no-blocks" }, + response: { + content: "Sure.", + toolCalls: [{ name: "get_weather", arguments: 
'{"city":"NYC"}' }], + }, + }); + await mock.start(); + + const body = await postGemini("gemini nonstream no-blocks"); + const parts = body.candidates[0].content.parts; + const fcIdx = parts.findIndex((p) => p.functionCall); + const textIdx = parts.findIndex((p) => typeof p.text === "string"); + expect(textIdx).toBeLessThan(fcIdx); + expect(parts[0].text).toBe("Sure."); + }); + }); + + // ── OpenAI Responses /v1/responses — output[] is order-observable ───────── + describe("OpenAI Responses /v1/responses", () => { + async function postResponses(userMessage: string): Promise<{ + output: Array<{ type: string; name?: string }>; + }> { + const res = await fetch(`${mock!.url}/v1/responses`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ + model: "gpt-4o", + input: [{ role: "user", content: userMessage }], + stream: false, + }), + }); + return res.json() as Promise<{ output: Array<{ type: string; name?: string }> }>; + } + + it("tool-first blocks: output[] leads with function_call, then message", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "responses nonstream tool-first" }, + response: { + content: "Here you go.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks: TOOL_FIRST_BLOCKS, + }, + }); + await mock.start(); + + const body = await postResponses("responses nonstream tool-first"); + const types = body.output.map((o) => o.type); + expect(types.indexOf("function_call")).toBeLessThan(types.indexOf("message")); + expect(types[0]).toBe("function_call"); + expect(body.output[0].name).toBe("get_weather"); + }); + + it("back-compat: no blocks keeps legacy message-first output[]", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "responses nonstream no-blocks" }, + response: { + content: "Sure.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + }, + }); + 
+ await mock.start(); + + const body = await postResponses("responses nonstream no-blocks"); + const types = body.output.map((o) => o.type); + expect(types.indexOf("message")).toBeLessThan(types.indexOf("function_call")); + expect(types[0]).toBe("message"); + }); + }); +}); diff --git a/src/__tests__/fixture-blocks-ollama.test.ts b/src/__tests__/fixture-blocks-ollama.test.ts new file mode 100644 index 00000000..c7a4e2fc --- /dev/null +++ b/src/__tests__/fixture-blocks-ollama.test.ts @@ -0,0 +1,137 @@ +import { describe, it, expect, afterEach } from "vitest"; +import { LLMock } from "../llmock.js"; + +interface OllamaChunk { + model?: string; + created_at?: string; + message?: { + role?: string; + content?: string; + reasoning_content?: string; + tool_calls?: Array<{ function: { name: string; arguments: unknown } }>; + }; + done?: boolean; + [key: string]: unknown; +} + +function parseNDJSON(body: string): OllamaChunk[] { + return body + .split("\n") + .filter((line) => line.trim().length > 0) + .map((line) => JSON.parse(line) as OllamaChunk); +} + +async function ollamaChatStream(mock: LLMock, userMessage: string): Promise<OllamaChunk[]> { + const res = await fetch(`${mock.url}/api/chat`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "llama3.1", + messages: [{ role: "user", content: userMessage }], + stream: true, + }), + }); + return parseNDJSON(await res.text()); +} + +describe("Ollama — fixture block ordering (tool-first)", () => { + let mock: LLMock | null = null; + + afterEach(async () => { + if (mock) { + await mock.stop(); + mock = null; + } + }); + + it("emits the tool_calls chunk before the content chunk when blocks are tool-first", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "test ollama blocks tool-first" }, + response: { + content: "Here you go.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks: [ + { type: "toolCall", name: 
"get_weather", arguments: '{"city":"NYC"}' }, + { type: "text", text: "Here you go." }, + ], + }, + }); + await mock.start(); + + const chunks = await ollamaChatStream(mock, "test ollama blocks tool-first"); + + const toolChunkIdx = chunks.findIndex((c) => c.message?.tool_calls?.length); + const contentChunkIdx = chunks.findIndex((c) => c.message?.content); + + expect(toolChunkIdx).toBeGreaterThanOrEqual(0); + expect(contentChunkIdx).toBeGreaterThanOrEqual(0); + // Tool-first block order: the tool_calls-bearing chunk precedes the content chunk. + expect(toolChunkIdx).toBeLessThan(contentChunkIdx); + + // Tool call payload is preserved. + const toolChunk = chunks[toolChunkIdx]; + expect(toolChunk.message!.tool_calls![0].function.name).toBe("get_weather"); + expect(toolChunk.message!.tool_calls![0].function.arguments).toEqual({ city: "NYC" }); + + // Content is preserved across content chunks, in order, after the tool call. + const fullContent = chunks.map((c) => c.message?.content ?? "").join(""); + expect(fullContent).toBe("Here you go."); + + // Final/done chunk preserved exactly as legacy (done:true + timing fields). + const doneChunk = chunks.at(-1)!; + expect(doneChunk.done).toBe(true); + expect(doneChunk).toHaveProperty("total_duration"); + }); + + it("back-compat: a no-blocks fixture is byte-identical to the legacy text-first stream", async () => { + // Legacy fixture (no blocks) drives the untouched else branch. + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "test ollama legacy" }, + response: { + content: "Let me check.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + }, + }); + await mock.start(); + const legacyChunks = await ollamaChatStream(mock, "test ollama legacy"); + await mock.stop(); + mock = null; + + // Same content+toolCalls expressed as text-first blocks should produce the + // SAME wire order as the legacy path (content chunks first, then tool_calls). 
+ mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "test ollama blocks text-first" }, + response: { + content: "Let me check.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks: [ + { type: "text", text: "Let me check." }, + { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' }, + ], + }, + }); + await mock.start(); + const blockChunks = await ollamaChatStream(mock, "test ollama blocks text-first"); + + // Normalize out per-request timestamps that legitimately differ. + const normalize = (chunks: OllamaChunk[]) => + chunks.map((c) => { + const { created_at, ...rest } = c; + void created_at; + return rest; + }); + + const legacyContentIdx = legacyChunks.findIndex((c) => c.message?.content); + const legacyToolIdx = legacyChunks.findIndex((c) => c.message?.tool_calls?.length); + const blockContentIdx = blockChunks.findIndex((c) => c.message?.content); + const blockToolIdx = blockChunks.findIndex((c) => c.message?.tool_calls?.length); + + // Legacy is text-first; text-first blocks must match that ordering. 
+ expect(legacyContentIdx).toBeLessThan(legacyToolIdx); + expect(blockContentIdx).toBeLessThan(blockToolIdx); + expect(normalize(blockChunks)).toEqual(normalize(legacyChunks)); + }); +}); diff --git a/src/__tests__/fixture-blocks-openai.test.ts b/src/__tests__/fixture-blocks-openai.test.ts new file mode 100644 index 00000000..6c28c727 --- /dev/null +++ b/src/__tests__/fixture-blocks-openai.test.ts @@ -0,0 +1,159 @@ +import { describe, it, expect, afterEach } from "vitest"; +import { LLMock } from "../llmock.js"; +import type { SSEChunk } from "../types.js"; + +function parseSSEChunks(body: string): SSEChunk[] { + return body + .split("\n\n") + .filter((line) => line.startsWith("data: ") && !line.includes("[DONE]")) + .map((line) => JSON.parse(line.slice(6)) as SSEChunk); +} + +describe("OpenAI Chat Completions — fixture block ordering (#274)", () => { + let mock: LLMock | null = null; + + afterEach(async () => { + if (mock) { + await mock.stop(); + mock = null; + } + }); + + it("emits tool_call delta chunks before content delta chunks for a tool-first blocks fixture", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "test blocks tool-first" }, + response: { + // Legacy fields preserved for the guard; blocks drives emission order. + content: "After the call.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks: [ + { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' }, + { type: "text", text: "After the call." 
}, + ], + }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ + model: "gpt-4o", + messages: [{ role: "user", content: "test blocks tool-first" }], + stream: true, + }), + }); + + const chunks = parseSSEChunks(await res.text()); + const contentChunks = chunks.filter((c) => c.choices?.[0]?.delta?.content); + const toolChunks = chunks.filter((c) => c.choices?.[0]?.delta?.tool_calls); + const finishChunk = chunks.find((c) => c.choices?.[0]?.finish_reason); + + expect(contentChunks.length).toBeGreaterThan(0); + expect(toolChunks.length).toBeGreaterThan(0); + + // The block array is [toolCall, text], so the emitted SSE chunk SEQUENCE + // must place the tool_call delta chunk(s) BEFORE the content delta chunk(s). + const firstToolIdx = chunks.indexOf(toolChunks[0]); + const firstContentIdx = chunks.indexOf(contentChunks[0]); + expect(firstToolIdx).toBeLessThan(firstContentIdx); + + // Content + finish_reason preserved exactly as the legacy path. + const fullContent = contentChunks.map((c) => c.choices[0].delta.content).join(""); + expect(fullContent).toBe("After the call."); + expect(finishChunk!.choices[0].finish_reason).toBe("tool_calls"); + + // Tool call assembled correctly with index 0. 
+ expect(toolChunks[0].choices[0].delta.tool_calls![0].index).toBe(0); + expect(toolChunks[0].choices[0].delta.tool_calls![0].function!.name).toBe("get_weather"); + }); + + it("assigns tool_call index in block encounter order for interleaved blocks", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "test blocks interleave" }, + response: { + content: "A B", + toolCalls: [ + { name: "fn_a", arguments: '{"a":1}' }, + { name: "fn_b", arguments: '{"b":2}' }, + ], + blocks: [ + { type: "toolCall", name: "fn_a", arguments: '{"a":1}' }, + { type: "text", text: "A " }, + { type: "toolCall", name: "fn_b", arguments: '{"b":2}' }, + { type: "text", text: "B" }, + ], + }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ + model: "gpt-4o", + messages: [{ role: "user", content: "test blocks interleave" }], + stream: true, + }), + }); + + const chunks = parseSSEChunks(await res.text()); + + // Encounter-order index assignment: fn_a -> 0, fn_b -> 1. + const initialToolChunks = chunks.filter( + (c) => c.choices?.[0]?.delta?.tool_calls?.[0]?.function?.name, + ); + expect(initialToolChunks.map((c) => c.choices[0].delta.tool_calls![0].index)).toEqual([0, 1]); + expect(initialToolChunks[0].choices[0].delta.tool_calls![0].function!.name).toBe("fn_a"); + expect(initialToolChunks[1].choices[0].delta.tool_calls![0].function!.name).toBe("fn_b"); + + // Wire sequence reflects block order: first tool chunk precedes first content chunk. 
+ const firstToolIdx = chunks.indexOf(initialToolChunks[0]); + const firstContentIdx = chunks.findIndex((c) => c.choices?.[0]?.delta?.content); + expect(firstToolIdx).toBeLessThan(firstContentIdx); + + const fullContent = chunks + .filter((c) => c.choices?.[0]?.delta?.content) + .map((c) => c.choices[0].delta.content) + .join(""); + expect(fullContent).toBe("A B"); + }); + + it("back-compat: a fixture WITHOUT blocks streams content-first (legacy path untouched)", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "test no blocks legacy" }, + response: { + content: "Let me check.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ + model: "gpt-4o", + messages: [{ role: "user", content: "test no blocks legacy" }], + stream: true, + }), + }); + + const chunks = parseSSEChunks(await res.text()); + const contentChunks = chunks.filter((c) => c.choices?.[0]?.delta?.content); + const toolChunks = chunks.filter((c) => c.choices?.[0]?.delta?.tool_calls); + const finishChunk = chunks.find((c) => c.choices?.[0]?.finish_reason); + + // Legacy: content strictly before tool calls. 
+ const lastContentIdx = chunks.lastIndexOf(contentChunks.at(-1)!); + const firstToolIdx = chunks.indexOf(toolChunks[0]); + expect(lastContentIdx).toBeLessThan(firstToolIdx); + + const fullContent = contentChunks.map((c) => c.choices[0].delta.content).join(""); + expect(fullContent).toBe("Let me check."); + expect(finishChunk!.choices[0].finish_reason).toBe("tool_calls"); + }); +}); diff --git a/src/__tests__/fixture-blocks-responses.test.ts b/src/__tests__/fixture-blocks-responses.test.ts new file mode 100644 index 00000000..21e7e46a --- /dev/null +++ b/src/__tests__/fixture-blocks-responses.test.ts @@ -0,0 +1,201 @@ +/** + * T1e — OpenAI Responses API: ordered `blocks` streaming. + * + * When a combined content+toolCalls fixture sets the optional `blocks` field, + * the Responses builder must assign `output_index` and assemble + * `response.completed.output` in the blocks' ARRAY ORDER. A `toolCall` block + * placed before a `text` block therefore yields a `function_call` output item + * at the LOWER `output_index`, appearing FIRST in the final `output` array — + * the opposite of the legacy (message-always-first) hardcoding. + * + * Real mock-server surface (mirrors content-with-toolcalls.test.ts): an actual + * `LLMock` listens, a real HTTP request streams SSE, and assertions read the + * wire bytes. 
+ */ +import { describe, it, expect, afterEach } from "vitest"; +import { LLMock } from "../llmock.js"; +import type { FixtureBlock } from "../types.js"; + +function parseResponsesSSEEvents(body: string): Array<{ type: string; [key: string]: unknown }> { + return body + .split("\n\n") + .filter((block) => block.trim().length > 0) + .map((block) => { + const dataLine = block.split("\n").find((l) => l.startsWith("data: ")); + if (!dataLine) return null; + return JSON.parse(dataLine.slice(6)) as { type: string; [key: string]: unknown }; + }) + .filter(Boolean) as Array<{ type: string; [key: string]: unknown }>; +} + +describe("OpenAI Responses API — fixture block ordering (#274)", () => { + let mock: LLMock | null = null; + + afterEach(async () => { + if (mock) { + await mock.stop(); + mock = null; + } + }); + + it("tool-first blocks: function_call takes output_index 0 and leads response.output", async () => { + mock = new LLMock({ port: 0 }); + const blocks: FixtureBlock[] = [ + { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' }, + { type: "text", text: "Here you go." }, + ]; + mock.addFixture({ + match: { userMessage: "responses blocks tool-first" }, + response: { + content: "Here you go.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks, + }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/responses`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ + model: "gpt-4o", + input: [{ role: "user", content: "responses blocks tool-first" }], + stream: true, + }), + }); + + const events = parseResponsesSSEEvents(await res.text()); + + // The function_call output item must be added at output_index 0 (before the + // message item), proving block-order output-index assignment. 
+ const fcAdded = events.find( + (e) => + e.type === "response.output_item.added" && + (e.item as { type: string })?.type === "function_call", + ); + const msgAdded = events.find( + (e) => + e.type === "response.output_item.added" && (e.item as { type: string })?.type === "message", + ); + expect(fcAdded).toBeDefined(); + expect(msgAdded).toBeDefined(); + expect((fcAdded as { output_index: number }).output_index).toBe(0); + expect((msgAdded as { output_index: number }).output_index).toBe(1); + + // The final completed.output array must lead with the function_call item. + const completed = events.find((e) => e.type === "response.completed"); + const output = (completed!.response as { output: Array<{ type: string }> }).output; + const types = output.map((o) => o.type); + expect(types.indexOf("function_call")).toBeLessThan(types.indexOf("message")); + expect(types[0]).toBe("function_call"); + + // Content + arguments still stream fully. + const allTextDeltas = events + .filter((e) => e.type === "response.output_text.delta") + .map((e) => (e as unknown as { delta: string }).delta) + .join(""); + expect(allTextDeltas).toBe("Here you go."); + const allArgDeltas = events + .filter((e) => e.type === "response.function_call_arguments.delta") + .map((e) => (e as unknown as { delta: string }).delta) + .join(""); + expect(allArgDeltas).toBe('{"city":"NYC"}'); + }); + + it("back-compat: a fixture WITHOUT blocks keeps the legacy message-first ordering", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "responses no blocks legacy" }, + response: { + content: "Sure.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/responses`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ + model: "gpt-4o", + input: [{ role: "user", content: "responses no blocks legacy" }], + 
stream: true, + }), + }); + + const events = parseResponsesSSEEvents(await res.text()); + + const msgAdded = events.find( + (e) => + e.type === "response.output_item.added" && (e.item as { type: string })?.type === "message", + ); + const fcAdded = events.find( + (e) => + e.type === "response.output_item.added" && + (e.item as { type: string })?.type === "function_call", + ); + // Legacy hardcoding: message at index 0, function_call at index 1. + expect((msgAdded as { output_index: number }).output_index).toBe(0); + expect((fcAdded as { output_index: number }).output_index).toBe(1); + + const completed = events.find((e) => e.type === "response.completed"); + const output = (completed!.response as { output: Array<{ type: string }> }).output; + const types = output.map((o) => o.type); + expect(types.indexOf("message")).toBeLessThan(types.indexOf("function_call")); + }); + + it("empty blocks array falls back to the legacy path (content/toolCalls + terminal completed)", async () => { + mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "responses empty blocks" }, + response: { + content: "Sure.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks: [], + }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/responses`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ + model: "gpt-4o", + input: [{ role: "user", content: "responses empty blocks" }], + stream: true, + }), + }); + + const events = parseResponsesSSEEvents(await res.text()); + + // Empty blocks must fall back to legacy: both a message and a function_call item. 
+ const msgAdded = events.find( + (e) => + e.type === "response.output_item.added" && (e.item as { type: string })?.type === "message", + ); + const fcAdded = events.find( + (e) => + e.type === "response.output_item.added" && + (e.item as { type: string })?.type === "function_call", + ); + expect(msgAdded).toBeDefined(); + expect(fcAdded).toBeDefined(); + + // Content + arguments still stream fully (not silently dropped). + const allTextDeltas = events + .filter((e) => e.type === "response.output_text.delta") + .map((e) => (e as unknown as { delta: string }).delta) + .join(""); + expect(allTextDeltas).toBe("Sure."); + const allArgDeltas = events + .filter((e) => e.type === "response.function_call_arguments.delta") + .map((e) => (e as unknown as { delta: string }).delta) + .join(""); + expect(allArgDeltas).toBe('{"city":"NYC"}'); + + // Terminal completed event with usage is present (not a role+finish-only stream). + const completed = events.find((e) => e.type === "response.completed"); + expect(completed).toBeDefined(); + expect((completed!.response as { usage?: unknown }).usage).toBeDefined(); + }); +}); diff --git a/src/__tests__/fixture-blocks-scoped-out.test.ts b/src/__tests__/fixture-blocks-scoped-out.test.ts new file mode 100644 index 00000000..a325b545 --- /dev/null +++ b/src/__tests__/fixture-blocks-scoped-out.test.ts @@ -0,0 +1,143 @@ +/** + * #274 slot T3 — SCOPED-OUT consumer safety for ordered `blocks`. + * + * The `blocks` field is honored only by the five in-scope stream builders + * (OpenAI chat, Anthropic, Gemini, Ollama, OpenAI Responses + the WS Responses + * dispatch). The OTHER consumers of `isContentWithToolCallsResponse` — + * Bedrock (`/model/{id}/invoke`), Cohere (`/v2/chat`), and Gemini Interactions + * (`/v1beta/interactions`) — were deliberately left UNCHANGED: they read only + * `.content` / `.toolCalls` and must completely IGNORE `.blocks`. 
+ * + * These tests drive each scoped-out consumer with a fixture that ALSO carries a + * `blocks` array (in an order that differs from the legacy text-first shape). + * The consumer must NOT crash and must serve the legacy `{content, toolCalls}` + * payload exactly as if `blocks` were absent. + */ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import type { Fixture, FixtureBlock } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; + +function post( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +// A combined content+toolCalls fixture that ALSO carries a tool-first `blocks` +// array — the exact shape the scoped-out consumers must ignore. 
+const toolFirstBlocks: FixtureBlock[] = [ + { type: "toolCall", name: "get_weather", arguments: '{"city":"SF"}' }, + { type: "text", text: "Let me help you" }, +]; + +const blocksBearingFixture: Fixture = { + match: { userMessage: "scoped-out blocks" }, + response: { + content: "Let me help you", + toolCalls: [{ name: "get_weather", arguments: '{"city":"SF"}' }], + blocks: toolFirstBlocks, + }, +}; + +let instance: ServerInstance | null = null; + +afterEach(async () => { + if (instance) { + await new Promise<void>((resolve) => { + instance!.server.close(() => resolve()); + }); + instance = null; + } +}); + +describe("#274 scoped-out consumers ignore `blocks` without crashing", () => { + it("Bedrock /model/{id}/invoke serves legacy content+tool_use, ignoring blocks", async () => { + instance = await createServer([blocksBearingFixture]); + const res = await post( + `${instance.url}/model/anthropic.claude-3-5-sonnet-20241022-v2:0/invoke`, + { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "scoped-out blocks" }], + }, + ); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.type).toBe("message"); + // Legacy text-first Anthropic shape: text content then tool_use — NOT the + // tool-first ordering carried in `blocks` (which Bedrock must ignore). 
+ expect(body.content[0].type).toBe("text"); + expect(body.content[0].text).toBe("Let me help you"); + expect(body.content[1].type).toBe("tool_use"); + expect(body.content[1].name).toBe("get_weather"); + expect(body.content[1].input).toEqual({ city: "SF" }); + expect(body.stop_reason).toBe("tool_use"); + }); + + it("Cohere /v2/chat serves legacy content+tool_calls, ignoring blocks", async () => { + instance = await createServer([blocksBearingFixture]); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "scoped-out blocks" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + // Cohere reads only content/toolCalls; blocks is ignored, no crash. + expect(body.message.tool_calls).toHaveLength(1); + expect(body.message.tool_calls[0].function.name).toBe("get_weather"); + expect(body.message.tool_calls[0].function.arguments).toBe('{"city":"SF"}'); + }); + + it("Gemini Interactions /v1beta/interactions serves legacy steps, ignoring blocks", async () => { + instance = await createServer([blocksBearingFixture]); + const res = await post(`${instance.url}/v1beta/interactions`, { + model: "gemini-2.5-flash", + input: "scoped-out blocks", + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + // Reads only content/toolCalls; blocks is ignored, no crash. 
+ expect(body.status).toBe("requires_action"); + expect(body.output_text).toBe("Let me help you"); + expect(body.steps).toHaveLength(2); + expect(body.steps[0].type).toBe("model_output"); + expect(body.steps[1].type).toBe("function_call"); + expect(body.steps[1].name).toBe("get_weather"); + }); +}); diff --git a/src/__tests__/fixture-loader.test.ts b/src/__tests__/fixture-loader.test.ts index cc0d0b4f..c7701a22 100644 --- a/src/__tests__/fixture-loader.test.ts +++ b/src/__tests__/fixture-loader.test.ts @@ -762,6 +762,160 @@ describe("validateFixtures", () => { ).toBe(true); }); + // --- blocks checks (#274 F3+F8) --- + // A malformed `blocks` array must be REJECTED at load time so it never + // reaches the dispatch/builder (where resolveFixtureBlocks would throw AFTER + // the journal already recorded status:200). Mirrors the toolCalls checks. + + it("error: blocks is not an array", () => { + const fixtures = [ + makeFixture({ + response: { + content: "ok", + toolCalls: [{ name: "fn", arguments: "{}" }], + blocks: "not-an-array", + } as never, + }), + ]; + const results = validateFixtures(fixtures); + expect( + results.some((r) => r.severity === "error" && r.message.includes("blocks must be an array")), + ).toBe(true); + }); + + it("error: block with unknown type", () => { + const fixtures = [ + makeFixture({ + response: { + content: "ok", + toolCalls: [{ name: "fn", arguments: "{}" }], + blocks: [{ type: "bogus" }], + } as never, + }), + ]; + const results = validateFixtures(fixtures); + expect( + results.some( + (r) => r.severity === "error" && r.message.includes("type") && r.message.includes("blocks"), + ), + ).toBe(true); + }); + + it("error: text block with non-string text", () => { + const fixtures = [ + makeFixture({ + response: { + content: "ok", + toolCalls: [{ name: "fn", arguments: "{}" }], + // object `text` would replay as `[object Object]` + blocks: [{ type: "text", text: { nested: true } }], + } as never, + }), + ]; + const results = 
validateFixtures(fixtures); + expect( + results.some( + (r) => + r.severity === "error" && r.message.includes("blocks[0]") && r.message.includes("text"), + ), + ).toBe(true); + }); + + it("error: toolCall block with non-string name", () => { + const fixtures = [ + makeFixture({ + response: { + content: "ok", + toolCalls: [{ name: "fn", arguments: "{}" }], + blocks: [{ type: "toolCall", name: 123, arguments: "{}" }], + } as never, + }), + ]; + const results = validateFixtures(fixtures); + expect( + results.some( + (r) => + r.severity === "error" && r.message.includes("blocks[0]") && r.message.includes("name"), + ), + ).toBe(true); + }); + + it("error: toolCall block with empty name", () => { + const fixtures = [ + makeFixture({ + response: { + content: "ok", + toolCalls: [{ name: "fn", arguments: "{}" }], + blocks: [{ type: "toolCall", name: "", arguments: "{}" }], + } as never, + }), + ]; + const results = validateFixtures(fixtures); + expect( + results.some( + (r) => + r.severity === "error" && r.message.includes("blocks[0]") && r.message.includes("name"), + ), + ).toBe(true); + }); + + it("error: toolCall block with invalid-JSON arguments", () => { + const fixtures = [ + makeFixture({ + response: { + content: "ok", + toolCalls: [{ name: "fn", arguments: "{}" }], + blocks: [{ type: "toolCall", name: "fn", arguments: "not json" }], + } as never, + }), + ]; + const results = validateFixtures(fixtures); + expect( + results.some( + (r) => + r.severity === "error" && + r.message.includes("blocks[0]") && + r.message.includes("not valid JSON"), + ), + ).toBe(true); + }); + + it("error: toolCall block with non-string id", () => { + const fixtures = [ + makeFixture({ + response: { + content: "ok", + toolCalls: [{ name: "fn", arguments: "{}" }], + blocks: [{ type: "toolCall", name: "fn", arguments: "{}", id: 7 }], + } as never, + }), + ]; + const results = validateFixtures(fixtures); + expect( + results.some( + (r) => + r.severity === "error" && 
r.message.includes("blocks[0]") && r.message.includes("id"), + ), + ).toBe(true); + }); + + it("no error: a valid blocks array passes validation", () => { + const fixtures = [ + makeFixture({ + response: { + content: "Done.", + toolCalls: [{ name: "search", arguments: '{"q":"x"}' }], + blocks: [ + { type: "toolCall", name: "search", arguments: '{"q":"x"}', id: "call_1" }, + { type: "text", text: "Done." }, + ], + } as never, + }), + ]; + const results = validateFixtures(fixtures); + expect(results.filter((r) => r.severity === "error")).toEqual([]); + }); + it("error: error response with empty message", () => { const fixtures = [ makeFixture({ response: { error: { message: "", type: "e" }, status: 500 } }), diff --git a/src/__tests__/recorder.test.ts b/src/__tests__/recorder.test.ts index bd84cd0a..da925d38 100644 --- a/src/__tests__/recorder.test.ts +++ b/src/__tests__/recorder.test.ts @@ -776,6 +776,120 @@ describe("recorder streaming collapse", () => { } }); + // ---- Ordered `blocks` persistence (#274) --------------------------------- + // A tool-call-before-text Anthropic stream is interleaved, so the recorder + // must persist the ordered `blocks` array; an ordinary text-then-tools stream + // is NOT interleaved, so the recorder keeps the legacy `{content, toolCalls}` + // shape with NO `blocks` key (golden recordings stay byte-identical). 
+ async function recordAnthropicStream( + sse: string, + prefix: string, + ): Promise> { + const anthropicUpstream = http.createServer((_upReq, upRes) => { + upRes.writeHead(200, { "Content-Type": "text/event-stream" }); + upRes.end(sse); + }); + await new Promise((resolve) => anthropicUpstream.listen(0, "127.0.0.1", () => resolve())); + const upstreamPort = (anthropicUpstream.address() as { port: number }).port; + const fixturePath = fs.mkdtempSync(path.join(os.tmpdir(), prefix)); + + const recorderServer = http.createServer((req, res) => { + const chunks: Buffer[] = []; + req.on("data", (c: Buffer) => chunks.push(c)); + req.on("end", async () => { + const rawBody = Buffer.concat(chunks).toString(); + await proxyAndRecord( + req, + res, + JSON.parse(rawBody), + "anthropic", + "/v1/messages", + [], + { + record: { + providers: { anthropic: `http://127.0.0.1:${upstreamPort}` }, + fixturePath, + }, + logger: new Logger("silent"), + }, + rawBody, + ); + }); + }); + await new Promise((resolve) => recorderServer.listen(0, "127.0.0.1", () => resolve())); + const recorderPort = (recorderServer.address() as { port: number }).port; + + try { + const resp = await post(`http://127.0.0.1:${recorderPort}/v1/messages`, { + model: "claude-3-7-sonnet-20250219", + max_tokens: 1024, + stream: true, + messages: [{ role: "user", content: "go" }], + }); + expect(resp.status).toBe(200); + const files = fs.readdirSync(fixturePath).filter((f) => f.endsWith(".json")); + expect(files).toHaveLength(1); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, files[0]), "utf-8"), + ) as FixtureFile; + return fixtureContent.fixtures[0].response as Record; + } finally { + await new Promise((resolve) => anthropicUpstream.close(() => resolve())); + await new Promise((resolve) => recorderServer.close(() => resolve())); + fs.rmSync(fixturePath, { recursive: true, force: true }); + } + } + + it("persists ordered blocks for a tool-before-text streamed turn", async () => { + const 
sse = [ + `event: content_block_start\ndata: ${JSON.stringify({ index: 0, content_block: { type: "tool_use", id: "toolu_1", name: "get_weather", input: {} } })}`, + "", + `event: content_block_delta\ndata: ${JSON.stringify({ index: 0, delta: { type: "input_json_delta", partial_json: '{"city":"Paris"}' } })}`, + "", + `event: content_block_stop\ndata: ${JSON.stringify({ index: 0 })}`, + "", + `event: content_block_start\ndata: ${JSON.stringify({ index: 1, content_block: { type: "text", text: "" } })}`, + "", + `event: content_block_delta\ndata: ${JSON.stringify({ index: 1, delta: { type: "text_delta", text: "Done." } })}`, + "", + `event: content_block_stop\ndata: ${JSON.stringify({ index: 1 })}`, + "", + `event: message_stop\ndata: {}`, + "", + ].join("\n"); + const saved = await recordAnthropicStream(sse, "aimock-recorder-blocks-tool-"); + expect(saved.blocks).toEqual([ + { type: "toolCall", name: "get_weather", arguments: '{"city":"Paris"}', id: "toolu_1" }, + { type: "text", text: "Done." }, + ]); + // Legacy fields remain populated for replay/back-compat. + expect(saved.content).toBe("Done."); + expect(saved.toolCalls).toHaveLength(1); + }); + + it("persists the legacy shape (no blocks) for a text-then-tools streamed turn", async () => { + const sse = [ + `event: content_block_start\ndata: ${JSON.stringify({ index: 0, content_block: { type: "text", text: "" } })}`, + "", + `event: content_block_delta\ndata: ${JSON.stringify({ index: 0, delta: { type: "text_delta", text: "Sure." 
} })}`, + "", + `event: content_block_stop\ndata: ${JSON.stringify({ index: 0 })}`, + "", + `event: content_block_start\ndata: ${JSON.stringify({ index: 1, content_block: { type: "tool_use", id: "toolu_1", name: "get_weather", input: {} } })}`, + "", + `event: content_block_delta\ndata: ${JSON.stringify({ index: 1, delta: { type: "input_json_delta", partial_json: '{"city":"Paris"}' } })}`, + "", + `event: content_block_stop\ndata: ${JSON.stringify({ index: 1 })}`, + "", + `event: message_stop\ndata: {}`, + "", + ].join("\n"); + const saved = await recordAnthropicStream(sse, "aimock-recorder-blocks-text-"); + expect(saved.blocks).toBeUndefined(); + expect(saved.content).toBe("Sure."); + expect(saved.toolCalls).toHaveLength(1); + }); + it("captures Anthropic redacted_thinking block data into the recorded fixture's redactedThinking", async () => { const REDACTED_DATA = "EncryptedRedactedThinkingPayloadAAA=="; // Raw Anthropic SSE upstream that streams a redacted_thinking block (its diff --git a/src/__tests__/stream-collapse.test.ts b/src/__tests__/stream-collapse.test.ts index f4d3087e..4908cb84 100644 --- a/src/__tests__/stream-collapse.test.ts +++ b/src/__tests__/stream-collapse.test.ts @@ -3583,3 +3583,349 @@ describe("harmony fail-safe — quoted whole-message ambiguity (known limitation expect(direct.content).not.toBe("To emit write hello<|return|> and then stop"); }); }); + +// --------------------------------------------------------------------------- +// Cross-channel block-order instrumentation (#274) +// +// The collapsers must retain enough cross-channel order to let the recorder +// decide whether a stream is "interleaved" — a tool-call delta appears +// strictly before the first content delta, OR a content delta appears after +// any tool-call delta. When interleaved, `CollapseResult.blocks` carries the +// ordered FixtureBlock[] in stream order. 
When NOT interleaved (text-first, +// text-only, or tool-only), `blocks` stays undefined so the recorder persists +// the legacy `{ content, toolCalls }` shape byte-identically. +// --------------------------------------------------------------------------- + +describe("stream block-order instrumentation (#274)", () => { + // ---- OpenAI SSE ---------------------------------------------------------- + describe("collapseOpenAISSE blocks", () => { + const textDelta = (text: string) => + `data: ${JSON.stringify({ choices: [{ delta: { content: text } }] })}`; + const toolDelta = (index: number, opts: { id?: string; name?: string; args?: string }) => + `data: ${JSON.stringify({ + choices: [ + { + delta: { + tool_calls: [ + { + index, + ...(opts.id ? { id: opts.id } : {}), + function: { + ...(opts.name ? { name: opts.name } : {}), + ...(opts.args !== undefined ? { arguments: opts.args } : {}), + }, + }, + ], + }, + }, + ], + })}`; + + it("text-first stream is NOT interleaved → no blocks", () => { + const body = [ + textDelta("Hello "), + "", + textDelta("world"), + "", + toolDelta(0, { id: "call_1", name: "get_weather", args: '{"city":"Paris"}' }), + "", + "data: [DONE]", + "", + ].join("\n"); + const result = collapseOpenAISSE(body); + expect(result.content).toBe("Hello world"); + expect(result.toolCalls).toHaveLength(1); + expect(result.blocks).toBeUndefined(); + }); + + it("tool-first stream is interleaved → blocks in tool-first order", () => { + const body = [ + toolDelta(0, { id: "call_1", name: "get_weather", args: '{"city":"Paris"}' }), + "", + textDelta("Here you go"), + "", + "data: [DONE]", + "", + ].join("\n"); + const result = collapseOpenAISSE(body); + expect(result.blocks).toBeDefined(); + expect(result.blocks).toEqual([ + { type: "toolCall", name: "get_weather", arguments: '{"city":"Paris"}', id: "call_1" }, + { type: "text", text: "Here you go" }, + ]); + }); + + it("tools→text→tools interleave is captured in stream order", () => { + const body = [ + 
toolDelta(0, { id: "call_1", name: "a", args: "{}" }), + "", + textDelta("middle"), + "", + toolDelta(1, { id: "call_2", name: "b", args: "{}" }), + "", + "data: [DONE]", + "", + ].join("\n"); + const result = collapseOpenAISSE(body); + expect(result.blocks).toBeDefined(); + expect(result.blocks).toEqual([ + { type: "toolCall", name: "a", arguments: "{}", id: "call_1" }, + { type: "text", text: "middle" }, + { type: "toolCall", name: "b", arguments: "{}", id: "call_2" }, + ]); + }); + + it("text-only stream has no blocks", () => { + const body = [textDelta("just text"), "", "data: [DONE]", ""].join("\n"); + expect(collapseOpenAISSE(body).blocks).toBeUndefined(); + }); + + it("tool-only stream has no blocks (no content channel to order against)", () => { + const body = [ + toolDelta(0, { id: "call_1", name: "a", args: "{}" }), + "", + "data: [DONE]", + "", + ].join("\n"); + expect(collapseOpenAISSE(body).blocks).toBeUndefined(); + }); + + // ---- #274 F4/F5: blocks must agree with flat toolCalls ----------------- + + it("out-of-arrival-order tool indices: blocks[i] and toolCalls[i] are the SAME call (F4)", () => { + // First-arriving tool call carries the HIGHER index (5); second carries + // the LOWER (1). Index-sorting the flat list would put call_low first, + // disagreeing with the stream-arrival-ordered blocks. Interleave a text + // delta after the first tool so `blocks` is emitted. + const body = [ + toolDelta(5, { id: "call_high", name: "first_arrived", args: '{"a":1}' }), + "", + textDelta("between"), + "", + toolDelta(1, { id: "call_low", name: "second_arrived", args: '{"b":2}' }), + "", + "data: [DONE]", + "", + ].join("\n"); + const result = collapseOpenAISSE(body); + expect(result.blocks).toBeDefined(); + const blockToolCalls = result.blocks!.filter((b) => b.type === "toolCall"); + // blocks[i] and toolCalls[i] must describe the SAME call (consistent + // ordering + identity). 
Pre-fix the flat list was index-sorted + // (call_low first) while blocks were arrival-sorted (call_high first). + expect(result.toolCalls).toHaveLength(blockToolCalls.length); + result.toolCalls!.forEach((tc, i) => { + const block = blockToolCalls[i] as { name: string; arguments: string; id?: string }; + expect(block.name).toBe(tc.name); + expect(block.arguments).toBe(tc.arguments); + expect(block.id).toBe(tc.id); + }); + // Concretely: the first call by stream arrival is `first_arrived`. + expect(result.toolCalls![0].name).toBe("first_arrived"); + expect((blockToolCalls[0] as { name: string }).name).toBe("first_arrived"); + }); + + it("tool-call with no argument deltas: block arguments is valid JSON '{}', matching flat (F5)", () => { + // The first tool call NEVER receives any `arguments` fragment — its + // accumulator stays "". A text delta after it makes the stream + // interleaved so `blocks` is emitted. + const body = [ + toolDelta(0, { id: "call_noargs", name: "no_args" }), + "", + textDelta("after"), + "", + "data: [DONE]", + "", + ].join("\n"); + const result = collapseOpenAISSE(body); + expect(result.blocks).toBeDefined(); + const block = result.blocks!.find((b) => b.type === "toolCall") as { + arguments: string; + }; + // Pre-fix: block.arguments === "" (invalid JSON), flat sanitized to "{}". + expect(block.arguments).toBe("{}"); + expect(() => JSON.parse(block.arguments)).not.toThrow(); + // And it agrees with the flat representation. 
+ expect(result.toolCalls![0].arguments).toBe("{}"); + expect(block.arguments).toBe(result.toolCalls![0].arguments); + }); + }); + + // ---- Anthropic SSE ------------------------------------------------------- + describe("collapseAnthropicSSE blocks", () => { + const textBlock = (index: number, text: string) => + [ + `event: content_block_start`, + `data: ${JSON.stringify({ type: "content_block_start", index, content_block: { type: "text", text: "" } })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index, delta: { type: "text_delta", text } })}`, + "", + `event: content_block_stop`, + `data: ${JSON.stringify({ type: "content_block_stop", index })}`, + ].join("\n"); + const toolBlock = (index: number, id: string, name: string, args: string) => + [ + `event: content_block_start`, + `data: ${JSON.stringify({ type: "content_block_start", index, content_block: { type: "tool_use", id, name, input: {} } })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index, delta: { type: "input_json_delta", partial_json: args } })}`, + "", + `event: content_block_stop`, + `data: ${JSON.stringify({ type: "content_block_stop", index })}`, + ].join("\n"); + + it("text-first → no blocks", () => { + const body = [textBlock(0, "Hi"), "", toolBlock(1, "toolu_1", "fn", "{}"), ""].join("\n"); + const result = collapseAnthropicSSE(body); + expect(result.content).toBe("Hi"); + expect(result.toolCalls).toHaveLength(1); + expect(result.blocks).toBeUndefined(); + }); + + it("tool-first → blocks in tool-first order", () => { + const body = [toolBlock(0, "toolu_1", "fn", '{"x":1}'), "", textBlock(1, "after"), ""].join( + "\n", + ); + const result = collapseAnthropicSSE(body); + expect(result.blocks).toEqual([ + { type: "toolCall", name: "fn", arguments: '{"x":1}', id: "toolu_1" }, + { type: "text", text: "after" }, + ]); + }); + + it("tools→text→tools interleave captured in order", () => { + const 
body = [ + toolBlock(0, "toolu_1", "a", "{}"), + "", + textBlock(1, "mid"), + "", + toolBlock(2, "toolu_2", "b", "{}"), + "", + ].join("\n"); + const result = collapseAnthropicSSE(body); + expect(result.blocks).toEqual([ + { type: "toolCall", name: "a", arguments: "{}", id: "toolu_1" }, + { type: "text", text: "mid" }, + { type: "toolCall", name: "b", arguments: "{}", id: "toolu_2" }, + ]); + }); + }); + + // ---- Gemini SSE ---------------------------------------------------------- + describe("collapseGeminiSSE blocks", () => { + const textPart = (text: string) => + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text }] } }] })}`; + const fcPart = (name: string, args: Record) => + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ functionCall: { name, args } }] } }] })}`; + + it("text-first → no blocks", () => { + const body = [textPart("Hi"), "", fcPart("fn", {}), ""].join("\n"); + const result = collapseGeminiSSE(body); + expect(result.content).toBe("Hi"); + expect(result.toolCalls).toHaveLength(1); + expect(result.blocks).toBeUndefined(); + }); + + it("tool-first → blocks in tool-first order", () => { + const body = [fcPart("fn", { x: 1 }), "", textPart("after"), ""].join("\n"); + const result = collapseGeminiSSE(body); + expect(result.blocks).toEqual([ + { type: "toolCall", name: "fn", arguments: '{"x":1}' }, + { type: "text", text: "after" }, + ]); + }); + + it("tools→text→tools interleave captured in order", () => { + const body = [fcPart("a", {}), "", textPart("mid"), "", fcPart("b", {}), ""].join("\n"); + const result = collapseGeminiSSE(body); + expect(result.blocks).toEqual([ + { type: "toolCall", name: "a", arguments: "{}" }, + { type: "text", text: "mid" }, + { type: "toolCall", name: "b", arguments: "{}" }, + ]); + }); + + // #274 R2-N2: an interleaved Gemini AUDIO turn must NOT carry `blocks`. 
+ // The audio collapse shape (AudioResponse) has no `blocks` slot and the + // recorder's audio branch never persists it — so producing ordered blocks + // on the audio path is silently produced-then-dropped. The companion + // content / toolCalls / reasoning are still preserved (flat), just no blocks. + const audioPart = (mimeType: string, data: string) => + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ inlineData: { mimeType, data } }] } }] })}`; + + it("audio turn with interleaved tool/text deltas → no blocks (audio shape)", () => { + const body = [ + fcPart("fn", { x: 1 }), + "", + textPart("between"), + "", + audioPart("audio/mpeg", "QUJD"), + "", + ].join("\n"); + const result = collapseGeminiSSE(body); + // Audio shape: audio bytes captured, companions preserved flat. + expect(result.audioB64).toBe("QUJD"); + expect(result.audioMimeType).toBe("audio/mpeg"); + expect(result.content).toBe("between"); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("fn"); + // The audio result type cannot carry ordered blocks — must be absent. 
+ expect(result.blocks).toBeUndefined(); + }); + + it("non-audio interleaved turn STILL produces blocks (F4F5 regression guard)", () => { + const body = [fcPart("fn", { x: 1 }), "", textPart("after"), ""].join("\n"); + const result = collapseGeminiSSE(body); + expect(result.audioB64).toBeUndefined(); + expect(result.blocks).toEqual([ + { type: "toolCall", name: "fn", arguments: '{"x":1}' }, + { type: "text", text: "after" }, + ]); + }); + }); + + // ---- Ollama NDJSON ------------------------------------------------------- + describe("collapseOllamaNDJSON blocks", () => { + const textLine = (content: string) => + JSON.stringify({ model: "llama3", message: { role: "assistant", content }, done: false }); + const toolLine = (name: string, args: Record) => + JSON.stringify({ + model: "llama3", + message: { + role: "assistant", + content: "", + tool_calls: [{ function: { name, arguments: args } }], + }, + done: false, + }); + + it("text-first → no blocks", () => { + const body = [textLine("Hi"), toolLine("fn", {})].join("\n"); + const result = collapseOllamaNDJSON(body); + expect(result.content).toBe("Hi"); + expect(result.toolCalls).toHaveLength(1); + expect(result.blocks).toBeUndefined(); + }); + + it("tool-first → blocks in tool-first order", () => { + const body = [toolLine("fn", { x: 1 }), textLine("after")].join("\n"); + const result = collapseOllamaNDJSON(body); + expect(result.blocks).toEqual([ + { type: "toolCall", name: "fn", arguments: '{"x":1}' }, + { type: "text", text: "after" }, + ]); + }); + + it("tools→text→tools interleave captured in order", () => { + const body = [toolLine("a", {}), textLine("mid"), toolLine("b", {})].join("\n"); + const result = collapseOllamaNDJSON(body); + expect(result.blocks).toEqual([ + { type: "toolCall", name: "a", arguments: "{}" }, + { type: "text", text: "mid" }, + { type: "toolCall", name: "b", arguments: "{}" }, + ]); + }); + }); +}); diff --git a/src/__tests__/ws-responses.test.ts b/src/__tests__/ws-responses.test.ts 
index 5f32187c..eda04698 100644 --- a/src/__tests__/ws-responses.test.ts +++ b/src/__tests__/ws-responses.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect, afterEach } from "vitest"; import { createServer, type ServerInstance } from "../server.js"; -import type { Fixture } from "../types.js"; +import type { Fixture, FixtureBlock } from "../types.js"; import { connectWebSocket } from "./ws-test-client.js"; import { SKIPPED_BY_STATE_RE } from "./helpers/strict-matchers.js"; @@ -50,6 +50,23 @@ const toolReasoningFixture: Fixture = { }, }; +// Combined content+toolCalls fixture carrying an ORDERED `blocks` array placing +// the tool call BEFORE the text. On the WebSocket /v1/responses surface this +// must yield the function_call output item at a LOWER output_index than the +// message item — i.e. the function_call leads the output. Mirrors the HTTP +// fixture-blocks-responses.test.ts assertions but drives the WS dispatch path. +const wsBlocksToolFirstFixture: Fixture = { + match: { userMessage: "ws blocks tool-first" }, + response: { + content: "Here you go.", + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + blocks: [ + { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' }, + { type: "text", text: "Here you go." }, + ] as FixtureBlock[], + }, +}; + const allFixtures: Fixture[] = [ textFixture, toolFixture, @@ -57,6 +74,7 @@ const allFixtures: Fixture[] = [ reasoningFixture, capabilityReasoningFixture, toolReasoningFixture, + wsBlocksToolFirstFixture, ]; // --- tests --- @@ -368,6 +386,65 @@ describe("WebSocket /v1/responses", () => { ws.close(); }); + // ── #274: ordered `blocks` must flow through the WS Responses dispatch so the + // websocket surface honors tool-first ordering exactly like the HTTP path. 
── + it("honors fixture block order: function_call leads the message item (tool-first)", async () => { + instance = await createServer(allFixtures); + const ws = await connectWebSocket(instance.url, "/v1/responses"); + + ws.send(responseCreateMsg("ws blocks tool-first")); + + // Collect until response.completed (chunking makes the count variable). + const maxEvents = 50; + let events: WSEvent[] = []; + for (let count = 1; ; count++) { + if (count > maxEvents) { + throw new Error( + `response.completed never arrived within ${maxEvents} events ` + + `(last event type: ${events[events.length - 1]?.type})`, + ); + } + events = parseEvents(await ws.waitForMessages(count)); + if (events[events.length - 1].type === "response.completed") break; + } + + const fcAdded = events.find( + (e) => + e.type === "response.output_item.added" && + (e.item as { type: string })?.type === "function_call", + ); + const msgAdded = events.find( + (e) => + e.type === "response.output_item.added" && (e.item as { type: string })?.type === "message", + ); + expect(fcAdded).toBeDefined(); + expect(msgAdded).toBeDefined(); + // Tool-first: the function_call item takes output_index 0, the message 1. + expect((fcAdded as unknown as { output_index: number }).output_index).toBe(0); + expect((msgAdded as unknown as { output_index: number }).output_index).toBe(1); + + // The terminal completed.output array must lead with the function_call item. + const completed = events.find((e) => e.type === "response.completed"); + const output = (completed!.response as { output: Array<{ type: string }> }).output; + const types = output.map((o) => o.type); + expect(types.indexOf("function_call")).toBeLessThan(types.indexOf("message")); + expect(types[0]).toBe("function_call"); + + // Content + arguments still stream fully. 
+ const textDeltas = events + .filter((e) => e.type === "response.output_text.delta") + .map((e) => (e as unknown as { delta: string }).delta) + .join(""); + expect(textDeltas).toBe("Here you go."); + const argDeltas = events + .filter((e) => e.type === "response.function_call_arguments.delta") + .map((e) => (e as unknown as { delta: string }).delta) + .join(""); + expect(argDeltas).toBe('{"city":"NYC"}'); + + ws.close(); + }); + it("rejects WebSocket upgrade on non-responses path", async () => { instance = await createServer(allFixtures); From 2d132df21338a753edfd96c0fe0280f96c9efd1f Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 26 Jun 2026 23:11:49 -0700 Subject: [PATCH 5/5] docs(fixtures): document the blocks array and per-provider ordering observability (#274) Document the fixture blocks array, per-provider ordering behavior, and the observability surface, and record the change in the changelog. --- CHANGELOG.md | 3 + docs/chat-completions/index.html | 11 +++ docs/claude-messages/index.html | 10 +++ docs/fixtures/index.html | 122 ++++++++++++++++++++++++++++++- docs/gemini/index.html | 10 +++ docs/ollama/index.html | 10 +++ docs/responses-api/index.html | 10 +++ 7 files changed, 174 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3cc1bb3d..f964f84d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ - Record-mode live proxying for the Veo surface (`record.providers.veo`) — submit and poll forwarded 1:1, eager fixture capture of the Files-API uri on `done:true`; captured operations replay later (#278) - Native xAI Grok Imagine async video lifecycle mock — `POST /v1/videos/generations` submit (JSON-only; multipart rejected with 400), `GET /v1/videos/{request_id}` poll through `pending → done | failed | expired` with synthesized `progress`, `grokVideo` progression, `cost_in_usd_ticks` units, and a Sora-safe `/v1/videos/{id}` dispatch that leaves the OpenAI video surface unchanged (#278) - Record-mode live proxying 
for the Grok surface (`record.providers.grok`) — submit and poll forwarded 1:1, eager fixture capture of url/duration/cost on `done`, `failed` persisted, `expired` passed through; captured jobs replay later (#278) +- Optional `blocks` array on the combined `content` + `toolCalls` fixture shape lets a fixture express ordered text/tool-call blocks (`{type:"text",text}` | `{type:"toolCall",name,arguments,id?}`); when present it takes precedence over `{content, toolCalls}` for stream order, enabling tool-first and interleaved ordering. Legacy `{content, toolCalls}` fixtures are unchanged (#274) +- All five providers stream combined responses in fixture block order: Anthropic, OpenAI Responses, and Gemini are fully observable; Ollama is best-effort (clients may reassemble positionally); OpenAI chat-completions emits in order but is degenerate (`delta.content`/`delta.tool_calls` are separate channels the client merges) (#274) +- Recorder captures block order and persists `blocks` only when the recorded upstream stream was genuinely tool-first or interleaved; text-first streams keep the legacy `{content, toolCalls}` shape so golden recordings round-trip byte-identically (#274) ## [1.34.0] - 2026-06-24 diff --git a/docs/chat-completions/index.html b/docs/chat-completions/index.html index 11572fbc..ac7f3aaa 100644 --- a/docs/chat-completions/index.html +++ b/docs/chat-completions/index.html @@ -227,6 +227,17 @@

Streaming (stream: true)

ChatCompletionChunk type with delta instead of message.

+ +

Ordered blocks (tool-first)

+

+ A combined content + toolCalls fixture accepts an optional + blocks array to control stream order — see + Ordered blocks. On chat-completions this is + degenerate: delta.content and delta.tool_calls + are separate channels the client merges, so the mock emits chunks in block order (the wire + order is assertable) but tool-first is not positionally observable to clients. Use + Anthropic, the Responses API, or Gemini for fully observable tool-first ordering. +

diff --git a/docs/claude-messages/index.html b/docs/claude-messages/index.html index 30d961f6..d999821d 100644 --- a/docs/claude-messages/index.html +++ b/docs/claude-messages/index.html @@ -146,6 +146,16 @@

Request Translation

arrays (including content block arrays) to OpenAI-style messages so the same fixtures work across all providers.

+ +

Ordered blocks (tool-first)

+

+ A combined content + toolCalls fixture accepts an optional + blocks array to control stream order — see + Ordered blocks. Claude Messages has + full support: typed text / tool_use content + blocks stream at incrementing indices in array order, so tool-first and interleaved + ordering are natively observable to clients. +

diff --git a/docs/fixtures/index.html b/docs/fixtures/index.html index 83e9b96c..75bbd646 100644 --- a/docs/fixtures/index.html +++ b/docs/fixtures/index.html @@ -312,8 +312,12 @@

Response Types

Content + Tool Calls - content, toolCalls[], reasoning?, finishReason? - Text and tool calls in a single response + content, toolCalls[], blocks?, reasoning?, finishReason? + + Text and tool calls in a single response. Add an optional + blocks array to control stream order (e.g. tool-first) — see + Ordered blocks below. + Error @@ -362,6 +366,120 @@

Response Types

+

Ordered blocks (tool-first & interleaved streaming)

+

+ By default a Content + Tool Calls response streams its text first, then + its tool calls. To control that order — for example to emit a tool call + before any text (“tool-first”), or to interleave text and tool calls + — add an optional blocks array. Each entry is one of: +

+
    +
  • { "type": "text", "text": "..." } — a text segment
  • +
  • + { "type": "toolCall", "name": "...", "arguments": ..., "id": "..." } + — a tool call (id optional; arguments accepts an object + or string, same auto-stringify rules as elsewhere) +
  • +
+

+ When blocks is present it takes precedence over the + content and toolCalls fields for stream ordering: the blocks are + streamed in array order. (Keep content and toolCalls populated + as well — they remain the canonical aggregate for replay and for consumers that do + not read blocks.) When blocks is absent, legacy + { content, toolCalls } fixtures stream exactly as before — text-first, + byte-identical to prior releases. The field is purely additive. +

+
+
tool-first.json json
+
{
+  "content": "Here is the weather.",
+  "toolCalls": [
+    { "name": "get_weather", "arguments": { "city": "SF" } }
+  ],
+  "blocks": [
+    { "type": "toolCall", "name": "get_weather", "arguments": { "city": "SF" }, "id": "call_1" },
+    { "type": "text", "text": "Here is the weather." }
+  ]
+}
+
+

+ The example above streams the get_weather tool call before the text. + For an interleaved stream, list blocks in the desired order, e.g. + [toolCall, text, toolCall]. +

+ +

Per-provider observability

+

+ How faithfully “tool-first” / interleaved order is observable depends on each + provider's wire protocol. The mock always emits chunks in block order; what a client can + reconstruct from those chunks varies: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ProviderBlock-order supportNotes
Anthropic (Claude Messages)Full + Typed text / tool_use content blocks at incrementing + indices — tool-first and interleaved are natively observable. +
OpenAI Responses APIFull + Ordered output items (message vs function_call) carry + output_index — SDKs honor the order, so a tool call can precede + the message. +
GeminiFull + Ordered parts/candidate chunks carry functionCall and text in any + order. +
OllamaPartial + A tool_calls chunk can be emitted before content on the wire, but some + clients reassemble positionally. Best-effort. +
OpenAI chat-completionsDegenerate + delta.content and delta.tool_calls are separate channels + the client merges. The mock emits chunks in block order (and the wire order is + assertable), but the merge is not positionally interleaved, so tool-first + is not semantically observable to clients on this channel. +
+
+

+ Recording: In record mode the recorder persists a + blocks array only when the recorded upstream stream was + genuinely tool-first or interleaved (a tool-call delta arrives before the first + content delta, or content arrives after a tool-call delta). Ordinary text-then-tools + streams are saved in the legacy { content, toolCalls } shape with no + blocks key, so existing golden recordings round-trip byte-identically. +

+
+

JSON auto-stringify: In fixture files and programmatic API, diff --git a/docs/gemini/index.html b/docs/gemini/index.html index e0ca009c..032f6c79 100644 --- a/docs/gemini/index.html +++ b/docs/gemini/index.html @@ -183,6 +183,16 @@

Vertex AI

The same fixtures work for both Gemini AI Studio and Vertex AI endpoints. See the Vertex AI page for configuration details.

+ +

Ordered blocks (tool-first)

+

+ A combined content + toolCalls fixture accepts an optional + blocks array to control stream order — see + Ordered blocks. Gemini has + full support: ordered parts/candidate chunks carry + functionCall and text in array order, so tool-first and interleaved ordering + are observable to clients. +

diff --git a/docs/ollama/index.html b/docs/ollama/index.html index ab675a6c..50ecc137 100644 --- a/docs/ollama/index.html +++ b/docs/ollama/index.html @@ -255,6 +255,16 @@

Request Translation

options.num_predict to max_tokens, so the same fixtures work across all providers.

+ +

Ordered blocks (tool-first)

+

+ A combined content + toolCalls fixture accepts an optional + blocks array to control stream order — see + Ordered blocks. Ollama support is + partial: a tool_calls chunk can be emitted before content on + the NDJSON wire, but some clients reassemble positionally, so tool-first is best-effort on + this provider. +

diff --git a/docs/responses-api/index.html b/docs/responses-api/index.html index d2504ead..73df6704 100644 --- a/docs/responses-api/index.html +++ b/docs/responses-api/index.html @@ -160,6 +160,16 @@

SSE Event Sequence

WebSocket APIs page for WebSocket-specific details.

+ +

Ordered blocks (tool-first)

+

+ A combined content + toolCalls fixture accepts an optional + blocks array to control stream order — see + Ordered blocks. The Responses API has + full support: output items (message vs + function_call) are assigned output_index in array order, so a + tool call can precede the message and SDKs honor the ordering. +