From 9ae4d087d98e434935f323caa1edba5219a9ea7d Mon Sep 17 00:00:00 2001
From: Jordan Ritter
Date: Fri, 26 Jun 2026 23:11:23 -0700
Subject: [PATCH 1/5] feat(fixtures): add blocks type, validator, and
loader/factory normalization (#274)
Introduce the fixture blocks array type with validation, and normalize
loader/factory paths so block-ordered fixtures flow through consistently.
---
src/fixture-loader.ts | 109 +++++++++++++++++++++
src/helpers.ts | 220 ++++++++++++++++++++++++++++++++++++++++++
src/index.ts | 2 +
src/types.ts | 46 +++++++++
4 files changed, 377 insertions(+)
diff --git a/src/fixture-loader.ts b/src/fixture-loader.ts
index 5caa8d10..498749e6 100644
--- a/src/fixture-loader.ts
+++ b/src/fixture-loader.ts
@@ -46,6 +46,25 @@ export function normalizeResponse(raw: FixtureFileResponse): FixtureResponse {
});
}
+ // Carry the optional ordered `blocks` array through, mirroring the
+ // toolCalls[].arguments idiom above: auto-stringify object `arguments` on
+ // each `toolCall` block. Gated on Array.isArray so a malformed (non-array)
+ // `blocks` value passes through untouched rather than crashing — downstream
+ // validation/builders own shape rejection. Absent `blocks` → key absent.
+ if (Array.isArray(response.blocks)) {
+ response.blocks = (response.blocks as Array>).map((block) => {
+ if (
+ block != null &&
+ block.type === "toolCall" &&
+ typeof block.arguments === "object" &&
+ block.arguments !== null
+ ) {
+ return { ...block, arguments: JSON.stringify(block.arguments) };
+ }
+ return block;
+ });
+ }
+
return response as unknown as FixtureResponse;
}
@@ -257,6 +276,91 @@ function validateWebSearches(
}
}
+/**
+ * Validate the optional ordered `blocks` array on a fixture response at LOAD
+ * time, pushing one `severity: "error"` entry onto `results` per problem.
+ *
+ * Mirrors the toolCalls checks: rejecting a malformed `blocks` here keeps it
+ * from reaching the dispatch/builder, where resolveFixtureBlocks throws AFTER
+ * the journal has already recorded status:200 (a journal-200/client-500
+ * mismatch). #274 F3+F8.
+ *
+ * @param response - Response under validation; only `blocks` is read.
+ * @param fixtureIndex - Index of the owning fixture, copied onto each result.
+ * @param results - Accumulator mutated in place; nothing is returned.
+ */
+function validateBlocks(
+  response: { blocks?: unknown },
+  fixtureIndex: number,
+  results: ValidationResult[],
+): void {
+  const { blocks } = response;
+  // `blocks` is optional — an absent key is valid and produces no results.
+  if (blocks === undefined) return;
+
+  const reportError = (message: string): void => {
+    results.push({ severity: "error", fixtureIndex, message });
+  };
+
+  if (!Array.isArray(blocks)) {
+    reportError(`blocks must be an array, got ${typeof blocks}`);
+    return;
+  }
+
+  for (let j = 0; j < blocks.length; j++) {
+    // Entries are untrusted fixture data, so treat each as an open record
+    // (or null/undefined) and narrow field by field.
+    const block = blocks[j] as Record<string, unknown> | null | undefined;
+    if (typeof block !== "object" || block === null) {
+      reportError(`blocks[${j}] must be an object`);
+      continue;
+    }
+    if (block.type !== "text" && block.type !== "toolCall") {
+      // Without a known type none of the per-type checks apply; skip them.
+      reportError(
+        `blocks[${j}].type must be "text" or "toolCall", got ${JSON.stringify(block.type)}`,
+      );
+      continue;
+    }
+    if (block.type === "text") {
+      if (typeof block.text !== "string") {
+        reportError(`blocks[${j}].text must be a string, got ${typeof block.text}`);
+      }
+      continue;
+    }
+
+    // toolCall block — mirror toolCalls[] name + arguments checks.
+    if (typeof block.name !== "string" || block.name === "") {
+      reportError(`blocks[${j}].name must be a non-empty string`);
+    }
+    // `arguments` may be a JSON string (which must parse) or an object/array
+    // (the relaxed on-disk form); anything else is an error.
+    if (typeof block.arguments === "string") {
+      try {
+        JSON.parse(block.arguments);
+      } catch {
+        reportError(`blocks[${j}].arguments is not valid JSON: ${block.arguments}`);
+      }
+    } else if (typeof block.arguments !== "object" || block.arguments === null) {
+      reportError(
+        `blocks[${j}].arguments must be a JSON string or object, got ${typeof block.arguments}`,
+      );
+    }
+    if (block.id !== undefined && typeof block.id !== "string") {
+      reportError(`blocks[${j}].id must be a string, got ${typeof block.id}`);
+    }
+  }
+}
+
export function validateFixtures(fixtures: Fixture[]): ValidationResult[] {
const results: ValidationResult[] = [];
@@ -348,6 +452,11 @@ export function validateFixtures(fixtures: Fixture[]): ValidationResult[] {
validateWebSearches(response, i, results);
}
+ // Optional ordered `blocks` checks — validated whenever present on the
+ // response, regardless of which content/toolCalls guard matched, so a
+ // malformed blocks array is rejected at LOAD rather than at dispatch.
+ validateBlocks(response as { blocks?: unknown }, i, results);
+
// Tool call response checks
if (isToolCallResponse(response)) {
if (response.toolCalls.length === 0) {
diff --git a/src/helpers.ts b/src/helpers.ts
index 2bb47b32..40af7402 100644
--- a/src/helpers.ts
+++ b/src/helpers.ts
@@ -21,6 +21,7 @@ import type {
RawJSONResponse,
SSEChunk,
ToolCall,
+ FixtureBlock,
ChatCompletion,
ResponseOverrides,
} from "./types.js";
@@ -236,6 +237,24 @@ function normalizeFactoryResponse(raw: FixtureResponse): FixtureResponse {
return { ...tc };
});
}
+ // Mirror the toolCalls[].arguments idiom for the optional ordered `blocks`
+ // array: auto-stringify object `arguments` on each `toolCall` block so a
+ // programmatic ResponseFactory may return objects (resolveFixtureBlocks
+ // requires string `arguments`). Text blocks and string arguments pass
+ // through unchanged. Matches the loader's block handling.
+ if (Array.isArray(r.blocks)) {
+ r.blocks = (r.blocks as Array>).map((block) => {
+ if (
+ block != null &&
+ block.type === "toolCall" &&
+ typeof block.arguments === "object" &&
+ block.arguments !== null
+ ) {
+ return { ...block, arguments: JSON.stringify(block.arguments) };
+ }
+ return { ...block };
+ });
+ }
return r as unknown as FixtureResponse;
}
@@ -278,6 +297,61 @@ export function isContentWithToolCallsResponse(
);
}
+/**
+ * Validate and pass through the ordered `blocks` field of a combined
+ * content+toolCalls fixture. Used ONLY on the new block-iteration path (when a
+ * fixture explicitly sets `blocks`); it is NOT a legacy-order reconstructor —
+ * fixtures without `blocks` never reach this function and keep their unchanged
+ * text-first path.
+ *
+ * An EMPTY `blocks` array is treated as "no blocks" by every builder's
+ * streaming gate (`blocks && blocks.length > 0`), so the builders fall back to
+ * the legacy `{content, toolCalls}` path and only call this with a non-empty
+ * array — the gate is the single source of truth for "has blocks". The
+ * function is also exported, though: called directly with an empty array it
+ * has nothing to check and returns that array as-is.
+ *
+ * Each entry must be a valid {@link FixtureBlock}: a `text` block with a
+ * string `text`, or a `toolCall` block with string `name` + `arguments` (and
+ * an optional string `id`).
+ *
+ * @param blocks - Candidate blocks; typed as `FixtureBlock[]` but re-checked at
+ *   runtime because the value may originate from untyped fixture data.
+ * @returns The same array instance, in its original order (no copy is made).
+ * @throws Error on a non-array value or the first malformed entry — same
+ *   fail-fast idiom as the other fixture validators in this module (see e.g.
+ *   the factory guard at {@link resolveResponse}).
+ */
+export function resolveFixtureBlocks(blocks: FixtureBlock[]): FixtureBlock[] {
+  if (!Array.isArray(blocks)) {
+    throw new Error(`Invalid fixture blocks: expected an array, got ${typeof blocks}`);
+  }
+  blocks.forEach((block, i) => {
+    if (block === null || typeof block !== "object") {
+      throw new Error(`Invalid fixture block at index ${i}: expected an object`);
+    }
+    // Widen to an open record so fields outside the declared union (e.g. an
+    // unknown `type`) can be inspected without the compiler assuming validity.
+    const b = block as Record<string, unknown>;
+    if (b.type === "text") {
+      if (typeof b.text !== "string") {
+        throw new Error(
+          `Invalid fixture block at index ${i}: "text" block requires a string "text" field`,
+        );
+      }
+    } else if (b.type === "toolCall") {
+      if (typeof b.name !== "string" || typeof b.arguments !== "string") {
+        throw new Error(
+          `Invalid fixture block at index ${i}: "toolCall" block requires string "name" and "arguments" fields`,
+        );
+      }
+      if (b.id !== undefined && typeof b.id !== "string") {
+        throw new Error(
+          `Invalid fixture block at index ${i}: "toolCall" block "id" must be a string when present`,
+        );
+      }
+    } else {
+      throw new Error(
+        `Invalid fixture block at index ${i}: unknown type ${JSON.stringify(b.type)} (expected "text" or "toolCall")`,
+      );
+    }
+  });
+  return blocks;
+}
+
export function isErrorResponse(r: FixtureResponse): r is ErrorResponse {
return (
"error" in r &&
@@ -752,6 +826,7 @@ export function buildContentWithToolCallsChunks(
chunkSize: number,
reasoning?: string,
overrides?: ResponseOverrides,
+ blocks?: FixtureBlock[],
): SSEChunk[] {
const id = overrides?.id ?? generateId();
const created = overrides?.created ?? Math.floor(Date.now() / 1000);
@@ -759,6 +834,143 @@ export function buildContentWithToolCallsChunks(
const chunks: SSEChunk[] = [];
const fingerprint = overrides?.systemFingerprint;
+ if (blocks && blocks.length > 0) {
+ // NEW: emit chunks in fixture block array order.
+ //
+ // DEGENERATE PROVIDER NOTE: in OpenAI chat-completions, `delta.content` and
+ // `delta.tool_calls` are SEPARATE channels that the client merges with no
+ // positional interleaving. So "tool-call-before-text" is NOT semantically
+ // observable to a real client — it reassembles content and tool calls into
+ // their own buckets regardless of chunk order. We still emit honest
+ // array-order chunks (the SSE chunk SEQUENCE is the contract this path
+ // asserts), but we do NOT fake interleaving the channel cannot express.
+ const ordered = resolveFixtureBlocks(blocks);
+
+ // Reasoning chunks (emitted first, OpenRouter format) — unchanged from legacy.
+ if (reasoning) {
+ for (let i = 0; i < reasoning.length; i += chunkSize) {
+ const slice = reasoning.slice(i, i + chunkSize);
+ chunks.push({
+ id,
+ object: "chat.completion.chunk",
+ created,
+ model: effectiveModel,
+ choices: [
+ { index: 0, delta: { reasoning_content: slice }, logprobs: null, finish_reason: null },
+ ],
+ ...(fingerprint !== undefined && { system_fingerprint: fingerprint }),
+ });
+ }
+ }
+
+ // Role chunk — preserved exactly as the legacy path.
+ chunks.push({
+ id,
+ object: "chat.completion.chunk",
+ created,
+ model: effectiveModel,
+ choices: [
+ {
+ index: 0,
+ delta: { role: overrides?.role ?? "assistant", content: "" },
+ logprobs: null,
+ finish_reason: null,
+ },
+ ],
+ ...(fingerprint !== undefined && { system_fingerprint: fingerprint }),
+ });
+
+ // Tool-call `index` is assigned in encounter order across the block array.
+ let tcIdx = 0;
+ for (const block of ordered) {
+ if (block.type === "text") {
+ for (let i = 0; i < block.text.length; i += chunkSize) {
+ const slice = block.text.slice(i, i + chunkSize);
+ chunks.push({
+ id,
+ object: "chat.completion.chunk",
+ created,
+ model: effectiveModel,
+ choices: [{ index: 0, delta: { content: slice }, logprobs: null, finish_reason: null }],
+ ...(fingerprint !== undefined && { system_fingerprint: fingerprint }),
+ });
+ }
+ } else {
+ const tcId = block.id || generateToolCallId();
+
+ // Initial tool call chunk (id + function name)
+ chunks.push({
+ id,
+ object: "chat.completion.chunk",
+ created,
+ model: effectiveModel,
+ choices: [
+ {
+ index: 0,
+ delta: {
+ tool_calls: [
+ {
+ index: tcIdx,
+ id: tcId,
+ type: "function",
+ function: { name: block.name, arguments: "" },
+ },
+ ],
+ },
+ logprobs: null,
+ finish_reason: null,
+ },
+ ],
+ ...(fingerprint !== undefined && { system_fingerprint: fingerprint }),
+ });
+
+ // Argument streaming chunks
+ const args = block.arguments;
+ for (let i = 0; i < args.length; i += chunkSize) {
+ const slice = args.slice(i, i + chunkSize);
+ chunks.push({
+ id,
+ object: "chat.completion.chunk",
+ created,
+ model: effectiveModel,
+ choices: [
+ {
+ index: 0,
+ delta: {
+ tool_calls: [{ index: tcIdx, function: { arguments: slice } }],
+ },
+ logprobs: null,
+ finish_reason: null,
+ },
+ ],
+ ...(fingerprint !== undefined && { system_fingerprint: fingerprint }),
+ });
+ }
+ tcIdx++;
+ }
+ }
+
+ // Finish chunk — preserved exactly as the legacy path.
+ chunks.push({
+ id,
+ object: "chat.completion.chunk",
+ created,
+ model: effectiveModel,
+ choices: [
+ {
+ index: 0,
+ delta: {},
+ logprobs: null,
+ finish_reason: overrides?.finishReason ?? "tool_calls",
+ },
+ ],
+ ...(fingerprint !== undefined && { system_fingerprint: fingerprint }),
+ });
+
+ return chunks;
+ }
+
+ // EXISTING legacy code, byte-for-byte UNCHANGED.
// Reasoning chunks (emitted before content, OpenRouter format)
if (reasoning) {
for (let i = 0; i < reasoning.length; i += chunkSize) {
@@ -881,6 +1093,14 @@ export function buildContentWithToolCallsChunks(
return chunks;
}
+// NOTE (#274): this NON-streaming OpenAI chat-completions builder is
+// intentionally degenerate w.r.t. `blocks` ordering. A chat.completion puts
+// `message.content` and `message.tool_calls` in SEPARATE fields on a single
+// message object — they are NOT a positionally-observable array, so a
+// tool-first `blocks` fixture cannot be expressed in the wire shape. Honoring
+// block order here would be a no-op, so the legacy content+tool_calls fields
+// are unchanged. (Order-observable surfaces — Claude `content[]`, Gemini
+// `parts[]`, Responses `output[]` — DO honor block order; see those builders.)
export function buildContentWithToolCallsCompletion(
content: string,
toolCalls: ToolCall[],
diff --git a/src/index.ts b/src/index.ts
index 838f3cd3..b93c3030 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -133,6 +133,7 @@ export {
isTextResponse,
isToolCallResponse,
isContentWithToolCallsResponse,
+ resolveFixtureBlocks,
isErrorResponse,
isEmbeddingResponse,
isImageResponse,
@@ -334,6 +335,7 @@ export type {
FixtureMatch,
TextResponse,
ToolCall,
+ FixtureBlock,
ToolCallResponse,
ErrorResponse,
EmbeddingResponse,
diff --git a/src/types.ts b/src/types.ts
index dceaca5b..51fae37e 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -186,6 +186,20 @@ export interface ToolCall {
id?: string;
}
+/**
+ * A single ordered streaming block for a {@link ContentWithToolCallsResponse}.
+ *
+ * When a combined content+toolCalls fixture sets the optional `blocks` field,
+ * builders stream the blocks in array order — enabling tool-call-before-text
+ * and interleaved orderings that the legacy `{ content, toolCalls }` shape
+ * (always text-first) cannot express. A `text` block carries a text segment; a
+ * `toolCall` block mirrors {@link ToolCall} (`name` + JSON-string `arguments`,
+ * optional `id`).
+ *
+ * Array order is only observable where the wire format is itself an ordered
+ * array — the builder notes in this change name Claude `content[]`, Gemini
+ * `parts[]` and Responses `output[]`. The non-streaming OpenAI
+ * chat-completions and Ollama builders keep `content` and `tool_calls` in
+ * separate fields and are documented as leaving those fields unchanged.
+ */
+export type FixtureBlock =
+ | { type: "text"; text: string }
+ | { type: "toolCall"; name: string; arguments: string; id?: string };
+
export interface ToolCallResponse extends ResponseOverrides {
toolCalls: ToolCall[];
reasoning?: string;
@@ -199,6 +213,13 @@ export interface ToolCallResponse extends ResponseOverrides {
export interface ContentWithToolCallsResponse extends ResponseOverrides {
content: string;
toolCalls: ToolCall[];
+ /**
+ * Optional ordered streaming blocks. When present, builders stream these in
+ * array order (tool-first / interleaved); when absent, the legacy
+ * `{ content, toolCalls }` text-first path runs unchanged. Purely additive —
+ * `isContentWithToolCallsResponse` still requires `content` + `toolCalls`.
+ */
+ blocks?: FixtureBlock[];
reasoning?: string;
/** Real Anthropic thinking-block signature; see {@link TextResponse.reasoningSignature}. */
reasoningSignature?: string;
@@ -416,6 +437,22 @@ export interface FixtureFileToolCall {
id?: string;
}
+/**
+ * On-disk counterpart of {@link FixtureBlock}. A `toolCall` block's
+ * `arguments` is relaxed exactly like {@link FixtureFileToolCall} so authors
+ * may write a JSON object/array; the loader JSON.stringifies it into the
+ * runtime string form. Normalizes to a {@link FixtureBlock}.
+ */
+export type FixtureFileBlock =
+  | { type: "text"; text: string }
+  | {
+      type: "toolCall";
+      name: string;
+      /** Accepts a JSON object or array for convenience — the loader will JSON.stringify it. */
+      arguments: string | Record<string, unknown> | unknown[];
+      id?: string;
+    };
+
export interface FixtureFileToolCallResponse extends ResponseOverrides {
toolCalls: FixtureFileToolCall[];
reasoning?: string;
@@ -441,6 +478,15 @@ export interface FixtureFileContentWithToolCallsResponse extends ResponseOverrid
/** Accepts a JSON object or array (structured output) — the loader will JSON.stringify it. */
content: string | Record | unknown[];
toolCalls: FixtureFileToolCall[];
+ /**
+ * Optional ordered streaming blocks (mirrors the in-memory
+ * {@link ContentWithToolCallsResponse.blocks}). When present, builders stream
+ * these in array order (tool-first / interleaved); a `toolCall` block's
+ * object `arguments` is auto-stringified just like `toolCalls[].arguments`.
+ * Absent → legacy text-first path runs unchanged. Purely additive. Uses the
+ * on-disk {@link FixtureFileBlock} shape with relaxed `arguments`.
+ */
+ blocks?: FixtureFileBlock[];
reasoning?: string;
/** Real Anthropic thinking-block signature; see {@link TextResponse.reasoningSignature}. */
reasoningSignature?: string;
From fd7cb17276a5dfca5324b33734ae6916746daee1 Mon Sep 17 00:00:00 2001
From: Jordan Ritter
Date: Fri, 26 Jun 2026 23:11:34 -0700
Subject: [PATCH 2/5] feat(fixtures): stream and non-stream fixture blocks in
array order across providers (#274)
Emit fixture blocks in their declared array order for both streaming and
non-streaming paths across the Anthropic, OpenAI, Gemini, Ollama, Responses,
and WebSocket providers.
---
src/gemini.ts | 92 +++++++++++++++-
src/messages.ts | 139 +++++++++++++++++++++++--
src/ollama.ts | 91 ++++++++++++++++
src/responses.ts | 248 +++++++++++++++++++++++++++++++++-----------
src/server.ts | 1 +
src/ws-responses.ts | 1 +
6 files changed, 502 insertions(+), 70 deletions(-)
diff --git a/src/gemini.ts b/src/gemini.ts
index f2e8a9f7..b7b349b0 100644
--- a/src/gemini.ts
+++ b/src/gemini.ts
@@ -12,6 +12,7 @@ import type {
ChatCompletionRequest,
ChatMessage,
Fixture,
+ FixtureBlock,
HandlerDefaults,
RecordedTimings,
RecordProviderKey,
@@ -31,6 +32,7 @@ import {
flattenHeaders,
getContext,
getTestId,
+ resolveFixtureBlocks,
resolveResponse,
resolveStrictMode,
resolveReasoningForModel,
@@ -442,6 +444,7 @@ function buildGeminiContentWithToolCallsStreamChunks(
logger: Logger,
reasoning?: string,
overrides?: ResponseOverrides,
+ blocks?: FixtureBlock[],
): GeminiResponseChunk[] {
const chunks: GeminiResponseChunk[] = [];
@@ -460,6 +463,69 @@ function buildGeminiContentWithToolCallsStreamChunks(
}
}
+ if (blocks && blocks.length > 0) {
+ // NEW path (#274): stream chunks whose parts follow the blocks' ARRAY ORDER,
+ // so a tool-first / interleaved fixture emits its functionCall part before
+ // its text part. Gemini's ordered `parts` make this fully expressible. The
+ // terminal block carries the finishReason regardless of its type. Legacy
+ // fixtures (no `blocks`) never enter here — this branch returns early and
+ // they fall through to the unchanged legacy code below.
+ const resolved = resolveFixtureBlocks(blocks);
+ resolved.forEach((block, i) => {
+ const isLast = i === resolved.length - 1;
+ const finishReason = isLast
+ ? geminiFinishReason(overrides?.finishReason, "FUNCTION_CALL")
+ : undefined;
+ if (block.type === "toolCall") {
+ const part = parseToolCallPart(
+ { name: block.name, arguments: block.arguments, id: block.id },
+ logger,
+ );
+ chunks.push({
+ candidates: [
+ {
+ content: { role: "model", parts: [part] },
+ ...(finishReason ? { finishReason } : {}),
+ index: 0,
+ },
+ ],
+ ...(isLast ? { usageMetadata: geminiUsageMetadata(overrides) } : {}),
+ });
+ } else {
+ const text = block.text;
+ if (text.length === 0) {
+ chunks.push({
+ candidates: [
+ {
+ content: { role: "model", parts: [{ text: "" }] },
+ ...(finishReason ? { finishReason } : {}),
+ index: 0,
+ },
+ ],
+ ...(isLast ? { usageMetadata: geminiUsageMetadata(overrides) } : {}),
+ });
+ } else {
+ for (let j = 0; j < text.length; j += chunkSize) {
+ const slice = text.slice(j, j + chunkSize);
+ const lastSlice = j + chunkSize >= text.length;
+ const sliceFinish = isLast && lastSlice ? finishReason : undefined;
+ chunks.push({
+ candidates: [
+ {
+ content: { role: "model", parts: [{ text: slice }] },
+ ...(sliceFinish ? { finishReason: sliceFinish } : {}),
+ index: 0,
+ },
+ ],
+ ...(isLast && lastSlice ? { usageMetadata: geminiUsageMetadata(overrides) } : {}),
+ });
+ }
+ }
+ }
+ });
+
+ return chunks;
+ }
+
if (content.length === 0) {
chunks.push({
candidates: [
@@ -505,13 +571,33 @@ function buildGeminiContentWithToolCallsResponse(
logger: Logger,
reasoning?: string,
overrides?: ResponseOverrides,
+ blocks?: FixtureBlock[],
): GeminiResponseChunk {
const parts: GeminiPart[] = [];
if (reasoning) {
parts.push({ text: reasoning, thought: true });
}
- parts.push({ text: content });
- parts.push(...toolCalls.map((tc) => parseToolCallPart(tc, logger)));
+
+ if (blocks && blocks.length > 0) {
+ // NEW PATH: the non-streaming `parts[]` array is positionally observable, so
+ // emit parts in the fixture's ARRAY ORDER (after any leading thought part).
+ // A toolCall block before a text block therefore yields a functionCall part
+ // ahead of the text — matching the streaming path for the same `blocks`.
+ const resolved = resolveFixtureBlocks(blocks);
+ for (const block of resolved) {
+ if (block.type === "toolCall") {
+ parts.push(
+ parseToolCallPart({ name: block.name, arguments: block.arguments, id: block.id }, logger),
+ );
+ } else {
+ parts.push({ text: block.text });
+ }
+ }
+ } else {
+ // LEGACY PATH (unchanged): text part first, then functionCall parts.
+ parts.push({ text: content });
+ parts.push(...toolCalls.map((tc) => parseToolCallPart(tc, logger)));
+ }
return {
candidates: [
@@ -920,6 +1006,7 @@ export async function handleGemini(
logger,
effReasoning,
overrides,
+ response.blocks,
);
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify(body));
@@ -931,6 +1018,7 @@ export async function handleGemini(
logger,
effReasoning,
overrides,
+ response.blocks,
);
const interruption = createInterruptionSignal(fixture);
const completed = await writeGeminiSSEStream(res, chunks, {
diff --git a/src/messages.ts b/src/messages.ts
index a1a8c4b0..c0fbc7bb 100644
--- a/src/messages.ts
+++ b/src/messages.ts
@@ -13,6 +13,7 @@ import type {
Fixture,
HandlerDefaults,
RecordedTimings,
+ FixtureBlock,
ResponseOverrides,
StreamingProfile,
ToolCall,
@@ -26,6 +27,7 @@ import {
isToolCallResponse,
isContentWithToolCallsResponse,
isErrorResponse,
+ resolveFixtureBlocks,
flattenHeaders,
getTestId,
resolveResponse,
@@ -816,6 +818,7 @@ function buildClaudeContentWithToolCallsStreamEvents(
overrides?: ResponseOverrides,
reasoningSignature?: string,
redactedThinking?: string[],
+ blocks?: FixtureBlock[],
): ClaudeSSEEvent[] {
const msgId = overrides?.id ?? generateMessageId();
const effectiveModel = overrides?.model ?? model;
@@ -842,10 +845,11 @@ function buildClaudeContentWithToolCallsStreamEvents(
let blockIndex = 0;
// Redacted-thinking blocks lead the turn (before thinking / text / tool_use);
- // see the helper for the ordering caveat.
+ // see the helper for the ordering caveat. Applies to both the legacy and the
+ // ordered-`blocks` paths.
blockIndex = pushRedactedThinkingStreamEvents(events, blockIndex, redactedThinking);
- // Optional thinking block
+ // Optional thinking block — also shared by both paths.
if (reasoning) {
// Real Anthropic emits an empty `signature` on the thinking
// `content_block_start`; the cryptographic signature arrives only via the
@@ -879,6 +883,99 @@ function buildClaudeContentWithToolCallsStreamEvents(
blockIndex++;
}
+ if (blocks && blocks.length > 0) {
+ // NEW PATH: stream `text`/`tool_use` content blocks in the fixture's array
+ // order. Anthropic is fully tool-first capable — a `toolCall` block can take
+ // a lower `index` than a `text` block. Content-block indices are assigned in
+ // encounter order, continuing from any leading thinking/redacted blocks.
+ const ordered = resolveFixtureBlocks(blocks);
+
+ for (const block of ordered) {
+ if (block.type === "text") {
+ events.push({
+ type: "content_block_start",
+ index: blockIndex,
+ content_block: { type: "text", text: "" },
+ });
+
+ for (let i = 0; i < block.text.length; i += chunkSize) {
+ const slice = block.text.slice(i, i + chunkSize);
+ events.push({
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: { type: "text_delta", text: slice },
+ });
+ }
+
+ events.push({
+ type: "content_block_stop",
+ index: blockIndex,
+ });
+
+ blockIndex++;
+ } else {
+ const toolUseId = block.id || generateToolUseId();
+
+ let argsObj: unknown;
+ try {
+ argsObj = JSON.parse(block.arguments || "{}");
+ } catch {
+ logger.warn(
+ `Malformed JSON in fixture tool call arguments for "${block.name}": ${block.arguments}`,
+ );
+ argsObj = {};
+ }
+ const argsJson = JSON.stringify(argsObj);
+
+ events.push({
+ type: "content_block_start",
+ index: blockIndex,
+ content_block: {
+ type: "tool_use",
+ id: toolUseId,
+ name: block.name,
+ input: {},
+ },
+ });
+
+ for (let i = 0; i < argsJson.length; i += chunkSize) {
+ const slice = argsJson.slice(i, i + chunkSize);
+ events.push({
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: { type: "input_json_delta", partial_json: slice },
+ });
+ }
+
+ events.push({
+ type: "content_block_stop",
+ index: blockIndex,
+ });
+
+ blockIndex++;
+ }
+ }
+
+ // message_delta
+ events.push({
+ type: "message_delta",
+ delta: {
+ stop_reason: claudeStopReason(overrides?.finishReason, "tool_use"),
+ stop_sequence: null,
+ },
+ usage: { output_tokens: claudeUsage(overrides).output_tokens },
+ });
+
+ // message_stop
+ events.push({ type: "message_stop" });
+
+ return events;
+ }
+
+ // LEGACY PATH (byte-for-byte unchanged): text content block, then tool_use
+ // blocks in `toolCalls` order. Reached only when `blocks` is absent or
+ // empty; the leading redacted-thinking/thinking blocks above are shared with
+ // the new path and produce identical wire output here.
// Text content block
events.push({
type: "content_block_start",
@@ -970,6 +1067,7 @@ function buildClaudeContentWithToolCallsResponse(
overrides?: ResponseOverrides,
reasoningSignature?: string,
redactedThinking?: string[],
+ blocks?: FixtureBlock[],
): object {
const contentBlocks: object[] = [];
@@ -986,9 +1084,10 @@ function buildClaudeContentWithToolCallsResponse(
});
}
- contentBlocks.push({ type: "text", text: content });
-
- for (const tc of toolCalls) {
+ // Build a tool_use content block from a fixture tool call, parsing its
+ // string `arguments` into the object `input` Anthropic emits (warning on
+ // malformed JSON, same idiom as the streaming/legacy paths).
+ const toolUseBlock = (tc: { name: string; arguments: string; id?: string }): object => {
let argsObj: unknown;
try {
argsObj = JSON.parse(tc.arguments || "{}");
@@ -998,12 +1097,36 @@ function buildClaudeContentWithToolCallsResponse(
);
argsObj = {};
}
- contentBlocks.push({
+ return {
type: "tool_use",
id: tc.id || generateToolUseId(),
name: tc.name,
input: argsObj,
- });
+ };
+ };
+
+ if (blocks && blocks.length > 0) {
+ // NEW PATH: the non-streaming `content[]` array is positionally observable,
+ // so emit `text`/`tool_use` content blocks in the fixture's ARRAY ORDER
+ // (after any leading redacted/thinking blocks). A toolCall block before a
+ // text block therefore yields a tool_use ahead of the text — matching the
+ // streaming path for the same `blocks` fixture.
+ const ordered = resolveFixtureBlocks(blocks);
+ for (const block of ordered) {
+ if (block.type === "text") {
+ contentBlocks.push({ type: "text", text: block.text });
+ } else {
+ contentBlocks.push(
+ toolUseBlock({ name: block.name, arguments: block.arguments, id: block.id }),
+ );
+ }
+ }
+ } else {
+ // LEGACY PATH (unchanged): text content block, then tool_use blocks.
+ contentBlocks.push({ type: "text", text: content });
+ for (const tc of toolCalls) {
+ contentBlocks.push(toolUseBlock(tc));
+ }
}
return {
@@ -1340,6 +1463,7 @@ export async function handleMessages(
overrides,
effReasoningSignature,
effRedactedThinking,
+ response.blocks,
);
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify(body));
@@ -1354,6 +1478,7 @@ export async function handleMessages(
overrides,
effReasoningSignature,
effRedactedThinking,
+ response.blocks,
);
const interruption = createInterruptionSignal(fixture);
const completed = await writeClaudeSSEStream(res, events, {
diff --git a/src/ollama.ts b/src/ollama.ts
index acba505d..88a6b5c3 100644
--- a/src/ollama.ts
+++ b/src/ollama.ts
@@ -18,6 +18,7 @@ import type {
ChatCompletionRequest,
ChatMessage,
Fixture,
+ FixtureBlock,
HandlerDefaults,
ToolCall,
ToolDefinition,
@@ -28,6 +29,7 @@ import {
isContentWithToolCallsResponse,
isErrorResponse,
isEmbeddingResponse,
+ resolveFixtureBlocks,
serializeErrorResponse,
generateDeterministicEmbedding,
flattenHeaders,
@@ -332,6 +334,21 @@ function buildOllamaChatToolCallResponse(
// ─── Response builders: /api/chat — content + tool calls ────────────────────
+// Convert one fixture tool call into the shape Ollama sends on the wire:
+// `arguments` is a parsed object (not a JSON string) and no `id` is emitted.
+// Empty or unparseable `arguments` degrade to `{}`; the unparseable case is
+// also logged as a warning.
+function toOllamaToolCall(
+  tc: ToolCall,
+  logger: Logger,
+): { function: { name: string; arguments: unknown } } {
+  const parseArguments = (): unknown => {
+    try {
+      return JSON.parse(tc.arguments || "{}");
+    } catch {
+      logger.warn(`Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`);
+      return {};
+    }
+  };
+  const parsedArguments = parseArguments();
+  return { function: { name: tc.name, arguments: parsedArguments } };
+}
+
function buildOllamaChatContentWithToolCallsChunks(
content: string,
toolCalls: ToolCall[],
@@ -339,10 +356,75 @@ function buildOllamaChatContentWithToolCallsChunks(
chunkSize: number,
logger: Logger,
reasoning?: string,
+ blocks?: FixtureBlock[],
): object[] {
const chunks: object[] = [];
const createdAt = new Date().toISOString();
+ // ── Ordered-blocks path ──────────────────────────────────────────────────
+ // When the fixture declares explicit `blocks`, stream NDJSON message chunks
+ // following the blocks' ARRAY ORDER: a text block emits a `message.content`
+ // delta chunk; a toolCall block emits a chunk carrying `message.tool_calls`.
+ // So [toolCall, text] puts the tool_call-bearing chunk before the content
+ // chunk. Ollama tool-first ordering is PARTIALLY observable: the chunk order
+ // on the wire is honored, but some Ollama clients reassemble content and
+ // tool_calls positionally (text first regardless), so downstream order is
+ // best-effort. Reasoning chunks (if any) still lead, matching legacy. The
+ // legacy single-chunk-all-tools path stays untouched: it runs after this
+ // branch's early return, whenever `blocks` is absent or empty.
+ if (blocks && blocks.length > 0) {
+ const ordered = resolveFixtureBlocks(blocks);
+
+ // Reasoning chunks (before everything else), identical to legacy.
+ if (reasoning) {
+ for (let i = 0; i < reasoning.length; i += chunkSize) {
+ const slice = reasoning.slice(i, i + chunkSize);
+ chunks.push({
+ model,
+ created_at: createdAt,
+ message: { role: "assistant", content: "", reasoning_content: slice },
+ done: false,
+ });
+ }
+ }
+
+ for (const block of ordered) {
+ if (block.type === "text") {
+ for (let i = 0; i < block.text.length; i += chunkSize) {
+ const slice = block.text.slice(i, i + chunkSize);
+ chunks.push({
+ model,
+ created_at: createdAt,
+ message: { role: "assistant", content: slice },
+ done: false,
+ });
+ }
+ } else {
+ chunks.push({
+ model,
+ created_at: createdAt,
+ message: {
+ role: "assistant",
+ content: "",
+ tool_calls: [toOllamaToolCall(block, logger)],
+ },
+ done: false,
+ });
+ }
+ }
+
+ // Final chunk — preserved exactly as legacy (done + timing fields).
+ chunks.push({
+ model,
+ created_at: createdAt,
+ message: { role: "assistant", content: "" },
+ done: true,
+ ...DURATION_FIELDS,
+ });
+
+ return chunks;
+ }
+
+ // ── Legacy path (UNCHANGED) ──────────────────────────────────────────────
// Reasoning chunks (before content)
if (reasoning) {
for (let i = 0; i < reasoning.length; i += chunkSize) {
@@ -409,6 +491,14 @@ function buildOllamaChatContentWithToolCallsChunks(
return chunks;
}
+// NOTE (#274): this NON-streaming Ollama builder is intentionally degenerate
+// w.r.t. `blocks` ordering. Ollama's non-streaming chat response puts `content`
+// and `tool_calls` in SEPARATE fields on a single `message` object — they are
+// NOT a positionally-observable array, so a tool-first `blocks` fixture cannot
+// be expressed in the wire shape. Honoring block order here would be a no-op,
+// so we keep the legacy text+tool_calls fields unchanged. (Order-observable
+// surfaces — Claude `content[]`, Gemini `parts[]`, Responses `output[]` — DO
+// honor block order; see those builders.)
function buildOllamaChatContentWithToolCallsResponse(
content: string,
toolCalls: ToolCall[],
@@ -755,6 +845,7 @@ export async function handleOllama(
chunkSize,
logger,
effReasoning,
+ response.blocks,
);
const interruption = createInterruptionSignal(fixture);
const completed = await writeNDJSONStream(res, chunks, {
diff --git a/src/responses.ts b/src/responses.ts
index c61ab47a..592f9ba5 100644
--- a/src/responses.ts
+++ b/src/responses.ts
@@ -11,6 +11,7 @@ import type {
ChatCompletionRequest,
ChatMessage,
Fixture,
+ FixtureBlock,
HandlerDefaults,
ResponseOverrides,
StreamingProfile,
@@ -20,6 +21,7 @@ import type {
import {
generateId,
generateToolCallId,
+ resolveFixtureBlocks,
extractOverrides,
isTextResponse,
isToolCallResponse,
@@ -656,6 +658,71 @@ function buildMessageOutputEvents(
return { events, msgItem };
}
+/**
+ * Result of {@link buildFunctionCallOutputEvents}: the SSE events for one
+ * function_call item plus the completed item itself.
+ */
+interface FunctionCallBlockResult {
+ /** Events for this item in emission order (added → deltas → done → item done). */
+ events: ResponsesSSEEvent[];
+ /** The completed function_call item, destined for the final `output` array. */
+ fcItem: object;
+}
+
+/**
+ * Emit the output_item.added → arguments deltas → arguments.done →
+ * output_item.done events for a single function_call at `outputIndex`,
+ * returning the completed item for the final `output` array. Behavior is
+ * identical to the inline per-tool-call loop in the legacy path; both the
+ * legacy branch and the ordered-blocks branch share this so the event
+ * sequence for a given (tool, outputIndex) has exactly one implementation.
+ *
+ * @param toolCall - Call to emit. A falsy `id` (missing or empty) is replaced
+ * by a generated call id; `arguments` is streamed as-is, sliced by length.
+ * @param chunkSize - Maximum length of each `arguments` delta. Empty
+ * `arguments` produce no delta events at all.
+ * NOTE(review): a `chunkSize` of 0 would never advance the delta loop —
+ * confirm callers always pass a positive value.
+ * @param outputIndex - `output_index` stamped on every event for this item.
+ * @returns The events in emission order, plus the completed item.
+ */
+function buildFunctionCallOutputEvents(
+ toolCall: ToolCall,
+ chunkSize: number,
+ outputIndex: number,
+): FunctionCallBlockResult {
+ // `call_id` identifies the call; `id` identifies this output item. Every
+ // event pushed below for this item carries the same `fcId`.
+ const callId = toolCall.id || generateToolCallId();
+ const fcId = generateId("fc");
+ const args = toolCall.arguments;
+ const events: ResponsesSSEEvent[] = [];
+
+ // Open the item with empty arguments; the deltas below fill them in.
+ events.push({
+ type: "response.output_item.added",
+ output_index: outputIndex,
+ item: {
+ type: "function_call",
+ id: fcId,
+ call_id: callId,
+ name: toolCall.name,
+ arguments: "",
+ status: "in_progress",
+ },
+ });
+
+ // Stream the argument string in slices of at most `chunkSize` characters.
+ for (let i = 0; i < args.length; i += chunkSize) {
+ events.push({
+ type: "response.function_call_arguments.delta",
+ item_id: fcId,
+ output_index: outputIndex,
+ delta: args.slice(i, i + chunkSize),
+ });
+ }
+
+ // The done event repeats the full, unsliced argument string.
+ events.push({
+ type: "response.function_call_arguments.done",
+ item_id: fcId,
+ output_index: outputIndex,
+ arguments: args,
+ });
+
+ // The same object is both the payload of output_item.done and the value
+ // returned to the caller for the final `output` array.
+ const fcItem = {
+ type: "function_call",
+ id: fcId,
+ call_id: callId,
+ name: toolCall.name,
+ arguments: args,
+ status: "completed",
+ };
+ events.push({ type: "response.output_item.done", output_index: outputIndex, item: fcItem });
+
+ return { events, fcItem };
+}
+
// ─── Non-streaming response builders ────────────────────────────────────────
function buildOutputPrefix(content: string, reasoning?: string, webSearches?: string[]): object[] {
@@ -767,6 +834,7 @@ export function buildContentWithToolCallsStreamEvents(
reasoning?: string,
webSearches?: string[],
overrides?: ResponseOverrides,
+ blocks?: FixtureBlock[],
): ResponsesSSEEvent[] {
const { respId, created, events, prefixOutputItems, nextOutputIndex } = buildResponsePreamble(
model,
@@ -776,60 +844,59 @@ export function buildContentWithToolCallsStreamEvents(
overrides,
);
- const { events: msgEvents, msgItem } = buildMessageOutputEvents(
- content,
- chunkSize,
- nextOutputIndex,
- );
- events.push(...msgEvents);
-
- const fcOutputItems: object[] = [];
- for (let idx = 0; idx < toolCalls.length; idx++) {
- const tc = toolCalls[idx];
- const callId = tc.id || generateToolCallId();
- const fcId = generateId("fc");
- const fcOutputIndex = nextOutputIndex + 1 + idx;
- const args = tc.arguments;
-
- events.push({
- type: "response.output_item.added",
- output_index: fcOutputIndex,
- item: {
- type: "function_call",
- id: fcId,
- call_id: callId,
- name: tc.name,
- arguments: "",
- status: "in_progress",
- },
- });
-
- for (let i = 0; i < args.length; i += chunkSize) {
- events.push({
- type: "response.function_call_arguments.delta",
- item_id: fcId,
- output_index: fcOutputIndex,
- delta: args.slice(i, i + chunkSize),
- });
+ // The output items assembled in emission order (after any reasoning /
+ // web-search prefix items). Each output_index is assigned sequentially as we
+ // walk the chosen item order, so the `output_index` on every emitted event
+ // matches that item's slot in the final `response.completed.output` array.
+ const orderedOutputItems: object[] = [];
+
+ if (blocks && blocks.length > 0) {
+ // NEW PATH: stream items in the fixture's block ARRAY ORDER. A toolCall
+ // block placed before a text block therefore yields a function_call item at
+ // a LOWER output_index than the message — it leads the output array.
+ const ordered = resolveFixtureBlocks(blocks);
+ let outputIndex = nextOutputIndex;
+ for (const block of ordered) {
+ if (block.type === "text") {
+ const { events: msgEvents, msgItem } = buildMessageOutputEvents(
+ block.text,
+ chunkSize,
+ outputIndex,
+ );
+ events.push(...msgEvents);
+ orderedOutputItems.push(msgItem);
+ } else {
+ const { events: fcEvents, fcItem } = buildFunctionCallOutputEvents(
+ { name: block.name, arguments: block.arguments, id: block.id },
+ chunkSize,
+ outputIndex,
+ );
+ events.push(...fcEvents);
+ orderedOutputItems.push(fcItem);
+ }
+ outputIndex += 1;
}
+ } else {
+ // LEGACY PATH: message item first, then function_call items — byte-for-byte
+ // unchanged from the pre-blocks behavior (message always leads the output).
+ const { events: msgEvents, msgItem } = buildMessageOutputEvents(
+ content,
+ chunkSize,
+ nextOutputIndex,
+ );
+ events.push(...msgEvents);
+ orderedOutputItems.push(msgItem);
- events.push({
- type: "response.function_call_arguments.done",
- item_id: fcId,
- output_index: fcOutputIndex,
- arguments: args,
- });
-
- const doneItem = {
- type: "function_call",
- id: fcId,
- call_id: callId,
- name: tc.name,
- arguments: args,
- status: "completed",
- };
- events.push({ type: "response.output_item.done", output_index: fcOutputIndex, item: doneItem });
- fcOutputItems.push(doneItem);
+ for (let idx = 0; idx < toolCalls.length; idx++) {
+ const fcOutputIndex = nextOutputIndex + 1 + idx;
+ const { events: fcEvents, fcItem } = buildFunctionCallOutputEvents(
+ toolCalls[idx],
+ chunkSize,
+ fcOutputIndex,
+ );
+ events.push(...fcEvents);
+ orderedOutputItems.push(fcItem);
+ }
}
events.push({
@@ -840,7 +907,7 @@ export function buildContentWithToolCallsStreamEvents(
created_at: created,
model: overrides?.model ?? model,
status: responsesStatus(overrides?.finishReason, "completed"),
- output: [...prefixOutputItems, msgItem, ...fcOutputItems],
+ output: [...prefixOutputItems, ...orderedOutputItems],
usage: responsesUsage(overrides),
},
});
@@ -848,6 +915,27 @@ export function buildContentWithToolCallsStreamEvents(
return events;
}
+/**
+ * Build a completed `function_call` output item for the non-streaming
+ * response. A new item id is generated on every call; `call_id` uses `tc.id`
+ * when it is truthy and falls back to a generated tool-call id otherwise (so
+ * an empty-string id is also replaced).
+ *
+ * @param tc - Tool call name, argument string, and optional id.
+ * @returns The item object, ready to push onto the response `output` array.
+ */
+function buildFunctionCallOutputItem(tc: { name: string; arguments: string; id?: string }): object {
+ return {
+ type: "function_call",
+ id: generateId("fc"),
+ call_id: tc.id || generateToolCallId(),
+ name: tc.name,
+ arguments: tc.arguments,
+ status: "completed",
+ };
+}
+
+/**
+ * Build a completed assistant `message` output item whose content is a single
+ * `output_text` part holding `content`, with an empty `annotations` list. The
+ * item id comes from the `itemId()` helper.
+ *
+ * @param content - Text for the message's one `output_text` part.
+ * @returns The item object, ready to push onto the response `output` array.
+ */
+function buildMessageOutputItem(content: string): object {
+ return {
+ type: "message",
+ id: itemId(),
+ status: "completed",
+ role: "assistant",
+ content: [{ type: "output_text", text: content, annotations: [] }],
+ };
+}
+
function buildContentWithToolCallsResponse(
content: string,
toolCalls: ToolCall[],
@@ -855,17 +943,53 @@ function buildContentWithToolCallsResponse(
reasoning?: string,
webSearches?: string[],
overrides?: ResponseOverrides,
+ blocks?: FixtureBlock[],
): object {
+ if (blocks && blocks.length > 0) {
+ // NEW PATH: the non-streaming `output[]` array is positionally observable,
+ // so emit the prefix (reasoning / web_search_call), then the blocks in
+ // fixture ARRAY ORDER. A toolCall block before a text block therefore
+ // yields a function_call item ahead of the message — matching the streaming
+ // path's ordering for the same `blocks` fixture.
+ const ordered = resolveFixtureBlocks(blocks);
+ const output: object[] = [];
+ if (reasoning) {
+ output.push({
+ type: "reasoning",
+ id: generateId("rs"),
+ summary: [{ type: "summary_text", text: reasoning }],
+ });
+ }
+ if (webSearches && webSearches.length > 0) {
+ for (const query of webSearches) {
+ output.push({
+ type: "web_search_call",
+ id: generateId("ws"),
+ status: "completed",
+ action: { type: "search", query },
+ });
+ }
+ }
+ for (const block of ordered) {
+ if (block.type === "text") {
+ output.push(buildMessageOutputItem(block.text));
+ } else {
+ output.push(
+ buildFunctionCallOutputItem({
+ name: block.name,
+ arguments: block.arguments,
+ id: block.id,
+ }),
+ );
+ }
+ }
+ return buildResponseEnvelope(model, output, overrides);
+ }
+
+ // LEGACY PATH: message item first, then function_call items — unchanged.
const output = buildOutputPrefix(content, reasoning, webSearches);
for (const tc of toolCalls) {
- output.push({
- type: "function_call",
- id: generateId("fc"),
- call_id: tc.id || generateToolCallId(),
- name: tc.name,
- arguments: tc.arguments,
- status: "completed",
- });
+ output.push(buildFunctionCallOutputItem(tc));
}
return buildResponseEnvelope(model, output, overrides);
}
@@ -1136,6 +1260,7 @@ export async function handleResponses(
effReasoning,
response.webSearches,
overrides,
+ response.blocks,
);
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify(body));
@@ -1148,6 +1273,7 @@ export async function handleResponses(
effReasoning,
response.webSearches,
overrides,
+ response.blocks,
);
const interruption = createInterruptionSignal(fixture);
const completed = await writeResponsesSSEStream(res, events, {
diff --git a/src/server.ts b/src/server.ts
index 31f57e80..31da8692 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -865,6 +865,7 @@ async function handleCompletions(
chunkSize,
effReasoning,
overrides,
+ response.blocks,
);
// Build usage chunk for stream_options.include_usage
const completionText =
diff --git a/src/ws-responses.ts b/src/ws-responses.ts
index 62fc92e8..c1e0f965 100644
--- a/src/ws-responses.ts
+++ b/src/ws-responses.ts
@@ -282,6 +282,7 @@ async function processMessage(
),
response.webSearches,
extractOverrides(response),
+ response.blocks,
);
const interruption = createInterruptionSignal(fixture);
From 5f2623ad3522ddf203cabd6e1344b7dc571378f3 Mon Sep 17 00:00:00 2001
From: Jordan Ritter
Date: Fri, 26 Jun 2026 23:11:39 -0700
Subject: [PATCH 3/5] feat(record): capture stream block order so the recorder
persists tool-first fixtures (#274)
Preserve the observed streaming block order during collapse so the recorder
writes tool-first fixtures faithfully.
---
src/recorder.ts | 13 +-
src/stream-collapse.ts | 279 +++++++++++++++++++++++++++++++++++++----
2 files changed, 264 insertions(+), 28 deletions(-)
diff --git a/src/recorder.ts b/src/recorder.ts
index 2d0a172d..d98598e7 100644
--- a/src/recorder.ts
+++ b/src/recorder.ts
@@ -699,10 +699,21 @@ export async function proxyAndRecord(
arguments: tc.arguments ?? "{}",
}));
if (collapsed.content) {
- // Both content and toolCalls present — save as ContentWithToolCallsResponse
+ // Both content and toolCalls present — save as ContentWithToolCallsResponse.
+ //
+ // Ordered `blocks` (#274) is persisted ONLY when the collapser
+ // classified the stream as interleaved — a tool-call delta appeared
+ // strictly before the first content delta, OR a content delta
+ // appeared after any tool-call delta. The collapser encodes exactly
+ // that rule: it sets `collapsed.blocks` only in those cases and
+ // leaves it undefined otherwise. So the recorder simply spreads it
+ // when present; an ordinary text-then-tools (or text-only) stream has
+ // no `blocks` and persists the legacy shape byte-identically.
+ const blocksSpread = collapsed.blocks?.length ? { blocks: collapsed.blocks } : {};
fixtureResponse = {
content: collapsed.content,
toolCalls: sanitizedToolCalls,
+ ...blocksSpread,
...reasoningSpread,
...reasoningSignatureSpread,
...redactedThinkingSpread,
diff --git a/src/stream-collapse.ts b/src/stream-collapse.ts
index 7b3486bd..1fc007c5 100644
--- a/src/stream-collapse.ts
+++ b/src/stream-collapse.ts
@@ -8,7 +8,7 @@
*/
import { crc32 } from "node:zlib";
-import type { RecordProviderKey, ToolCall } from "./types.js";
+import type { FixtureBlock, RecordProviderKey, ToolCall } from "./types.js";
import type { Logger } from "./logger.js";
import { isHarmonyContent, parseHarmonyContent } from "./harmony.js";
@@ -55,6 +55,131 @@ export interface CollapseResult {
harmonyUnparsed?: true;
/** Short human-readable note accompanying {@link harmonyUnparsed}. */
harmonyNote?: string;
+ /**
+ * Ordered cross-channel block list, in STREAM order, populated ONLY when the
+ * stream is "interleaved" — i.e. a tool-call delta appeared STRICTLY BEFORE
+ * the first content delta, OR a content delta appeared AFTER any tool-call
+ * delta. The flat `content` / `toolCalls` fields are still populated next to
+ * it for replay back-compat and non-block consumers; when `blocks` is set, the
+ * OpenAI and Anthropic collapsers list `toolCalls` in stream-arrival order
+ * (matching the blocks) rather than index-sorted order. The recorder persists
+ * `blocks` whenever it is present. Absent (undefined) for text-first,
+ * text-only, and tool-only streams — i.e. anything NOT interleaved — so the
+ * recorder keeps the legacy `{ content, toolCalls }` shape.
+ *
+ * Each text block coalesces all contiguous content deltas between tool
+ * atoms; each toolCall block carries the fully-assembled name/arguments/id
+ * for one tool call in the position its FIRST delta arrived.
+ */
+ blocks?: FixtureBlock[];
+}
+
+// ---------------------------------------------------------------------------
+// Cross-channel block-order instrumentation (#274)
+// ---------------------------------------------------------------------------
+
+/**
+ * Atom recorded during a collapse pass, in stream arrival order. A `text` atom
+ * carries one content delta's text (the collapsers only record non-empty
+ * deltas; contiguous text atoms are coalesced when building blocks); a
+ * `toolCall` atom is a stable reference to a tool-call accumulator whose
+ * name/arguments/id are filled in across later deltas. The `ref` is the SAME
+ * object stored in the collapser's `toolCallMap` (or pushed to a flat
+ * `toolCalls` array), so block identity is reconciled with the flat
+ * representation at finalize time — see {@link buildOrderedBlocks}.
+ */
+type OrderAtom =
+ | { kind: "text"; text: string }
+ | { kind: "toolCall"; ref: { name: string; arguments: string; id?: string } };
+
+/**
+ * Normalize a tool call's accumulated `arguments` the same way for blocks and
+ * for the flat `toolCalls` list: an undefined, empty, or whitespace-only value
+ * becomes `"{}"`, never `""`. Any other string is returned unchanged — it is
+ * NOT validated, so a non-empty but malformed upstream value stays malformed.
+ * Intended to mirror `recorder.ts` `toToolCallArguments` (not visible here —
+ * TODO confirm the two stay in sync).
+ *
+ * @param args - Accumulated argument text, possibly empty or undefined.
+ * @returns `"{}"` for blank input, otherwise `args` exactly as given.
+ */
+function normalizeToolArguments(args: string | undefined): string {
+ if (args === undefined || args.trim() === "") return "{}";
+ return args;
+}
+
+/**
+ * Build a finalized `toolCall` {@link FixtureBlock} from a tool-call
+ * accumulator. `arguments` goes through {@link normalizeToolArguments}, the
+ * same normalization the flat `toolCalls` entries use, so both agree for the
+ * SAME accumulator object. A falsy `id` (the OpenAI and Anthropic collapsers
+ * start accumulators with `""` when upstream sent none) is omitted rather than
+ * emitted as an empty string.
+ *
+ * @param ref - Accumulator to snapshot; expected to be fully assembled.
+ * @returns A new block object; `ref` itself is not modified.
+ */
+function toToolCallBlock(ref: { name: string; arguments: string; id?: string }): FixtureBlock {
+ return {
+ type: "toolCall",
+ name: ref.name,
+ arguments: normalizeToolArguments(ref.arguments),
+ ...(ref.id ? { id: ref.id } : {}),
+ };
+}
+
+/**
+ * Decide whether a recorded atom sequence is "interleaved" and, if so, build
+ * the ordered {@link FixtureBlock} list. Returns `undefined` when NOT
+ * interleaved (text-first, text-only, or tool-only) so callers leave
+ * `CollapseResult.blocks` unset and the recorder keeps the legacy shape.
+ *
+ * Interleaved ⇔ (a tool atom appears strictly before the first text atom) OR
+ * (a text atom appears after any tool atom). A stream with no tool atoms, or
+ * with no text atoms, is never interleaved. Text-first-then-tools is the common
+ * legacy case and is explicitly NOT interleaved.
+ *
+ * NOTE(review): the first condition implies the second — if a tool atom
+ * precedes the first text atom, that text atom already follows a tool atom —
+ * so the rule reduces to "some text atom follows a tool atom". The first-index
+ * bookkeeping below is therefore redundant, though harmless.
+ *
+ * CONSISTENCY (#274): each toolCall block is derived from the SAME accumulator
+ * object referenced by its atom and normalized identically to the flat
+ * `toolCalls` path ({@link toToolCallBlock} / {@link normalizeToolArguments}).
+ * Because the atom `ref` is the very object the flat list is built from, the
+ * block and its flat counterpart describe the same call by identity — even when
+ * upstream tool-call indices do not match stream-arrival order. Empty/missing
+ * arguments normalize to `"{}"` in BOTH representations, never `""`.
+ *
+ * @param atoms - Atoms in stream-arrival order; the array is only read.
+ * @returns Ordered blocks (contiguous text coalesced, one block per tool
+ * atom), or `undefined` when the sequence is not interleaved.
+ */
+function buildOrderedBlocks(atoms: OrderAtom[]): FixtureBlock[] | undefined {
+ // Pass 1: classify the sequence without building anything.
+ let firstTextIndex = -1;
+ let firstToolIndex = -1;
+ let textAfterTool = false;
+ let sawTool = false;
+ let sawText = false;
+ for (let i = 0; i < atoms.length; i++) {
+ const a = atoms[i];
+ if (a.kind === "text") {
+ sawText = true;
+ if (firstTextIndex === -1) firstTextIndex = i;
+ // `sawTool` is only true here if a tool atom came earlier in the stream.
+ if (sawTool) textAfterTool = true;
+ } else {
+ sawTool = true;
+ if (firstToolIndex === -1) firstToolIndex = i;
+ }
+ }
+ // No cross-channel ordering to express unless BOTH channels appear.
+ if (!sawTool || !sawText) return undefined;
+ const toolBeforeText = firstToolIndex < firstTextIndex;
+ if (!toolBeforeText && !textAfterTool) return undefined;
+
+ // Coalesce contiguous text atoms into one text block; emit each tool atom as
+ // a toolCall block reflecting its fully-assembled, normalized accumulator.
+ const blocks: FixtureBlock[] = [];
+ let pendingText = "";
+ // Tracked separately from `pendingText` so flushing does not depend on the
+ // accumulated string being non-empty.
+ let hasPendingText = false;
+ const flushText = () => {
+ if (hasPendingText) {
+ blocks.push({ type: "text", text: pendingText });
+ pendingText = "";
+ hasPendingText = false;
+ }
+ };
+ for (const a of atoms) {
+ if (a.kind === "text") {
+ pendingText += a.text;
+ hasPendingText = true;
+ } else {
+ // A tool atom closes the current text run before its own block is added.
+ flushText();
+ blocks.push(toToolCallBlock(a.ref));
+ }
+ }
+ // Emit any text that trailed the last tool atom.
+ flushText();
+ return blocks;
/**
@@ -171,6 +296,10 @@ export function collapseOpenAISSE(body: string): CollapseResult {
// it (they are small per-stream counters), so synthetic keys never collide.
let nextSyntheticIndex = 1_000_000;
const idKeyMap = new Map();
+ // Cross-channel order atoms (#274), in stream arrival order. A toolCall atom
+ // references the same accumulator object stored in toolCallMap, so later
+ // name/arg deltas are visible through the atom when blocks are built.
+ const orderAtoms: OrderAtom[] = [];
for (const line of lines) {
const data = extractSSEData(splitSSELines(line));
@@ -237,6 +366,9 @@ export function collapseOpenAISSE(body: string): CollapseResult {
// Text content
if (typeof delta.content === "string") {
content += delta.content;
+ if (delta.content.length > 0) {
+ orderAtoms.push({ kind: "text", text: delta.content });
+ }
}
// Tool calls
@@ -265,11 +397,15 @@ export function collapseOpenAISSE(body: string): CollapseResult {
}
if (!toolCallMap.has(index)) {
- toolCallMap.set(index, {
+ const created = {
id: rawId ?? "",
name: (fn?.name as string) ?? "",
arguments: "",
- });
+ };
+ toolCallMap.set(index, created);
+ // Record the tool atom at the position its FIRST delta arrived; it
+ // references `created` so later name/arg deltas fill it in place.
+ orderAtoms.push({ kind: "toolCall", ref: created });
}
const entry = toolCallMap.get(index)!;
@@ -311,19 +447,36 @@ export function collapseOpenAISSE(body: string): CollapseResult {
}
if (toolCallMap.size > 0 || harmonyToolCalls.length > 0) {
- const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b);
+ const blocks = buildOrderedBlocks(orderAtoms);
+ // When the stream is interleaved we persist ordered `blocks`; the flat
+ // `toolCalls` MUST then list the same calls in the same order so the two
+ // representations never disagree (#274). The toolCall atoms reference the
+ // same accumulator objects as `toolCallMap`, so the flat list is derived from
+ // them (stream-arrival order) when blocks exist, else in legacy index-sorted
+ // order. Both normalize empty arguments to "{}"; the legacy list did not.
+ const orderedToolCalls = orderAtoms
+ .filter(
+ (a): a is { kind: "toolCall"; ref: { name: string; arguments: string; id?: string } } =>
+ a.kind === "toolCall",
+ )
+ .map((a) => ({
+ name: a.ref.name,
+ arguments: normalizeToolArguments(a.ref.arguments),
+ ...(a.ref.id ? { id: a.ref.id } : {}),
+ }));
+ const indexSortedToolCalls = Array.from(toolCallMap.entries())
+ .sort(([a], [b]) => a - b)
+ .map(([, tc]) => ({
+ name: tc.name,
+ arguments: normalizeToolArguments(tc.arguments),
+ ...(tc.id ? { id: tc.id } : {}),
+ }));
return {
+ ...(blocks ? { blocks } : {}),
...(content ? { content } : {}),
// Fallback-only: harmonyToolCalls are populated ONLY in the
// no-structured-calls branch, so this is never a merge of both sources.
- toolCalls: [
- ...sorted.map(([, tc]) => ({
- name: tc.name,
- arguments: tc.arguments,
- ...(tc.id ? { id: tc.id } : {}),
- })),
- ...harmonyToolCalls,
- ],
+ toolCalls: [...(blocks ? orderedToolCalls : indexSortedToolCalls), ...harmonyToolCalls],
// Reasoning is preserved alongside tool calls for ALL structured streams
// (DeepSeek/OpenRouter reasoning_content, harmony analysis channel), at
// parity with every other collapser and the non-streaming path.
@@ -388,6 +541,8 @@ export function collapseAnthropicSSE(body: string): CollapseResult {
// below it.
let nextSyntheticIndex = 1_000_000;
let lastSyntheticIndex: number | undefined;
+ // Cross-channel order atoms (#274), in stream arrival order.
+ const orderAtoms: OrderAtom[] = [];
for (const block of blocks) {
const lines = splitSSELines(block);
@@ -430,11 +585,15 @@ export function collapseAnthropicSSE(body: string): CollapseResult {
index = nextSyntheticIndex++;
}
lastSyntheticIndex = index;
- toolCallMap.set(index, {
+ const created = {
id: (contentBlock.id as string) ?? "",
name: (contentBlock.name as string) ?? "",
arguments: "",
- });
+ };
+ toolCallMap.set(index, created);
+ // Record the tool atom at the position the tool_use block opened; it
+ // references `created` so later input_json_delta fragments fill it in.
+ orderAtoms.push({ kind: "toolCall", ref: created });
}
}
@@ -444,6 +603,9 @@ export function collapseAnthropicSSE(body: string): CollapseResult {
if (delta.type === "text_delta" && typeof delta.text === "string") {
content += delta.text;
+ if (delta.text.length > 0) {
+ orderAtoms.push({ kind: "text", text: delta.text });
+ }
}
if (delta.type === "thinking_delta" && typeof delta.thinking === "string") {
@@ -485,14 +647,33 @@ export function collapseAnthropicSSE(body: string): CollapseResult {
}
if (toolCallMap.size > 0) {
- const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b);
- return {
- ...(content ? { content } : {}),
- toolCalls: sorted.map(([, tc]) => ({
+ const orderedBlocks = buildOrderedBlocks(orderAtoms);
+ // When interleaved (`blocks` present) the flat `toolCalls` MUST match the
+ // blocks' order/identity (#274). The toolCall atoms reference the same
+ // accumulator objects as `toolCallMap`, so derive the flat list from those
+ // atoms (stream-arrival order) when blocks exist; otherwise keep the legacy
+ // index-sorted order. Both normalize empty arguments to "{}" (legacy did not).
+ const orderedToolCalls = orderAtoms
+ .filter(
+ (a): a is { kind: "toolCall"; ref: { name: string; arguments: string; id?: string } } =>
+ a.kind === "toolCall",
+ )
+ .map((a) => ({
+ name: a.ref.name,
+ arguments: normalizeToolArguments(a.ref.arguments),
+ ...(a.ref.id ? { id: a.ref.id } : {}),
+ }));
+ const indexSortedToolCalls = Array.from(toolCallMap.entries())
+ .sort(([a], [b]) => a - b)
+ .map(([, tc]) => ({
name: tc.name,
- arguments: tc.arguments,
+ arguments: normalizeToolArguments(tc.arguments),
...(tc.id ? { id: tc.id } : {}),
- })),
+ }));
+ return {
+ ...(orderedBlocks ? { blocks: orderedBlocks } : {}),
+ ...(content ? { content } : {}),
+ toolCalls: orderedBlocks ? orderedToolCalls : indexSortedToolCalls,
...(reasoning ? { reasoning } : {}),
...(reasoningSignature ? { reasoningSignature } : {}),
...(redactedThinking.length > 0 ? { redactedThinking } : {}),
@@ -530,6 +711,8 @@ export function collapseGeminiSSE(body: string): CollapseResult {
let audioB64 = "";
let audioMimeType: string | undefined;
const toolCalls: ToolCall[] = [];
+ // Cross-channel order atoms (#274), in stream arrival order.
+ const orderAtoms: OrderAtom[] = [];
for (const line of lines) {
const data = extractSSEData(splitSSELines(line));
@@ -561,7 +744,7 @@ export function collapseGeminiSSE(body: string): CollapseResult {
for (const part of parts) {
if (part.functionCall) {
const fc = part.functionCall as Record;
- toolCalls.push({
+ const created: ToolCall = {
name: String(fc.name ?? ""),
// Default undefined/object args to a JSON object string (matches
// collapseGeminiInteractionsSSE / Ollama). JSON.stringify(undefined)
@@ -569,7 +752,10 @@ export function collapseGeminiSSE(body: string): CollapseResult {
// ToolCall.arguments:string contract.
arguments:
typeof fc.args === "string" ? (fc.args as string) : JSON.stringify(fc.args ?? {}),
- });
+ };
+ toolCalls.push(created);
+ // Record the tool atom at the position this functionCall part arrived.
+ orderAtoms.push({ kind: "toolCall", ref: created });
} else if (
part.inlineData &&
typeof (part.inlineData as Record).mimeType === "string" &&
@@ -587,30 +773,51 @@ export function collapseGeminiSSE(body: string): CollapseResult {
reasoning += part.text;
} else {
content += part.text;
+ if (part.text.length > 0) {
+ orderAtoms.push({ kind: "text", text: part.text });
+ }
}
}
}
}
+ // Normalize the flat tool calls' arguments identically to the block path so
+ // the two representations never disagree (#274). The toolCall atoms reference
+ // the same `created` objects pushed here, so blocks and flat describe the same
+ // calls in the same order; this only reconciles empty/missing → "{}".
+ const normalizedToolCalls = toolCalls.map((tc) => ({
+ ...tc,
+ arguments: normalizeToolArguments(tc.arguments),
+ }));
+
if (audioB64) {
// Preserve any content / reasoning / tool calls accumulated in the same
// stream — a Gemini turn can interleave audio with text and functionCall
// parts, and the early return must not silently drop them.
+ //
+ // Deliberately do NOT build ordered `blocks` here (#274, R2-N2): the audio
+ // collapse shape maps to AudioResponse, which has no `blocks` slot, and the
+ // recorder's audio branch never persists `collapsed.blocks`. Producing block
+ // ordering on this path would be silently produced-then-dropped, advertising
+ // a field this result shape can't carry. Block ordering is built only on the
+ // content+toolCalls path below, which can actually carry it.
return {
audioB64,
audioMimeType,
...(content ? { content } : {}),
...(reasoning ? { reasoning } : {}),
- ...(toolCalls.length > 0 ? { toolCalls } : {}),
+ ...(normalizedToolCalls.length > 0 ? { toolCalls: normalizedToolCalls } : {}),
...(droppedChunks > 0 ? { droppedChunks } : {}),
...(firstDroppedSample ? { firstDroppedSample } : {}),
};
}
if (toolCalls.length > 0) {
+ const blocks = buildOrderedBlocks(orderAtoms);
return {
+ ...(blocks ? { blocks } : {}),
...(content ? { content } : {}),
- toolCalls,
+ toolCalls: normalizedToolCalls,
...(reasoning ? { reasoning } : {}),
...(droppedChunks > 0 ? { droppedChunks } : {}),
...(firstDroppedSample ? { firstDroppedSample } : {}),
@@ -652,6 +859,8 @@ export function collapseOllamaNDJSON(body: string): CollapseResult {
let harmonyUnparsed = false;
let harmonyNote: string | undefined;
const toolCalls: ToolCall[] = [];
+ // Cross-channel order atoms (#274), in stream arrival order.
+ const orderAtoms: OrderAtom[] = [];
for (const line of lines) {
let parsed: Record;
@@ -671,6 +880,9 @@ export function collapseOllamaNDJSON(body: string): CollapseResult {
if (message) {
if (typeof message.content === "string") {
content += message.content;
+ if (message.content.length > 0) {
+ orderAtoms.push({ kind: "text", text: message.content });
+ }
}
// Tool calls
@@ -678,7 +890,7 @@ export function collapseOllamaNDJSON(body: string): CollapseResult {
for (const tc of message.tool_calls as Array>) {
const fn = tc.function as Record | undefined;
if (fn) {
- toolCalls.push({
+ const created: ToolCall = {
name: String(fn.name ?? ""),
// Default undefined/object args to a JSON object (matching
// collapseGeminiInteractionsSSE) — JSON.stringify(undefined)
@@ -687,7 +899,9 @@ export function collapseOllamaNDJSON(body: string): CollapseResult {
typeof fn.arguments === "string"
? fn.arguments
: JSON.stringify(fn.arguments ?? {}),
- });
+ };
+ toolCalls.push(created);
+ orderAtoms.push({ kind: "toolCall", ref: created });
}
}
}
@@ -696,6 +910,9 @@ export function collapseOllamaNDJSON(body: string): CollapseResult {
// /api/generate format
else if (typeof parsed.response === "string") {
content += parsed.response;
+ if (parsed.response.length > 0) {
+ orderAtoms.push({ kind: "text", text: parsed.response });
+ }
}
}
@@ -721,9 +938,17 @@ export function collapseOllamaNDJSON(body: string): CollapseResult {
}
if (toolCalls.length > 0) {
+ const blocks = buildOrderedBlocks(orderAtoms);
+ // Normalize flat arguments identically to the block path so the two
+ // representations never disagree (#274); same `created` refs, same order.
+ const normalizedToolCalls = toolCalls.map((tc) => ({
+ ...tc,
+ arguments: normalizeToolArguments(tc.arguments),
+ }));
return {
+ ...(blocks ? { blocks } : {}),
...(content ? { content } : {}),
- toolCalls,
+ toolCalls: normalizedToolCalls,
...(reasoning ? { reasoning } : {}),
...(droppedChunks > 0 ? { droppedChunks } : {}),
...(firstDroppedSample ? { firstDroppedSample } : {}),
From 6174c97d1c3d92a45a467bbf2b0b3003a979e12d Mon Sep 17 00:00:00 2001
From: Jordan Ritter
Date: Fri, 26 Jun 2026 23:11:44 -0700
Subject: [PATCH 4/5] test(fixtures): block-ordering coverage across replay,
record, e2e, and back-compat (#274)
Add and extend tests covering block-ordered replay per provider, recorder
capture, end-to-end flow, and backward compatibility with legacy fixtures.
---
src/__tests__/async-fixture-response.test.ts | 37 ++
src/__tests__/content-with-toolcalls.test.ts | 66 +++-
.../fixture-blocks-anthropic.test.ts | 135 +++++++
src/__tests__/fixture-blocks-e2e.test.ts | 242 ++++++++++++
src/__tests__/fixture-blocks-gemini.test.ts | 157 ++++++++
src/__tests__/fixture-blocks-loader.test.ts | 134 +++++++
.../fixture-blocks-nonstreaming.test.ts | 218 +++++++++++
src/__tests__/fixture-blocks-ollama.test.ts | 137 +++++++
src/__tests__/fixture-blocks-openai.test.ts | 159 ++++++++
.../fixture-blocks-responses.test.ts | 201 ++++++++++
.../fixture-blocks-scoped-out.test.ts | 143 ++++++++
src/__tests__/fixture-loader.test.ts | 154 ++++++++
src/__tests__/recorder.test.ts | 114 ++++++
src/__tests__/stream-collapse.test.ts | 346 ++++++++++++++++++
src/__tests__/ws-responses.test.ts | 79 +++-
15 files changed, 2319 insertions(+), 3 deletions(-)
create mode 100644 src/__tests__/fixture-blocks-anthropic.test.ts
create mode 100644 src/__tests__/fixture-blocks-e2e.test.ts
create mode 100644 src/__tests__/fixture-blocks-gemini.test.ts
create mode 100644 src/__tests__/fixture-blocks-loader.test.ts
create mode 100644 src/__tests__/fixture-blocks-nonstreaming.test.ts
create mode 100644 src/__tests__/fixture-blocks-ollama.test.ts
create mode 100644 src/__tests__/fixture-blocks-openai.test.ts
create mode 100644 src/__tests__/fixture-blocks-responses.test.ts
create mode 100644 src/__tests__/fixture-blocks-scoped-out.test.ts
diff --git a/src/__tests__/async-fixture-response.test.ts b/src/__tests__/async-fixture-response.test.ts
index f4aa0e67..0ed69fd9 100644
--- a/src/__tests__/async-fixture-response.test.ts
+++ b/src/__tests__/async-fixture-response.test.ts
@@ -225,6 +225,43 @@ describe("async fixture response (function responses)", () => {
expect(res.status).toBe(500);
});
+ it("stringifies object arguments on a factory-returned toolCall block", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.on(
+ { userMessage: "blocks-fn" },
+ () =>
+ ({
+ content: "Here you go.",
+ toolCalls: [{ name: "get_weather", arguments: { city: "NYC" } }],
+ blocks: [
+ // OBJECT arguments — must be auto-stringified like toolCalls[].arguments,
+ // otherwise resolveFixtureBlocks throws (FixtureBlock requires string args).
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ { type: "toolCall", name: "get_weather", arguments: { city: "NYC" } } as any,
+ ],
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ }) as any,
+ );
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/chat/completions`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "blocks-fn" }],
+ stream: true,
+ }),
+ });
+
+ expect(res.status).toBe(200);
+ const chunks = parseSSEChunks(await res.text());
+ const args = chunks
+ .map((c) => c.choices?.[0]?.delta?.tool_calls?.[0]?.function?.arguments ?? "")
+ .join("");
+ expect(args).toBe('{"city":"NYC"}');
+ });
+
it("works with async factory and streaming", async () => {
mock = new LLMock({ port: 0 });
mock.on({ userMessage: "async-stream" }, async () => {
diff --git a/src/__tests__/content-with-toolcalls.test.ts b/src/__tests__/content-with-toolcalls.test.ts
index f10d92e8..a35bcc6a 100644
--- a/src/__tests__/content-with-toolcalls.test.ts
+++ b/src/__tests__/content-with-toolcalls.test.ts
@@ -1,7 +1,12 @@
import { describe, it, expect, afterEach } from "vitest";
-import { isContentWithToolCallsResponse, isTextResponse, isToolCallResponse } from "../helpers.js";
+import {
+ isContentWithToolCallsResponse,
+ isTextResponse,
+ isToolCallResponse,
+ resolveFixtureBlocks,
+} from "../helpers.js";
import { LLMock } from "../llmock.js";
-import type { SSEChunk } from "../types.js";
+import type { FixtureBlock, SSEChunk } from "../types.js";
describe("isContentWithToolCallsResponse", () => {
it("returns true when both content and toolCalls are present", () => {
@@ -39,6 +44,63 @@ describe("isContentWithToolCallsResponse", () => {
});
});
+describe("resolveFixtureBlocks", () => {
+ it("passes a valid mixed blocks array through in order", () => {
+ const blocks: FixtureBlock[] = [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go" },
+ { type: "toolCall", name: "get_time", arguments: "{}", id: "call_1" },
+ ];
+ const result = resolveFixtureBlocks(blocks);
+ // Same reference, same order — passthrough, not reconstruction.
+ expect(result).toBe(blocks);
+ expect(result.map((b) => b.type)).toEqual(["toolCall", "text", "toolCall"]);
+ });
+
+ it("accepts a text block with a string text field", () => {
+ const blocks: FixtureBlock[] = [{ type: "text", text: "hi" }];
+ expect(resolveFixtureBlocks(blocks)).toEqual(blocks);
+ });
+
+ it("accepts a toolCall block without an optional id", () => {
+ const blocks: FixtureBlock[] = [{ type: "toolCall", name: "f", arguments: "{}" }];
+ expect(resolveFixtureBlocks(blocks)).toEqual(blocks);
+ });
+
+ it("rejects a non-array argument", () => {
+ expect(() => resolveFixtureBlocks({} as unknown as FixtureBlock[])).toThrow(
+ /expected an array/,
+ );
+ });
+
+ it("rejects a text block with a non-string text field", () => {
+ const blocks = [{ type: "text", text: 42 }] as unknown as FixtureBlock[];
+ expect(() => resolveFixtureBlocks(blocks)).toThrow(/index 0.*string "text"/);
+ });
+
+ it("rejects a toolCall block missing arguments", () => {
+ const blocks = [{ type: "toolCall", name: "f" }] as unknown as FixtureBlock[];
+ expect(() => resolveFixtureBlocks(blocks)).toThrow(/index 0.*"name" and "arguments"/);
+ });
+
+ it("rejects a toolCall block with a non-string id", () => {
+ const blocks = [
+ { type: "toolCall", name: "f", arguments: "{}", id: 1 },
+ ] as unknown as FixtureBlock[];
+ expect(() => resolveFixtureBlocks(blocks)).toThrow(/index 0.*"id" must be a string/);
+ });
+
+ it("rejects a block with an unknown type", () => {
+ const blocks = [{ type: "image" }] as unknown as FixtureBlock[];
+ expect(() => resolveFixtureBlocks(blocks)).toThrow(/unknown type/);
+ });
+
+ it("rejects a null entry", () => {
+ const blocks = [null] as unknown as FixtureBlock[];
+ expect(() => resolveFixtureBlocks(blocks)).toThrow(/index 0.*expected an object/);
+ });
+});
+
function parseSSEChunks(body: string): SSEChunk[] {
return body
.split("\n\n")
diff --git a/src/__tests__/fixture-blocks-anthropic.test.ts b/src/__tests__/fixture-blocks-anthropic.test.ts
new file mode 100644
index 00000000..495c5078
--- /dev/null
+++ b/src/__tests__/fixture-blocks-anthropic.test.ts
@@ -0,0 +1,135 @@
+import { describe, it, expect, afterEach } from "vitest";
+import { LLMock } from "../llmock.js";
+
+interface AnthropicSSEEvent {
+ type: string;
+ index?: number;
+ content_block?: { type: string; name?: string; input?: unknown };
+ delta?: Record<string, unknown>;
+ [key: string]: unknown;
+}
+
+function parseAnthropicSSEEvents(body: string): AnthropicSSEEvent[] {
+ return body
+ .split("\n\n")
+ .filter((block) => block.trim().length > 0)
+ .map((block) => {
+ const dataLine = block.split("\n").find((l) => l.startsWith("data: "));
+ if (!dataLine) return null;
+ return JSON.parse(dataLine.slice(6)) as AnthropicSSEEvent;
+ })
+ .filter(Boolean) as AnthropicSSEEvent[];
+}
+
+async function postAnthropicStream(
+ mock: LLMock,
+ userMessage: string,
+): Promise<AnthropicSSEEvent[]> {
+ const res = await fetch(`${mock.url}/v1/messages`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ "x-api-key": "test-key",
+ "anthropic-version": "2023-06-01",
+ },
+ body: JSON.stringify({
+ model: "claude-sonnet-4-6",
+ max_tokens: 1024,
+ messages: [{ role: "user", content: userMessage }],
+ stream: true,
+ }),
+ });
+ return parseAnthropicSSEEvents(await res.text());
+}
+
+describe("Anthropic Messages — ordered fixture blocks (tool-first)", () => {
+ let mock: LLMock | null = null;
+
+ afterEach(async () => {
+ if (mock) {
+ await mock.stop();
+ mock = null;
+ }
+ });
+
+ it("emits tool_use content block at index 0 and text block at index 1 for [toolCall, text]", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test anthropic blocks tool-first" },
+ response: {
+ content: "Checking.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go." },
+ ],
+ },
+ });
+ await mock.start();
+
+ const events = await postAnthropicStream(mock, "test anthropic blocks tool-first");
+
+ const starts = events.filter((e) => e.type === "content_block_start");
+ // First content block must be the tool_use (index 0), then text (index 1).
+ expect(starts.length).toBe(2);
+ expect(starts[0].index).toBe(0);
+ expect(starts[0].content_block?.type).toBe("tool_use");
+ expect(starts[0].content_block?.name).toBe("get_weather");
+ expect(starts[1].index).toBe(1);
+ expect(starts[1].content_block?.type).toBe("text");
+
+ // The tool_use start must precede the text start on the wire.
+ const toolIdx = events.findIndex(
+ (e) => e.type === "content_block_start" && e.content_block?.type === "tool_use",
+ );
+ const textIdx = events.findIndex(
+ (e) => e.type === "content_block_start" && e.content_block?.type === "text",
+ );
+ expect(toolIdx).toBeLessThan(textIdx);
+
+ // The tool_use input arrives via input_json_delta on index 0.
+ const toolDelta = events.find(
+ (e) =>
+ e.type === "content_block_delta" && e.index === 0 && e.delta?.type === "input_json_delta",
+ );
+ expect(toolDelta).toBeDefined();
+ expect(toolDelta!.delta!.partial_json).toBe('{"city":"NYC"}');
+
+ // The text arrives via text_delta on index 1.
+ const textDelta = events.find(
+ (e) => e.type === "content_block_delta" && e.index === 1 && e.delta?.type === "text_delta",
+ );
+ expect(textDelta).toBeDefined();
+ expect(textDelta!.delta!.text).toBe("Here you go.");
+
+ // message envelope preserved.
+ const messageStart = events.find((e) => e.type === "message_start");
+ const messageDelta = events.find((e) => e.type === "message_delta");
+ const messageStop = events.find((e) => e.type === "message_stop");
+ expect(messageStart).toBeDefined();
+ expect(messageStop).toBeDefined();
+ expect((messageDelta!.delta as { stop_reason: string }).stop_reason).toBe("tool_use");
+ });
+
+ it("back-compat: a fixture without blocks emits the legacy text-first block at index 0", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test anthropic blocks legacy" },
+ response: {
+ content: "Checking.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ });
+ await mock.start();
+
+ const events = await postAnthropicStream(mock, "test anthropic blocks legacy");
+
+ const starts = events.filter((e) => e.type === "content_block_start");
+ // Legacy always emits the text block first (index 0) then tool_use (index 1).
+ expect(starts.length).toBe(2);
+ expect(starts[0].index).toBe(0);
+ expect(starts[0].content_block?.type).toBe("text");
+ expect(starts[1].index).toBe(1);
+ expect(starts[1].content_block?.type).toBe("tool_use");
+ });
+});
diff --git a/src/__tests__/fixture-blocks-e2e.test.ts b/src/__tests__/fixture-blocks-e2e.test.ts
new file mode 100644
index 00000000..5dc3186c
--- /dev/null
+++ b/src/__tests__/fixture-blocks-e2e.test.ts
@@ -0,0 +1,242 @@
+/**
+ * #274 slot T3 — END-TO-END integration for ordered `blocks`.
+ *
+ * Unlike the per-provider builder tests (which construct an in-memory fixture
+ * and call `mock.addFixture(...)`), this suite proves the FULL pipeline works
+ * for a REAL on-disk JSON fixture: a `.json` file is written to a temp dir,
+ * loaded THROUGH THE REAL LOADER via `mock.loadFixtureFile(...)`, served by a
+ * live `LLMock` HTTP server, and the wire bytes are asserted.
+ *
+ * This closes the loader→builder→dispatch loop for the two providers whose
+ * wire format can FULLY express tool-first ordering (Anthropic typed content
+ * blocks; OpenAI Responses output_index sequencing). A blocks-bearing fixture
+ * with `[toolCall, text]` must stream the tool BEFORE the text on both.
+ *
+ * A back-compat guard rounds out the suite: a legacy `{content, toolCalls}`
+ * fixture (no `blocks`) must still stream message/text FIRST — confirming the
+ * branch-not-replace design leaves the legacy path untouched end-to-end.
+ */
+import { describe, it, expect, afterEach, beforeEach } from "vitest";
+import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { LLMock } from "../llmock.js";
+
+// ─── SSE parsers (mirror content-with-toolcalls.test.ts) ─────────────────────
+
+function parseAnthropicSSEEvents(body: string): Array<{ type: string; [key: string]: unknown }> {
+ return body
+ .split("\n\n")
+ .filter((block) => block.trim().length > 0)
+ .map((block) => {
+ const dataLine = block.split("\n").find((l) => l.startsWith("data: "));
+ if (!dataLine) return null;
+ return JSON.parse(dataLine.slice(6)) as { type: string; [key: string]: unknown };
+ })
+ .filter(Boolean) as Array<{ type: string; [key: string]: unknown }>;
+}
+
+function parseResponsesSSEEvents(body: string): Array<{ type: string; [key: string]: unknown }> {
+ return body
+ .split("\n\n")
+ .filter((block) => block.trim().length > 0)
+ .map((block) => {
+ const dataLine = block.split("\n").find((l) => l.startsWith("data: "));
+ if (!dataLine) return null;
+ return JSON.parse(dataLine.slice(6)) as { type: string; [key: string]: unknown };
+ })
+ .filter(Boolean) as Array<{ type: string; [key: string]: unknown }>;
+}
+
+// ─── Tmp dir + real-loader fixture file ──────────────────────────────────────
+
+let tmpDir: string;
+let mock: LLMock | null = null;
+
+beforeEach(() => {
+ tmpDir = mkdtempSync(join(tmpdir(), "fixture-blocks-e2e-"));
+});
+
+afterEach(async () => {
+ if (mock) {
+ await mock.stop();
+ mock = null;
+ }
+ rmSync(tmpDir, { recursive: true, force: true });
+});
+
+/** Write a fixtures JSON file to the temp dir and return its path. */
+function writeFixtureFile(name: string, content: unknown): string {
+ const filePath = join(tmpDir, name);
+ writeFileSync(filePath, JSON.stringify(content), "utf-8");
+ return filePath;
+}
+
+describe("#274 e2e: ordered blocks loaded through the REAL JSON loader", () => {
+ it("Anthropic streams tool_use BEFORE text for a tool-first blocks .json fixture", async () => {
+ // A real on-disk JSON fixture with tool-first `blocks`. The legacy
+ // text-first {content, toolCalls} shape cannot express this ordering.
+ const filePath = writeFixtureFile("anthropic-tool-first.json", {
+ fixtures: [
+ {
+ match: { userMessage: "e2e anthropic tool-first" },
+ response: {
+ content: "Here you go.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go." },
+ ],
+ },
+ },
+ ],
+ });
+
+ mock = new LLMock({ port: 0 });
+ // THE REAL LOADER: reads + parses + normalizes the .json from disk.
+ mock.loadFixtureFile(filePath);
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/messages`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ "x-api-key": "test-key",
+ "anthropic-version": "2023-06-01",
+ },
+ body: JSON.stringify({
+ model: "claude-sonnet-4-6",
+ max_tokens: 1024,
+ messages: [{ role: "user", content: "e2e anthropic tool-first" }],
+ stream: true,
+ }),
+ });
+
+ const events = parseAnthropicSSEEvents(await res.text());
+
+ const textBlockStart = events.find(
+ (e) =>
+ e.type === "content_block_start" && (e.content_block as { type: string })?.type === "text",
+ );
+ const toolBlockStart = events.find(
+ (e) =>
+ e.type === "content_block_start" &&
+ (e.content_block as { type: string })?.type === "tool_use",
+ );
+ expect(textBlockStart).toBeDefined();
+ expect(toolBlockStart).toBeDefined();
+
+ // Tool-first: the tool_use content block precedes the text content block.
+ const toolIdx = events.indexOf(toolBlockStart!);
+ const textIdx = events.indexOf(textBlockStart!);
+ expect(toolIdx).toBeLessThan(textIdx);
+
+ const messageDelta = events.find((e) => e.type === "message_delta");
+ expect((messageDelta!.delta as { stop_reason: string }).stop_reason).toBe("tool_use");
+ });
+
+ it("Responses assigns function_call output_index 0 for a tool-first blocks .json fixture", async () => {
+ const filePath = writeFixtureFile("responses-tool-first.json", {
+ fixtures: [
+ {
+ match: { userMessage: "e2e responses tool-first" },
+ response: {
+ content: "Here you go.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go." },
+ ],
+ },
+ },
+ ],
+ });
+
+ mock = new LLMock({ port: 0 });
+ mock.loadFixtureFile(filePath);
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/responses`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ input: [{ role: "user", content: "e2e responses tool-first" }],
+ stream: true,
+ }),
+ });
+
+ const events = parseResponsesSSEEvents(await res.text());
+
+ const fcAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" &&
+ (e.item as { type: string })?.type === "function_call",
+ );
+ const msgAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" && (e.item as { type: string })?.type === "message",
+ );
+ expect(fcAdded).toBeDefined();
+ expect(msgAdded).toBeDefined();
+ expect((fcAdded as unknown as { output_index: number }).output_index).toBe(0);
+ expect((msgAdded as unknown as { output_index: number }).output_index).toBe(1);
+
+ const completed = events.find((e) => e.type === "response.completed");
+ const output = (completed!.response as { output: Array<{ type: string }> }).output;
+ const types = output.map((o) => o.type);
+ expect(types[0]).toBe("function_call");
+ expect(types.indexOf("function_call")).toBeLessThan(types.indexOf("message"));
+ });
+
+ // ── BACK-COMPAT guard: a legacy {content, toolCalls} fixture (NO blocks)
+ // loaded through the real loader must still stream message/text FIRST.
+ // Proves branch-not-replace leaves the legacy path untouched end-to-end. ──
+ it("legacy .json fixture WITHOUT blocks keeps message-first ordering on Responses", async () => {
+ const filePath = writeFixtureFile("responses-legacy.json", {
+ fixtures: [
+ {
+ match: { userMessage: "e2e responses legacy" },
+ response: {
+ content: "Sure.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ },
+ ],
+ });
+
+ mock = new LLMock({ port: 0 });
+ mock.loadFixtureFile(filePath);
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/responses`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ input: [{ role: "user", content: "e2e responses legacy" }],
+ stream: true,
+ }),
+ });
+
+ const events = parseResponsesSSEEvents(await res.text());
+
+ const msgAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" && (e.item as { type: string })?.type === "message",
+ );
+ const fcAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" &&
+ (e.item as { type: string })?.type === "function_call",
+ );
+ // Legacy hardcoding: message at index 0, function_call at index 1.
+ expect((msgAdded as unknown as { output_index: number }).output_index).toBe(0);
+ expect((fcAdded as unknown as { output_index: number }).output_index).toBe(1);
+
+ const completed = events.find((e) => e.type === "response.completed");
+ const output = (completed!.response as { output: Array<{ type: string }> }).output;
+ const types = output.map((o) => o.type);
+ expect(types.indexOf("message")).toBeLessThan(types.indexOf("function_call"));
+ });
+});
diff --git a/src/__tests__/fixture-blocks-gemini.test.ts b/src/__tests__/fixture-blocks-gemini.test.ts
new file mode 100644
index 00000000..5353aee1
--- /dev/null
+++ b/src/__tests__/fixture-blocks-gemini.test.ts
@@ -0,0 +1,157 @@
+import { describe, it, expect, afterEach } from "vitest";
+import { LLMock } from "../llmock.js";
+
+type GeminiStreamChunk = {
+ candidates: Array<{
+ content: { parts: Array<{ text?: string; functionCall?: { name: string } }> };
+ finishReason?: string;
+ }>;
+};
+
+function parseGeminiSSE(body: string): GeminiStreamChunk[] {
+ return body
+ .split("\n\n")
+ .filter((block) => block.trim().length > 0)
+ .map((block) => {
+ const dataLine = block.split("\n").find((l) => l.startsWith("data: "));
+ return dataLine ? (JSON.parse(dataLine.slice(6)) as GeminiStreamChunk) : null;
+ })
+ .filter(Boolean) as GeminiStreamChunk[];
+}
+
+async function streamGemini(mock: LLMock, userMessage: string): Promise<GeminiStreamChunk[]> {
+ const res = await fetch(
+ `${mock.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent?alt=sse`,
+ {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({
+ contents: [{ role: "user", parts: [{ text: userMessage }] }],
+ }),
+ },
+ );
+ return parseGeminiSSE(await res.text());
+}
+
+describe("Gemini — fixture block ordering (tool-first)", () => {
+ let mock: LLMock | null = null;
+
+ afterEach(async () => {
+ if (mock) {
+ await mock.stop();
+ mock = null;
+ }
+ });
+
+ it("emits the functionCall part BEFORE the text part when blocks lead with a toolCall", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "gemini tool-first blocks" },
+ response: {
+ content: "Here you go",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go" },
+ ],
+ },
+ });
+ await mock.start();
+
+ const chunks = await streamGemini(mock, "gemini tool-first blocks");
+
+ const fcChunks = chunks.filter((c) =>
+ c.candidates[0].content.parts.some((p) => p.functionCall !== undefined),
+ );
+ const textChunks = chunks.filter((c) =>
+ c.candidates[0].content.parts.some((p) => p.text !== undefined),
+ );
+
+ expect(fcChunks.length).toBeGreaterThan(0);
+ expect(textChunks.length).toBeGreaterThan(0);
+
+ // The functionCall part must be emitted before the text part (tool-first order).
+ const firstFcIdx = chunks.indexOf(fcChunks[0]);
+ const firstTextIdx = chunks.indexOf(textChunks[0]);
+ expect(firstFcIdx).toBeLessThan(firstTextIdx);
+
+ // finishReason still lands on the terminal chunk regardless of last block type.
+ const lastChunk = chunks[chunks.length - 1];
+ expect(lastChunk.candidates[0].finishReason).toBe("FUNCTION_CALL");
+
+ const fcPart = fcChunks[0].candidates[0].content.parts.find((p) => p.functionCall);
+ expect(fcPart!.functionCall!.name).toBe("get_weather");
+ });
+
+ it("back-compat: a fixture with no blocks streams identically to the legacy text-first path", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "gemini no blocks" },
+ response: {
+ content: "Sure.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ });
+ await mock.start();
+
+ const chunks = await streamGemini(mock, "gemini no blocks");
+
+ const textChunks = chunks.filter((c) =>
+ c.candidates[0].content.parts.some((p) => p.text !== undefined),
+ );
+ const fcChunks = chunks.filter((c) =>
+ c.candidates[0].content.parts.some((p) => p.functionCall !== undefined),
+ );
+
+ expect(textChunks.length).toBeGreaterThan(0);
+ expect(fcChunks.length).toBeGreaterThan(0);
+
+ // Legacy: text-first, functionCall last, FUNCTION_CALL on terminal chunk.
+ const lastTextIdx = chunks.lastIndexOf(textChunks.at(-1)!);
+ const firstFcIdx = chunks.indexOf(fcChunks[0]);
+ expect(lastTextIdx).toBeLessThan(firstFcIdx);
+
+ const lastChunk = chunks[chunks.length - 1];
+ expect(lastChunk.candidates[0].finishReason).toBe("FUNCTION_CALL");
+
+ const fullText = textChunks
+ .flatMap((c) => c.candidates[0].content.parts.map((p) => p.text ?? ""))
+ .join("");
+ expect(fullText).toBe("Sure.");
+ });
+
+ it("empty blocks array falls back to the legacy path (does not drop content/toolCalls)", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "gemini empty blocks" },
+ response: {
+ content: "Sure.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: [],
+ },
+ });
+ await mock.start();
+
+ const chunks = await streamGemini(mock, "gemini empty blocks");
+
+ const textChunks = chunks.filter((c) =>
+ c.candidates[0].content.parts.some((p) => p.text !== undefined),
+ );
+ const fcChunks = chunks.filter((c) =>
+ c.candidates[0].content.parts.some((p) => p.functionCall !== undefined),
+ );
+
+ // Empty blocks must NOT silently drop content/toolCalls — legacy output emits both.
+ expect(textChunks.length).toBeGreaterThan(0);
+ expect(fcChunks.length).toBeGreaterThan(0);
+
+ const fullText = textChunks
+ .flatMap((c) => c.candidates[0].content.parts.map((p) => p.text ?? ""))
+ .join("");
+ expect(fullText).toBe("Sure.");
+
+ // Terminal finishReason still present (not a malformed, finish-less stream).
+ const lastChunk = chunks[chunks.length - 1];
+ expect(lastChunk.candidates[0].finishReason).toBe("FUNCTION_CALL");
+ });
+});
diff --git a/src/__tests__/fixture-blocks-loader.test.ts b/src/__tests__/fixture-blocks-loader.test.ts
new file mode 100644
index 00000000..b41c967c
--- /dev/null
+++ b/src/__tests__/fixture-blocks-loader.test.ts
@@ -0,0 +1,134 @@
+import { describe, it, expect } from "vitest";
+import { entryToFixture } from "../fixture-loader.js";
+import type { FixtureFileEntry, ContentWithToolCallsResponse, FixtureBlock } from "../types.js";
+
+/* ------------------------------------------------------------------ *
+ * #274 slot T1f — JSON fixture loader carries `blocks`. *
+ * *
+ * T0 added the optional `blocks?: FixtureBlock[]` to the IN-MEMORY *
+ * ContentWithToolCallsResponse. These tests pin that an ON-DISK JSON *
+ * fixture carrying `blocks` survives the loader normalization, that *
+ * a toolCall block's object `arguments` is auto-stringified just *
+ * like the sibling top-level `toolCalls[].arguments`, and that a *
+ * fixture with no `blocks` key loads byte-identically to before. *
+ * ------------------------------------------------------------------ */
+
+describe("#274 fixture loader carries blocks", () => {
+ it("carries a tool-first blocks array through into the in-memory response, in order", () => {
+ const entry: FixtureFileEntry = {
+ match: { userMessage: "do it" },
+ response: {
+ content: "Done.",
+ toolCalls: [{ name: "search", arguments: '{"q":"weather"}' }],
+ // tool-first ordering that the legacy text-first shape cannot express
+ blocks: [
+ { type: "toolCall", name: "search", arguments: '{"q":"weather"}' },
+ { type: "text", text: "Done." },
+ ],
+ } as FixtureFileEntry["response"],
+ };
+
+ const fixture = entryToFixture(entry);
+ const resp = fixture.response as ContentWithToolCallsResponse;
+
+ expect(resp.blocks).toBeDefined();
+ expect(resp.blocks).toHaveLength(2);
+ expect(resp.blocks?.[0]).toEqual({
+ type: "toolCall",
+ name: "search",
+ arguments: '{"q":"weather"}',
+ });
+ expect(resp.blocks?.[1]).toEqual({ type: "text", text: "Done." });
+ // Order preserved exactly as authored.
+ expect((resp.blocks as FixtureBlock[]).map((b) => b.type)).toEqual(["toolCall", "text"]);
+ });
+
+ it("auto-stringifies object arguments inside a toolCall block (mirrors toolCalls[].arguments)", () => {
+ const entry: FixtureFileEntry = {
+ match: { userMessage: "do it" },
+ response: {
+ content: "ok",
+ toolCalls: [{ name: "save", arguments: { id: 1 } }],
+ blocks: [
+ { type: "toolCall", name: "save", arguments: { id: 1, nested: { a: [1, 2] } } },
+ { type: "text", text: "ok" },
+ ],
+ } as unknown as FixtureFileEntry["response"],
+ };
+
+ const fixture = entryToFixture(entry);
+ const resp = fixture.response as ContentWithToolCallsResponse;
+ const block = resp.blocks?.[0] as { type: "toolCall"; arguments: string };
+
+ expect(typeof block.arguments).toBe("string");
+ expect(block.arguments).toBe('{"id":1,"nested":{"a":[1,2]}}');
+ });
+
+ it("leaves string arguments inside a toolCall block unchanged", () => {
+ const entry: FixtureFileEntry = {
+ match: { userMessage: "do it" },
+ response: {
+ content: "ok",
+ toolCalls: [{ name: "save", arguments: '{"id":1}' }],
+ blocks: [{ type: "toolCall", name: "save", arguments: '{"id":1}' }],
+ } as FixtureFileEntry["response"],
+ };
+
+ const fixture = entryToFixture(entry);
+ const resp = fixture.response as ContentWithToolCallsResponse;
+ const block = resp.blocks?.[0] as { type: "toolCall"; arguments: string };
+ expect(block.arguments).toBe('{"id":1}');
+ });
+
+ it("leaves text blocks unchanged", () => {
+ const entry: FixtureFileEntry = {
+ match: { userMessage: "do it" },
+ response: {
+ content: "Hello",
+ toolCalls: [{ name: "noop", arguments: "{}" }],
+ blocks: [{ type: "text", text: "Hello" }],
+ } as FixtureFileEntry["response"],
+ };
+
+ const fixture = entryToFixture(entry);
+ const resp = fixture.response as ContentWithToolCallsResponse;
+ expect(resp.blocks?.[0]).toEqual({ type: "text", text: "Hello" });
+ });
+
+ it("back-compat: a fixture WITHOUT blocks loads identically (blocks stays undefined)", () => {
+ const entry: FixtureFileEntry = {
+ match: { userMessage: "legacy" },
+ response: {
+ content: "Legacy answer.",
+ toolCalls: [{ name: "search", arguments: '{"q":"x"}' }],
+ },
+ };
+
+ const fixture = entryToFixture(entry);
+ const resp = fixture.response as ContentWithToolCallsResponse;
+
+ expect(resp.blocks).toBeUndefined();
+ expect("blocks" in resp).toBe(false);
+ expect(resp.content).toBe("Legacy answer.");
+ expect(resp.toolCalls).toEqual([{ name: "search", arguments: '{"q":"x"}' }]);
+ });
+
+ it("ignores a non-array blocks value rather than corrupting the response", () => {
+ const entry: FixtureFileEntry = {
+ match: { userMessage: "bad" },
+ response: {
+ content: "ok",
+ toolCalls: [{ name: "noop", arguments: "{}" }],
+ // malformed: blocks is not an array — loader leaves it as-is (no normalization),
+ // mirroring how toolCalls normalization is gated on Array.isArray.
+ blocks: "not-an-array",
+ } as unknown as FixtureFileEntry["response"],
+ };
+
+ const fixture = entryToFixture(entry);
+ const resp = fixture.response as ContentWithToolCallsResponse & { blocks?: unknown };
+ // Non-array blocks pass through untouched (no stringify attempt, no crash);
+ // downstream validation/builders own shape rejection.
+ expect(resp.blocks).toBe("not-an-array");
+ });
+});
diff --git a/src/__tests__/fixture-blocks-nonstreaming.test.ts b/src/__tests__/fixture-blocks-nonstreaming.test.ts
new file mode 100644
index 00000000..520910ee
--- /dev/null
+++ b/src/__tests__/fixture-blocks-nonstreaming.test.ts
@@ -0,0 +1,218 @@
+/**
+ * F2 (#274) — NON-streaming `blocks` ordering for order-observable surfaces.
+ *
+ * When a combined content+toolCalls fixture sets the optional `blocks` field,
+ * the non-streaming builders for the three surfaces whose response body is a
+ * positionally-observable ORDERED array MUST emit that array in block order:
+ *
+ * - Claude /v1/messages → `content[]` (text / tool_use)
+ * - Gemini :generateContent → `candidates[0].content.parts[]`
+ * - Responses /v1/responses → `output[]` (message / function_call)
+ *
+ * A `[toolCall, text]` fixture must therefore put the tool entry BEFORE the
+ * text entry in each non-streaming array (the opposite of the legacy
+ * text-first hardcoding). A fixture WITHOUT `blocks` must stay legacy
+ * text-first (back-compat).
+ *
+ * Real mock-server surface (mirrors the streaming per-provider tests): an
+ * actual `LLMock` listens, a real non-streaming HTTP request is made, and
+ * assertions read the wire JSON body.
+ */
+import { describe, it, expect, afterEach } from "vitest";
+import { LLMock } from "../llmock.js";
+import type { FixtureBlock } from "../types.js";
+
+const TOOL_FIRST_BLOCKS: FixtureBlock[] = [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go." },
+];
+
+describe("Non-streaming fixture block ordering (#274)", () => {
+ let mock: LLMock | null = null;
+
+ afterEach(async () => {
+ if (mock) {
+ await mock.stop();
+ mock = null;
+ }
+ });
+
+ // ── Claude /v1/messages — content[] is order-observable ───────────────────
+ describe("Claude /v1/messages", () => {
+ async function postClaude(userMessage: string): Promise<{
+ content: Array<{ type: string; text?: string; name?: string }>;
+ }> {
+ const res = await fetch(`${mock!.url}/v1/messages`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ "x-api-key": "test-key",
+ "anthropic-version": "2023-06-01",
+ },
+ body: JSON.stringify({
+ model: "claude-sonnet-4-6",
+ max_tokens: 1024,
+ messages: [{ role: "user", content: userMessage }],
+ stream: false,
+ }),
+ });
+ return res.json() as Promise<{
+ content: Array<{ type: string; text?: string; name?: string }>;
+ }>;
+ }
+
+ it("tool-first blocks: content[] leads with tool_use, then text", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "claude nonstream tool-first" },
+ response: {
+ content: "Here you go.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: TOOL_FIRST_BLOCKS,
+ },
+ });
+ await mock.start();
+
+ const body = await postClaude("claude nonstream tool-first");
+ const types = body.content.map((b) => b.type);
+ expect(types.indexOf("tool_use")).toBeLessThan(types.indexOf("text"));
+ expect(types[0]).toBe("tool_use");
+ expect(body.content[0].name).toBe("get_weather");
+ expect(body.content[1].text).toBe("Here you go.");
+ });
+
+ it("back-compat: no blocks keeps legacy text-first content[]", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "claude nonstream no-blocks" },
+ response: {
+ content: "Sure.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ });
+ await mock.start();
+
+ const body = await postClaude("claude nonstream no-blocks");
+ const types = body.content.map((b) => b.type);
+ expect(types.indexOf("text")).toBeLessThan(types.indexOf("tool_use"));
+ expect(types[0]).toBe("text");
+ });
+ });
+
+ // ── Gemini :generateContent — parts[] is order-observable ─────────────────
+ describe("Gemini :generateContent", () => {
+ async function postGemini(userMessage: string): Promise<{
+ candidates: Array<{
+ content: { parts: Array<{ text?: string; functionCall?: { name: string } }> };
+ }>;
+ }> {
+ const res = await fetch(`${mock!.url}/v1beta/models/gemini-2.0-flash:generateContent`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({
+ contents: [{ role: "user", parts: [{ text: userMessage }] }],
+ }),
+ });
+ return res.json() as Promise<{
+ candidates: Array<{
+ content: { parts: Array<{ text?: string; functionCall?: { name: string } }> };
+ }>;
+ }>;
+ }
+
+ it("tool-first blocks: parts[] leads with functionCall, then text", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "gemini nonstream tool-first" },
+ response: {
+ content: "Here you go.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: TOOL_FIRST_BLOCKS,
+ },
+ });
+ await mock.start();
+
+ const body = await postGemini("gemini nonstream tool-first");
+ const parts = body.candidates[0].content.parts;
+ const fcIdx = parts.findIndex((p) => p.functionCall);
+ const textIdx = parts.findIndex((p) => typeof p.text === "string" && !("thought" in p));
+ expect(fcIdx).toBeGreaterThanOrEqual(0);
+ expect(textIdx).toBeGreaterThanOrEqual(0);
+ expect(fcIdx).toBeLessThan(textIdx);
+ expect(parts[0].functionCall?.name).toBe("get_weather");
+ });
+
+ it("back-compat: no blocks keeps legacy text-first parts[]", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "gemini nonstream no-blocks" },
+ response: {
+ content: "Sure.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ });
+ await mock.start();
+
+ const body = await postGemini("gemini nonstream no-blocks");
+ const parts = body.candidates[0].content.parts;
+ const fcIdx = parts.findIndex((p) => p.functionCall);
+ const textIdx = parts.findIndex((p) => typeof p.text === "string");
+ expect(textIdx).toBeLessThan(fcIdx);
+ expect(parts[0].text).toBe("Sure.");
+ });
+ });
+
+ // ── OpenAI Responses /v1/responses — output[] is order-observable ─────────
+ describe("OpenAI Responses /v1/responses", () => {
+ async function postResponses(userMessage: string): Promise<{
+ output: Array<{ type: string; name?: string }>;
+ }> {
+ const res = await fetch(`${mock!.url}/v1/responses`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ input: [{ role: "user", content: userMessage }],
+ stream: false,
+ }),
+ });
+ return res.json() as Promise<{ output: Array<{ type: string; name?: string }> }>;
+ }
+
+ it("tool-first blocks: output[] leads with function_call, then message", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "responses nonstream tool-first" },
+ response: {
+ content: "Here you go.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: TOOL_FIRST_BLOCKS,
+ },
+ });
+ await mock.start();
+
+ const body = await postResponses("responses nonstream tool-first");
+ const types = body.output.map((o) => o.type);
+ expect(types.indexOf("function_call")).toBeLessThan(types.indexOf("message"));
+ expect(types[0]).toBe("function_call");
+ expect(body.output[0].name).toBe("get_weather");
+ });
+
+ it("back-compat: no blocks keeps legacy message-first output[]", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "responses nonstream no-blocks" },
+ response: {
+ content: "Sure.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ });
+ await mock.start();
+
+ const body = await postResponses("responses nonstream no-blocks");
+ const types = body.output.map((o) => o.type);
+ expect(types.indexOf("message")).toBeLessThan(types.indexOf("function_call"));
+ expect(types[0]).toBe("message");
+ });
+ });
+});
diff --git a/src/__tests__/fixture-blocks-ollama.test.ts b/src/__tests__/fixture-blocks-ollama.test.ts
new file mode 100644
index 00000000..c7a4e2fc
--- /dev/null
+++ b/src/__tests__/fixture-blocks-ollama.test.ts
@@ -0,0 +1,137 @@
+import { describe, it, expect, afterEach } from "vitest";
+import { LLMock } from "../llmock.js";
+
+interface OllamaChunk {
+ model?: string;
+ created_at?: string;
+ message?: {
+ role?: string;
+ content?: string;
+ reasoning_content?: string;
+ tool_calls?: Array<{ function: { name: string; arguments: unknown } }>;
+ };
+ done?: boolean;
+ [key: string]: unknown;
+}
+
+function parseNDJSON(body: string): OllamaChunk[] {
+ return body
+ .split("\n")
+ .filter((line) => line.trim().length > 0)
+ .map((line) => JSON.parse(line) as OllamaChunk);
+}
+
+async function ollamaChatStream(mock: LLMock, userMessage: string): Promise<OllamaChunk[]> {
+  const res = await fetch(`${mock.url}/api/chat`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: "llama3.1",
+      messages: [{ role: "user", content: userMessage }],
+      stream: true,
+    }),
+  });
+  return parseNDJSON(await res.text()); // one parsed chunk per non-empty body line
+}
+
+describe("Ollama — fixture block ordering (tool-first)", () => {
+ let mock: LLMock | null = null;
+
+ afterEach(async () => {
+ if (mock) {
+ await mock.stop();
+ mock = null;
+ }
+ });
+
+ it("emits the tool_calls chunk before the content chunk when blocks are tool-first", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test ollama blocks tool-first" },
+ response: {
+ content: "Here you go.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go." },
+ ],
+ },
+ });
+ await mock.start();
+
+ const chunks = await ollamaChatStream(mock, "test ollama blocks tool-first");
+
+ const toolChunkIdx = chunks.findIndex((c) => c.message?.tool_calls?.length);
+ const contentChunkIdx = chunks.findIndex((c) => c.message?.content);
+
+ expect(toolChunkIdx).toBeGreaterThanOrEqual(0);
+ expect(contentChunkIdx).toBeGreaterThanOrEqual(0);
+ // Tool-first block order: the tool_calls-bearing chunk precedes the content chunk.
+ expect(toolChunkIdx).toBeLessThan(contentChunkIdx);
+
+ // Tool call payload is preserved.
+ const toolChunk = chunks[toolChunkIdx];
+ expect(toolChunk.message!.tool_calls![0].function.name).toBe("get_weather");
+ expect(toolChunk.message!.tool_calls![0].function.arguments).toEqual({ city: "NYC" });
+
+ // Content is preserved across content chunks, in order, after the tool call.
+ const fullContent = chunks.map((c) => c.message?.content ?? "").join("");
+ expect(fullContent).toBe("Here you go.");
+
+ // Final/done chunk preserved exactly as legacy (done:true + timing fields).
+ const doneChunk = chunks.at(-1)!;
+ expect(doneChunk.done).toBe(true);
+ expect(doneChunk).toHaveProperty("total_duration");
+ });
+
+ it("back-compat: a no-blocks fixture is byte-identical to the legacy text-first stream", async () => {
+ // Legacy fixture (no blocks) drives the untouched else branch.
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test ollama legacy" },
+ response: {
+ content: "Let me check.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ });
+ await mock.start();
+ const legacyChunks = await ollamaChatStream(mock, "test ollama legacy");
+ await mock.stop();
+ mock = null;
+
+ // Same content+toolCalls expressed as text-first blocks should produce the
+ // SAME wire order as the legacy path (content chunks first, then tool_calls).
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test ollama blocks text-first" },
+ response: {
+ content: "Let me check.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: [
+ { type: "text", text: "Let me check." },
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ ],
+ },
+ });
+ await mock.start();
+ const blockChunks = await ollamaChatStream(mock, "test ollama blocks text-first");
+
+ // Normalize out per-request timestamps that legitimately differ.
+ const normalize = (chunks: OllamaChunk[]) =>
+ chunks.map((c) => {
+ const { created_at, ...rest } = c;
+ void created_at;
+ return rest;
+ });
+
+ const legacyContentIdx = legacyChunks.findIndex((c) => c.message?.content);
+ const legacyToolIdx = legacyChunks.findIndex((c) => c.message?.tool_calls?.length);
+ const blockContentIdx = blockChunks.findIndex((c) => c.message?.content);
+ const blockToolIdx = blockChunks.findIndex((c) => c.message?.tool_calls?.length);
+
+ // Legacy is text-first; text-first blocks must match that ordering.
+ expect(legacyContentIdx).toBeLessThan(legacyToolIdx);
+ expect(blockContentIdx).toBeLessThan(blockToolIdx);
+ expect(normalize(blockChunks)).toEqual(normalize(legacyChunks));
+ });
+});
diff --git a/src/__tests__/fixture-blocks-openai.test.ts b/src/__tests__/fixture-blocks-openai.test.ts
new file mode 100644
index 00000000..6c28c727
--- /dev/null
+++ b/src/__tests__/fixture-blocks-openai.test.ts
@@ -0,0 +1,159 @@
+import { describe, it, expect, afterEach } from "vitest";
+import { LLMock } from "../llmock.js";
+import type { SSEChunk } from "../types.js";
+
+function parseSSEChunks(body: string): SSEChunk[] {
+ return body
+ .split("\n\n")
+ .filter((line) => line.startsWith("data: ") && !line.includes("[DONE]"))
+ .map((line) => JSON.parse(line.slice(6)) as SSEChunk);
+}
+
+describe("OpenAI Chat Completions — fixture block ordering (#274)", () => {
+ let mock: LLMock | null = null;
+
+ afterEach(async () => {
+ if (mock) {
+ await mock.stop();
+ mock = null;
+ }
+ });
+
+ it("emits tool_call delta chunks before content delta chunks for a tool-first blocks fixture", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test blocks tool-first" },
+ response: {
+ // Legacy fields preserved for the guard; blocks drives emission order.
+ content: "After the call.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "After the call." },
+ ],
+ },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/chat/completions`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "test blocks tool-first" }],
+ stream: true,
+ }),
+ });
+
+ const chunks = parseSSEChunks(await res.text());
+ const contentChunks = chunks.filter((c) => c.choices?.[0]?.delta?.content);
+ const toolChunks = chunks.filter((c) => c.choices?.[0]?.delta?.tool_calls);
+ const finishChunk = chunks.find((c) => c.choices?.[0]?.finish_reason);
+
+ expect(contentChunks.length).toBeGreaterThan(0);
+ expect(toolChunks.length).toBeGreaterThan(0);
+
+ // The block array is [toolCall, text], so the emitted SSE chunk SEQUENCE
+ // must place the tool_call delta chunk(s) BEFORE the content delta chunk(s).
+ const firstToolIdx = chunks.indexOf(toolChunks[0]);
+ const firstContentIdx = chunks.indexOf(contentChunks[0]);
+ expect(firstToolIdx).toBeLessThan(firstContentIdx);
+
+ // Content + finish_reason preserved exactly as the legacy path.
+ const fullContent = contentChunks.map((c) => c.choices[0].delta.content).join("");
+ expect(fullContent).toBe("After the call.");
+ expect(finishChunk!.choices[0].finish_reason).toBe("tool_calls");
+
+ // Tool call assembled correctly with index 0.
+ expect(toolChunks[0].choices[0].delta.tool_calls![0].index).toBe(0);
+ expect(toolChunks[0].choices[0].delta.tool_calls![0].function!.name).toBe("get_weather");
+ });
+
+ it("assigns tool_call index in block encounter order for interleaved blocks", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test blocks interleave" },
+ response: {
+ content: "A B",
+ toolCalls: [
+ { name: "fn_a", arguments: '{"a":1}' },
+ { name: "fn_b", arguments: '{"b":2}' },
+ ],
+ blocks: [
+ { type: "toolCall", name: "fn_a", arguments: '{"a":1}' },
+ { type: "text", text: "A " },
+ { type: "toolCall", name: "fn_b", arguments: '{"b":2}' },
+ { type: "text", text: "B" },
+ ],
+ },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/chat/completions`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "test blocks interleave" }],
+ stream: true,
+ }),
+ });
+
+ const chunks = parseSSEChunks(await res.text());
+
+ // Encounter-order index assignment: fn_a -> 0, fn_b -> 1.
+ const initialToolChunks = chunks.filter(
+ (c) => c.choices?.[0]?.delta?.tool_calls?.[0]?.function?.name,
+ );
+ expect(initialToolChunks.map((c) => c.choices[0].delta.tool_calls![0].index)).toEqual([0, 1]);
+ expect(initialToolChunks[0].choices[0].delta.tool_calls![0].function!.name).toBe("fn_a");
+ expect(initialToolChunks[1].choices[0].delta.tool_calls![0].function!.name).toBe("fn_b");
+
+ // Wire sequence reflects block order: first tool chunk precedes first content chunk.
+ const firstToolIdx = chunks.indexOf(initialToolChunks[0]);
+ const firstContentIdx = chunks.findIndex((c) => c.choices?.[0]?.delta?.content);
+ expect(firstToolIdx).toBeLessThan(firstContentIdx);
+
+ const fullContent = chunks
+ .filter((c) => c.choices?.[0]?.delta?.content)
+ .map((c) => c.choices[0].delta.content)
+ .join("");
+ expect(fullContent).toBe("A B");
+ });
+
+ it("back-compat: a fixture WITHOUT blocks streams content-first (legacy path untouched)", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test no blocks legacy" },
+ response: {
+ content: "Let me check.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/chat/completions`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "test no blocks legacy" }],
+ stream: true,
+ }),
+ });
+
+ const chunks = parseSSEChunks(await res.text());
+ const contentChunks = chunks.filter((c) => c.choices?.[0]?.delta?.content);
+ const toolChunks = chunks.filter((c) => c.choices?.[0]?.delta?.tool_calls);
+ const finishChunk = chunks.find((c) => c.choices?.[0]?.finish_reason);
+
+ // Legacy: content strictly before tool calls.
+ const lastContentIdx = chunks.lastIndexOf(contentChunks.at(-1)!);
+ const firstToolIdx = chunks.indexOf(toolChunks[0]);
+ expect(lastContentIdx).toBeLessThan(firstToolIdx);
+
+ const fullContent = contentChunks.map((c) => c.choices[0].delta.content).join("");
+ expect(fullContent).toBe("Let me check.");
+ expect(finishChunk!.choices[0].finish_reason).toBe("tool_calls");
+ });
+});
diff --git a/src/__tests__/fixture-blocks-responses.test.ts b/src/__tests__/fixture-blocks-responses.test.ts
new file mode 100644
index 00000000..21e7e46a
--- /dev/null
+++ b/src/__tests__/fixture-blocks-responses.test.ts
@@ -0,0 +1,201 @@
+/**
+ * T1e — OpenAI Responses API: ordered `blocks` streaming.
+ *
+ * When a combined content+toolCalls fixture sets the optional `blocks` field,
+ * the Responses builder must assign `output_index` and assemble
+ * `response.completed.output` in the blocks' ARRAY ORDER. A `toolCall` block
+ * placed before a `text` block therefore yields a `function_call` output item
+ * at the LOWER `output_index`, appearing FIRST in the final `output` array —
+ * the opposite of the legacy (message-always-first) hardcoding.
+ *
+ * Real mock-server surface (mirrors content-with-toolcalls.test.ts): an actual
+ * `LLMock` listens, a real HTTP request streams SSE, and assertions read the
+ * wire bytes.
+ */
+import { describe, it, expect, afterEach } from "vitest";
+import { LLMock } from "../llmock.js";
+import type { FixtureBlock } from "../types.js";
+
+function parseResponsesSSEEvents(body: string): Array<{ type: string; [key: string]: unknown }> {
+ return body
+ .split("\n\n")
+ .filter((block) => block.trim().length > 0)
+ .map((block) => {
+ const dataLine = block.split("\n").find((l) => l.startsWith("data: "));
+ if (!dataLine) return null;
+ return JSON.parse(dataLine.slice(6)) as { type: string; [key: string]: unknown };
+ })
+ .filter(Boolean) as Array<{ type: string; [key: string]: unknown }>;
+}
+
+describe("OpenAI Responses API — fixture block ordering (#274)", () => {
+ let mock: LLMock | null = null;
+
+ afterEach(async () => {
+ if (mock) {
+ await mock.stop();
+ mock = null;
+ }
+ });
+
+ it("tool-first blocks: function_call takes output_index 0 and leads response.output", async () => {
+ mock = new LLMock({ port: 0 });
+ const blocks: FixtureBlock[] = [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go." },
+ ];
+ mock.addFixture({
+ match: { userMessage: "responses blocks tool-first" },
+ response: {
+ content: "Here you go.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks,
+ },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/responses`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ input: [{ role: "user", content: "responses blocks tool-first" }],
+ stream: true,
+ }),
+ });
+
+ const events = parseResponsesSSEEvents(await res.text());
+
+ // The function_call output item must be added at output_index 0 (before the
+ // message item), proving block-order output-index assignment.
+ const fcAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" &&
+ (e.item as { type: string })?.type === "function_call",
+ );
+ const msgAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" && (e.item as { type: string })?.type === "message",
+ );
+ expect(fcAdded).toBeDefined();
+ expect(msgAdded).toBeDefined();
+ expect((fcAdded as { output_index: number }).output_index).toBe(0);
+ expect((msgAdded as { output_index: number }).output_index).toBe(1);
+
+ // The final completed.output array must lead with the function_call item.
+ const completed = events.find((e) => e.type === "response.completed");
+ const output = (completed!.response as { output: Array<{ type: string }> }).output;
+ const types = output.map((o) => o.type);
+ expect(types.indexOf("function_call")).toBeLessThan(types.indexOf("message"));
+ expect(types[0]).toBe("function_call");
+
+ // Content + arguments still stream fully.
+ const allTextDeltas = events
+ .filter((e) => e.type === "response.output_text.delta")
+ .map((e) => (e as unknown as { delta: string }).delta)
+ .join("");
+ expect(allTextDeltas).toBe("Here you go.");
+ const allArgDeltas = events
+ .filter((e) => e.type === "response.function_call_arguments.delta")
+ .map((e) => (e as unknown as { delta: string }).delta)
+ .join("");
+ expect(allArgDeltas).toBe('{"city":"NYC"}');
+ });
+
+ it("back-compat: a fixture WITHOUT blocks keeps the legacy message-first ordering", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "responses no blocks legacy" },
+ response: {
+ content: "Sure.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/responses`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ input: [{ role: "user", content: "responses no blocks legacy" }],
+ stream: true,
+ }),
+ });
+
+ const events = parseResponsesSSEEvents(await res.text());
+
+ const msgAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" && (e.item as { type: string })?.type === "message",
+ );
+ const fcAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" &&
+ (e.item as { type: string })?.type === "function_call",
+ );
+ // Legacy hardcoding: message at index 0, function_call at index 1.
+ expect((msgAdded as { output_index: number }).output_index).toBe(0);
+ expect((fcAdded as { output_index: number }).output_index).toBe(1);
+
+ const completed = events.find((e) => e.type === "response.completed");
+ const output = (completed!.response as { output: Array<{ type: string }> }).output;
+ const types = output.map((o) => o.type);
+ expect(types.indexOf("message")).toBeLessThan(types.indexOf("function_call"));
+ });
+
+ it("empty blocks array falls back to the legacy path (content/toolCalls + terminal completed)", async () => {
+ mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "responses empty blocks" },
+ response: {
+ content: "Sure.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: [],
+ },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/responses`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ input: [{ role: "user", content: "responses empty blocks" }],
+ stream: true,
+ }),
+ });
+
+ const events = parseResponsesSSEEvents(await res.text());
+
+ // Empty blocks must fall back to legacy: both a message and a function_call item.
+ const msgAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" && (e.item as { type: string })?.type === "message",
+ );
+ const fcAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" &&
+ (e.item as { type: string })?.type === "function_call",
+ );
+ expect(msgAdded).toBeDefined();
+ expect(fcAdded).toBeDefined();
+
+ // Content + arguments still stream fully (not silently dropped).
+ const allTextDeltas = events
+ .filter((e) => e.type === "response.output_text.delta")
+ .map((e) => (e as unknown as { delta: string }).delta)
+ .join("");
+ expect(allTextDeltas).toBe("Sure.");
+ const allArgDeltas = events
+ .filter((e) => e.type === "response.function_call_arguments.delta")
+ .map((e) => (e as unknown as { delta: string }).delta)
+ .join("");
+ expect(allArgDeltas).toBe('{"city":"NYC"}');
+
+ // Terminal completed event with usage is present (not a role+finish-only stream).
+ const completed = events.find((e) => e.type === "response.completed");
+ expect(completed).toBeDefined();
+ expect((completed!.response as { usage?: unknown }).usage).toBeDefined();
+ });
+});
diff --git a/src/__tests__/fixture-blocks-scoped-out.test.ts b/src/__tests__/fixture-blocks-scoped-out.test.ts
new file mode 100644
index 00000000..a325b545
--- /dev/null
+++ b/src/__tests__/fixture-blocks-scoped-out.test.ts
@@ -0,0 +1,143 @@
+/**
+ * #274 slot T3 — SCOPED-OUT consumer safety for ordered `blocks`.
+ *
+ * The `blocks` field is honored only by the five in-scope stream builders
+ * (OpenAI chat, Anthropic, Gemini, Ollama, OpenAI Responses + the WS Responses
+ * dispatch). The OTHER consumers of `isContentWithToolCallsResponse` —
+ * Bedrock (`/model/{id}/invoke`), Cohere (`/v2/chat`), and Gemini Interactions
+ * (`/v1beta/interactions`) — were deliberately left UNCHANGED: they read only
+ * `.content` / `.toolCalls` and must completely IGNORE `.blocks`.
+ *
+ * These tests drive each scoped-out consumer with a fixture that ALSO carries a
+ * `blocks` array (in an order that differs from the legacy text-first shape).
+ * The consumer must NOT crash and must serve the legacy `{content, toolCalls}`
+ * payload exactly as if `blocks` were absent.
+ */
+import { describe, it, expect, afterEach } from "vitest";
+import * as http from "node:http";
+import type { Fixture, FixtureBlock } from "../types.js";
+import { createServer, type ServerInstance } from "../server.js";
+
+function post(
+  url: string,
+  body: unknown,
+): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> {
+  return new Promise((resolve, reject) => {
+    const data = JSON.stringify(body);
+    const parsed = new URL(url);
+    const req = http.request(
+      {
+        hostname: parsed.hostname,
+        port: parsed.port,
+        path: parsed.pathname + parsed.search, // keep any query string on the request path
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          "Content-Length": Buffer.byteLength(data),
+        },
+      },
+      (res) => {
+        const chunks: Buffer[] = [];
+        res.on("data", (c: Buffer) => chunks.push(c));
+        res.on("end", () => {
+          resolve({
+            status: res.statusCode ?? 0,
+            headers: res.headers,
+            body: Buffer.concat(chunks).toString(),
+          });
+        });
+      },
+    );
+    req.on("error", reject);
+    req.write(data);
+    req.end();
+  });
+}
+
+// A combined content+toolCalls fixture that ALSO carries a tool-first `blocks`
+// array — the exact shape the scoped-out consumers must ignore.
+const toolFirstBlocks: FixtureBlock[] = [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"SF"}' },
+ { type: "text", text: "Let me help you" },
+];
+
+const blocksBearingFixture: Fixture = {
+ match: { userMessage: "scoped-out blocks" },
+ response: {
+ content: "Let me help you",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"SF"}' }],
+ blocks: toolFirstBlocks,
+ },
+};
+
+let instance: ServerInstance | null = null;
+
+afterEach(async () => {
+  if (instance) {
+    await new Promise<void>((resolve) => {
+      instance!.server.close(() => resolve()); // settle once close() invokes its callback
+    });
+    instance = null;
+  }
+});
+
+describe("#274 scoped-out consumers ignore `blocks` without crashing", () => {
+ it("Bedrock /model/{id}/invoke serves legacy content+tool_use, ignoring blocks", async () => {
+ instance = await createServer([blocksBearingFixture]);
+ const res = await post(
+ `${instance.url}/model/anthropic.claude-3-5-sonnet-20241022-v2:0/invoke`,
+ {
+ anthropic_version: "bedrock-2023-05-31",
+ max_tokens: 512,
+ messages: [{ role: "user", content: "scoped-out blocks" }],
+ },
+ );
+
+ expect(res.status).toBe(200);
+ const body = JSON.parse(res.body);
+ expect(body.type).toBe("message");
+ // Legacy text-first Anthropic shape: text content then tool_use — NOT the
+ // tool-first ordering carried in `blocks` (which Bedrock must ignore).
+ expect(body.content[0].type).toBe("text");
+ expect(body.content[0].text).toBe("Let me help you");
+ expect(body.content[1].type).toBe("tool_use");
+ expect(body.content[1].name).toBe("get_weather");
+ expect(body.content[1].input).toEqual({ city: "SF" });
+ expect(body.stop_reason).toBe("tool_use");
+ });
+
+ it("Cohere /v2/chat serves legacy content+tool_calls, ignoring blocks", async () => {
+ instance = await createServer([blocksBearingFixture]);
+ const res = await post(`${instance.url}/v2/chat`, {
+ model: "command-r-plus",
+ messages: [{ role: "user", content: "scoped-out blocks" }],
+ stream: false,
+ });
+
+ expect(res.status).toBe(200);
+ const body = JSON.parse(res.body);
+ // Cohere reads only content/toolCalls; blocks is ignored, no crash.
+ expect(body.message.tool_calls).toHaveLength(1);
+ expect(body.message.tool_calls[0].function.name).toBe("get_weather");
+ expect(body.message.tool_calls[0].function.arguments).toBe('{"city":"SF"}');
+ });
+
+ it("Gemini Interactions /v1beta/interactions serves legacy steps, ignoring blocks", async () => {
+ instance = await createServer([blocksBearingFixture]);
+ const res = await post(`${instance.url}/v1beta/interactions`, {
+ model: "gemini-2.5-flash",
+ input: "scoped-out blocks",
+ stream: false,
+ });
+
+ expect(res.status).toBe(200);
+ const body = JSON.parse(res.body);
+ // Reads only content/toolCalls; blocks is ignored, no crash.
+ expect(body.status).toBe("requires_action");
+ expect(body.output_text).toBe("Let me help you");
+ expect(body.steps).toHaveLength(2);
+ expect(body.steps[0].type).toBe("model_output");
+ expect(body.steps[1].type).toBe("function_call");
+ expect(body.steps[1].name).toBe("get_weather");
+ });
+});
diff --git a/src/__tests__/fixture-loader.test.ts b/src/__tests__/fixture-loader.test.ts
index cc0d0b4f..c7701a22 100644
--- a/src/__tests__/fixture-loader.test.ts
+++ b/src/__tests__/fixture-loader.test.ts
@@ -762,6 +762,160 @@ describe("validateFixtures", () => {
).toBe(true);
});
+ // --- blocks checks (#274 F3+F8) ---
+ // A malformed `blocks` array must be REJECTED at load time so it never
+ // reaches the dispatch/builder (where resolveFixtureBlocks would throw AFTER
+ // the journal already recorded status:200). Mirrors the toolCalls checks.
+
+ it("error: blocks is not an array", () => {
+ const fixtures = [
+ makeFixture({
+ response: {
+ content: "ok",
+ toolCalls: [{ name: "fn", arguments: "{}" }],
+ blocks: "not-an-array",
+ } as never,
+ }),
+ ];
+ const results = validateFixtures(fixtures);
+ expect(
+ results.some((r) => r.severity === "error" && r.message.includes("blocks must be an array")),
+ ).toBe(true);
+ });
+
+ it("error: block with unknown type", () => {
+ const fixtures = [
+ makeFixture({
+ response: {
+ content: "ok",
+ toolCalls: [{ name: "fn", arguments: "{}" }],
+ blocks: [{ type: "bogus" }],
+ } as never,
+ }),
+ ];
+ const results = validateFixtures(fixtures);
+ expect(
+ results.some(
+ (r) => r.severity === "error" && r.message.includes("type") && r.message.includes("blocks"),
+ ),
+ ).toBe(true);
+ });
+
+ it("error: text block with non-string text", () => {
+ const fixtures = [
+ makeFixture({
+ response: {
+ content: "ok",
+ toolCalls: [{ name: "fn", arguments: "{}" }],
+ // object `text` would replay as `[object Object]`
+ blocks: [{ type: "text", text: { nested: true } }],
+ } as never,
+ }),
+ ];
+ const results = validateFixtures(fixtures);
+ expect(
+ results.some(
+ (r) =>
+ r.severity === "error" && r.message.includes("blocks[0]") && r.message.includes("text"),
+ ),
+ ).toBe(true);
+ });
+
+ it("error: toolCall block with non-string name", () => {
+ const fixtures = [
+ makeFixture({
+ response: {
+ content: "ok",
+ toolCalls: [{ name: "fn", arguments: "{}" }],
+ blocks: [{ type: "toolCall", name: 123, arguments: "{}" }],
+ } as never,
+ }),
+ ];
+ const results = validateFixtures(fixtures);
+ expect(
+ results.some(
+ (r) =>
+ r.severity === "error" && r.message.includes("blocks[0]") && r.message.includes("name"),
+ ),
+ ).toBe(true);
+ });
+
+ it("error: toolCall block with empty name", () => {
+ const fixtures = [
+ makeFixture({
+ response: {
+ content: "ok",
+ toolCalls: [{ name: "fn", arguments: "{}" }],
+ blocks: [{ type: "toolCall", name: "", arguments: "{}" }],
+ } as never,
+ }),
+ ];
+ const results = validateFixtures(fixtures);
+ expect(
+ results.some(
+ (r) =>
+ r.severity === "error" && r.message.includes("blocks[0]") && r.message.includes("name"),
+ ),
+ ).toBe(true);
+ });
+
+ it("error: toolCall block with invalid-JSON arguments", () => {
+ const fixtures = [
+ makeFixture({
+ response: {
+ content: "ok",
+ toolCalls: [{ name: "fn", arguments: "{}" }],
+ blocks: [{ type: "toolCall", name: "fn", arguments: "not json" }],
+ } as never,
+ }),
+ ];
+ const results = validateFixtures(fixtures);
+ expect(
+ results.some(
+ (r) =>
+ r.severity === "error" &&
+ r.message.includes("blocks[0]") &&
+ r.message.includes("not valid JSON"),
+ ),
+ ).toBe(true);
+ });
+
+ it("error: toolCall block with non-string id", () => {
+ const fixtures = [
+ makeFixture({
+ response: {
+ content: "ok",
+ toolCalls: [{ name: "fn", arguments: "{}" }],
+ blocks: [{ type: "toolCall", name: "fn", arguments: "{}", id: 7 }],
+ } as never,
+ }),
+ ];
+ const results = validateFixtures(fixtures);
+ expect(
+ results.some(
+ (r) =>
+ r.severity === "error" && r.message.includes("blocks[0]") && r.message.includes("id"),
+ ),
+ ).toBe(true);
+ });
+
+ it("no error: a valid blocks array passes validation", () => {
+ const fixtures = [
+ makeFixture({
+ response: {
+ content: "Done.",
+ toolCalls: [{ name: "search", arguments: '{"q":"x"}' }],
+ blocks: [
+ { type: "toolCall", name: "search", arguments: '{"q":"x"}', id: "call_1" },
+ { type: "text", text: "Done." },
+ ],
+ } as never,
+ }),
+ ];
+ const results = validateFixtures(fixtures);
+ expect(results.filter((r) => r.severity === "error")).toEqual([]);
+ });
+
it("error: error response with empty message", () => {
const fixtures = [
makeFixture({ response: { error: { message: "", type: "e" }, status: 500 } }),
diff --git a/src/__tests__/recorder.test.ts b/src/__tests__/recorder.test.ts
index bd84cd0a..da925d38 100644
--- a/src/__tests__/recorder.test.ts
+++ b/src/__tests__/recorder.test.ts
@@ -776,6 +776,120 @@ describe("recorder streaming collapse", () => {
}
});
+ // ---- Ordered `blocks` persistence (#274) ---------------------------------
+ // A tool-call-before-text Anthropic stream is interleaved, so the recorder
+ // must persist the ordered `blocks` array; an ordinary text-then-tools stream
+ // is NOT interleaved, so the recorder keeps the legacy `{content, toolCalls}`
+ // shape with NO `blocks` key (golden recordings stay byte-identical).
+ async function recordAnthropicStream(
+ sse: string,
+ prefix: string,
+ ): Promise<Record<string, unknown>> {
+ const anthropicUpstream = http.createServer((_upReq, upRes) => {
+ upRes.writeHead(200, { "Content-Type": "text/event-stream" });
+ upRes.end(sse);
+ });
+ await new Promise<void>((resolve) => anthropicUpstream.listen(0, "127.0.0.1", () => resolve()));
+ const upstreamPort = (anthropicUpstream.address() as { port: number }).port;
+ const fixturePath = fs.mkdtempSync(path.join(os.tmpdir(), prefix));
+
+ const recorderServer = http.createServer((req, res) => {
+ const chunks: Buffer[] = [];
+ req.on("data", (c: Buffer) => chunks.push(c));
+ req.on("end", async () => {
+ const rawBody = Buffer.concat(chunks).toString();
+ await proxyAndRecord(
+ req,
+ res,
+ JSON.parse(rawBody),
+ "anthropic",
+ "/v1/messages",
+ [],
+ {
+ record: {
+ providers: { anthropic: `http://127.0.0.1:${upstreamPort}` },
+ fixturePath,
+ },
+ logger: new Logger("silent"),
+ },
+ rawBody,
+ );
+ });
+ });
+ await new Promise<void>((resolve) => recorderServer.listen(0, "127.0.0.1", () => resolve()));
+ const recorderPort = (recorderServer.address() as { port: number }).port;
+
+ try {
+ const resp = await post(`http://127.0.0.1:${recorderPort}/v1/messages`, {
+ model: "claude-3-7-sonnet-20250219",
+ max_tokens: 1024,
+ stream: true,
+ messages: [{ role: "user", content: "go" }],
+ });
+ expect(resp.status).toBe(200);
+ const files = fs.readdirSync(fixturePath).filter((f) => f.endsWith(".json"));
+ expect(files).toHaveLength(1);
+ const fixtureContent = JSON.parse(
+ fs.readFileSync(path.join(fixturePath, files[0]), "utf-8"),
+ ) as FixtureFile;
+ return fixtureContent.fixtures[0].response as Record<string, unknown>;
+ } finally {
+ await new Promise<void>((resolve) => anthropicUpstream.close(() => resolve()));
+ await new Promise<void>((resolve) => recorderServer.close(() => resolve()));
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ }
+
+ it("persists ordered blocks for a tool-before-text streamed turn", async () => {
+ const sse = [
+ `event: content_block_start\ndata: ${JSON.stringify({ index: 0, content_block: { type: "tool_use", id: "toolu_1", name: "get_weather", input: {} } })}`,
+ "",
+ `event: content_block_delta\ndata: ${JSON.stringify({ index: 0, delta: { type: "input_json_delta", partial_json: '{"city":"Paris"}' } })}`,
+ "",
+ `event: content_block_stop\ndata: ${JSON.stringify({ index: 0 })}`,
+ "",
+ `event: content_block_start\ndata: ${JSON.stringify({ index: 1, content_block: { type: "text", text: "" } })}`,
+ "",
+ `event: content_block_delta\ndata: ${JSON.stringify({ index: 1, delta: { type: "text_delta", text: "Done." } })}`,
+ "",
+ `event: content_block_stop\ndata: ${JSON.stringify({ index: 1 })}`,
+ "",
+ `event: message_stop\ndata: {}`,
+ "",
+ ].join("\n");
+ const saved = await recordAnthropicStream(sse, "aimock-recorder-blocks-tool-");
+ expect(saved.blocks).toEqual([
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"Paris"}', id: "toolu_1" },
+ { type: "text", text: "Done." },
+ ]);
+ // Legacy fields remain populated for replay/back-compat.
+ expect(saved.content).toBe("Done.");
+ expect(saved.toolCalls).toHaveLength(1);
+ });
+
+ it("persists the legacy shape (no blocks) for a text-then-tools streamed turn", async () => {
+ const sse = [
+ `event: content_block_start\ndata: ${JSON.stringify({ index: 0, content_block: { type: "text", text: "" } })}`,
+ "",
+ `event: content_block_delta\ndata: ${JSON.stringify({ index: 0, delta: { type: "text_delta", text: "Sure." } })}`,
+ "",
+ `event: content_block_stop\ndata: ${JSON.stringify({ index: 0 })}`,
+ "",
+ `event: content_block_start\ndata: ${JSON.stringify({ index: 1, content_block: { type: "tool_use", id: "toolu_1", name: "get_weather", input: {} } })}`,
+ "",
+ `event: content_block_delta\ndata: ${JSON.stringify({ index: 1, delta: { type: "input_json_delta", partial_json: '{"city":"Paris"}' } })}`,
+ "",
+ `event: content_block_stop\ndata: ${JSON.stringify({ index: 1 })}`,
+ "",
+ `event: message_stop\ndata: {}`,
+ "",
+ ].join("\n");
+ const saved = await recordAnthropicStream(sse, "aimock-recorder-blocks-text-");
+ expect(saved.blocks).toBeUndefined();
+ expect(saved.content).toBe("Sure.");
+ expect(saved.toolCalls).toHaveLength(1);
+ });
+
it("captures Anthropic redacted_thinking block data into the recorded fixture's redactedThinking", async () => {
const REDACTED_DATA = "EncryptedRedactedThinkingPayloadAAA==";
// Raw Anthropic SSE upstream that streams a redacted_thinking block (its
diff --git a/src/__tests__/stream-collapse.test.ts b/src/__tests__/stream-collapse.test.ts
index f4d3087e..4908cb84 100644
--- a/src/__tests__/stream-collapse.test.ts
+++ b/src/__tests__/stream-collapse.test.ts
@@ -3583,3 +3583,349 @@ describe("harmony fail-safe — quoted whole-message ambiguity (known limitation
expect(direct.content).not.toBe("To emit write hello<|return|> and then stop");
});
});
+
+// ---------------------------------------------------------------------------
+// Cross-channel block-order instrumentation (#274)
+//
+// The collapsers must retain enough cross-channel order to let the recorder
+// decide whether a stream is "interleaved" — a tool-call delta appears
+// strictly before the first content delta, OR a content delta appears after
+// any tool-call delta. When interleaved, `CollapseResult.blocks` carries the
+// ordered FixtureBlock[] in stream order. When NOT interleaved (text-first,
+// text-only, or tool-only), `blocks` stays undefined so the recorder persists
+// the legacy `{ content, toolCalls }` shape byte-identically.
+// ---------------------------------------------------------------------------
+
+describe("stream block-order instrumentation (#274)", () => {
+ // ---- OpenAI SSE ----------------------------------------------------------
+ describe("collapseOpenAISSE blocks", () => {
+ const textDelta = (text: string) =>
+ `data: ${JSON.stringify({ choices: [{ delta: { content: text } }] })}`;
+ const toolDelta = (index: number, opts: { id?: string; name?: string; args?: string }) =>
+ `data: ${JSON.stringify({
+ choices: [
+ {
+ delta: {
+ tool_calls: [
+ {
+ index,
+ ...(opts.id ? { id: opts.id } : {}),
+ function: {
+ ...(opts.name ? { name: opts.name } : {}),
+ ...(opts.args !== undefined ? { arguments: opts.args } : {}),
+ },
+ },
+ ],
+ },
+ },
+ ],
+ })}`;
+
+ it("text-first stream is NOT interleaved → no blocks", () => {
+ const body = [
+ textDelta("Hello "),
+ "",
+ textDelta("world"),
+ "",
+ toolDelta(0, { id: "call_1", name: "get_weather", args: '{"city":"Paris"}' }),
+ "",
+ "data: [DONE]",
+ "",
+ ].join("\n");
+ const result = collapseOpenAISSE(body);
+ expect(result.content).toBe("Hello world");
+ expect(result.toolCalls).toHaveLength(1);
+ expect(result.blocks).toBeUndefined();
+ });
+
+ it("tool-first stream is interleaved → blocks in tool-first order", () => {
+ const body = [
+ toolDelta(0, { id: "call_1", name: "get_weather", args: '{"city":"Paris"}' }),
+ "",
+ textDelta("Here you go"),
+ "",
+ "data: [DONE]",
+ "",
+ ].join("\n");
+ const result = collapseOpenAISSE(body);
+ expect(result.blocks).toBeDefined();
+ expect(result.blocks).toEqual([
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"Paris"}', id: "call_1" },
+ { type: "text", text: "Here you go" },
+ ]);
+ });
+
+ it("tools→text→tools interleave is captured in stream order", () => {
+ const body = [
+ toolDelta(0, { id: "call_1", name: "a", args: "{}" }),
+ "",
+ textDelta("middle"),
+ "",
+ toolDelta(1, { id: "call_2", name: "b", args: "{}" }),
+ "",
+ "data: [DONE]",
+ "",
+ ].join("\n");
+ const result = collapseOpenAISSE(body);
+ expect(result.blocks).toBeDefined();
+ expect(result.blocks).toEqual([
+ { type: "toolCall", name: "a", arguments: "{}", id: "call_1" },
+ { type: "text", text: "middle" },
+ { type: "toolCall", name: "b", arguments: "{}", id: "call_2" },
+ ]);
+ });
+
+ it("text-only stream has no blocks", () => {
+ const body = [textDelta("just text"), "", "data: [DONE]", ""].join("\n");
+ expect(collapseOpenAISSE(body).blocks).toBeUndefined();
+ });
+
+ it("tool-only stream has no blocks (no content channel to order against)", () => {
+ const body = [
+ toolDelta(0, { id: "call_1", name: "a", args: "{}" }),
+ "",
+ "data: [DONE]",
+ "",
+ ].join("\n");
+ expect(collapseOpenAISSE(body).blocks).toBeUndefined();
+ });
+
+ // ---- #274 F4/F5: blocks must agree with flat toolCalls -----------------
+
+ it("out-of-arrival-order tool indices: blocks[i] and toolCalls[i] are the SAME call (F4)", () => {
+ // First-arriving tool call carries the HIGHER index (5); second carries
+ // the LOWER (1). Index-sorting the flat list would put call_low first,
+ // disagreeing with the stream-arrival-ordered blocks. Interleave a text
+ // delta after the first tool so `blocks` is emitted.
+ const body = [
+ toolDelta(5, { id: "call_high", name: "first_arrived", args: '{"a":1}' }),
+ "",
+ textDelta("between"),
+ "",
+ toolDelta(1, { id: "call_low", name: "second_arrived", args: '{"b":2}' }),
+ "",
+ "data: [DONE]",
+ "",
+ ].join("\n");
+ const result = collapseOpenAISSE(body);
+ expect(result.blocks).toBeDefined();
+ const blockToolCalls = result.blocks!.filter((b) => b.type === "toolCall");
+ // blocks[i] and toolCalls[i] must describe the SAME call (consistent
+ // ordering + identity). Pre-fix the flat list was index-sorted
+ // (call_low first) while blocks were arrival-sorted (call_high first).
+ expect(result.toolCalls).toHaveLength(blockToolCalls.length);
+ result.toolCalls!.forEach((tc, i) => {
+ const block = blockToolCalls[i] as { name: string; arguments: string; id?: string };
+ expect(block.name).toBe(tc.name);
+ expect(block.arguments).toBe(tc.arguments);
+ expect(block.id).toBe(tc.id);
+ });
+ // Concretely: the first call by stream arrival is `first_arrived`.
+ expect(result.toolCalls![0].name).toBe("first_arrived");
+ expect((blockToolCalls[0] as { name: string }).name).toBe("first_arrived");
+ });
+
+ it("tool-call with no argument deltas: block arguments is valid JSON '{}', matching flat (F5)", () => {
+ // The first tool call NEVER receives any `arguments` fragment — its
+ // accumulator stays "". A text delta after it makes the stream
+ // interleaved so `blocks` is emitted.
+ const body = [
+ toolDelta(0, { id: "call_noargs", name: "no_args" }),
+ "",
+ textDelta("after"),
+ "",
+ "data: [DONE]",
+ "",
+ ].join("\n");
+ const result = collapseOpenAISSE(body);
+ expect(result.blocks).toBeDefined();
+ const block = result.blocks!.find((b) => b.type === "toolCall") as {
+ arguments: string;
+ };
+ // Pre-fix: block.arguments === "" (invalid JSON), flat sanitized to "{}".
+ expect(block.arguments).toBe("{}");
+ expect(() => JSON.parse(block.arguments)).not.toThrow();
+ // And it agrees with the flat representation.
+ expect(result.toolCalls![0].arguments).toBe("{}");
+ expect(block.arguments).toBe(result.toolCalls![0].arguments);
+ });
+ });
+
+ // ---- Anthropic SSE -------------------------------------------------------
+ describe("collapseAnthropicSSE blocks", () => {
+ const textBlock = (index: number, text: string) =>
+ [
+ `event: content_block_start`,
+ `data: ${JSON.stringify({ type: "content_block_start", index, content_block: { type: "text", text: "" } })}`,
+ "",
+ `event: content_block_delta`,
+ `data: ${JSON.stringify({ type: "content_block_delta", index, delta: { type: "text_delta", text } })}`,
+ "",
+ `event: content_block_stop`,
+ `data: ${JSON.stringify({ type: "content_block_stop", index })}`,
+ ].join("\n");
+ const toolBlock = (index: number, id: string, name: string, args: string) =>
+ [
+ `event: content_block_start`,
+ `data: ${JSON.stringify({ type: "content_block_start", index, content_block: { type: "tool_use", id, name, input: {} } })}`,
+ "",
+ `event: content_block_delta`,
+ `data: ${JSON.stringify({ type: "content_block_delta", index, delta: { type: "input_json_delta", partial_json: args } })}`,
+ "",
+ `event: content_block_stop`,
+ `data: ${JSON.stringify({ type: "content_block_stop", index })}`,
+ ].join("\n");
+
+ it("text-first → no blocks", () => {
+ const body = [textBlock(0, "Hi"), "", toolBlock(1, "toolu_1", "fn", "{}"), ""].join("\n");
+ const result = collapseAnthropicSSE(body);
+ expect(result.content).toBe("Hi");
+ expect(result.toolCalls).toHaveLength(1);
+ expect(result.blocks).toBeUndefined();
+ });
+
+ it("tool-first → blocks in tool-first order", () => {
+ const body = [toolBlock(0, "toolu_1", "fn", '{"x":1}'), "", textBlock(1, "after"), ""].join(
+ "\n",
+ );
+ const result = collapseAnthropicSSE(body);
+ expect(result.blocks).toEqual([
+ { type: "toolCall", name: "fn", arguments: '{"x":1}', id: "toolu_1" },
+ { type: "text", text: "after" },
+ ]);
+ });
+
+ it("tools→text→tools interleave captured in order", () => {
+ const body = [
+ toolBlock(0, "toolu_1", "a", "{}"),
+ "",
+ textBlock(1, "mid"),
+ "",
+ toolBlock(2, "toolu_2", "b", "{}"),
+ "",
+ ].join("\n");
+ const result = collapseAnthropicSSE(body);
+ expect(result.blocks).toEqual([
+ { type: "toolCall", name: "a", arguments: "{}", id: "toolu_1" },
+ { type: "text", text: "mid" },
+ { type: "toolCall", name: "b", arguments: "{}", id: "toolu_2" },
+ ]);
+ });
+ });
+
+ // ---- Gemini SSE ----------------------------------------------------------
+ describe("collapseGeminiSSE blocks", () => {
+ const textPart = (text: string) =>
+ `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text }] } }] })}`;
+ const fcPart = (name: string, args: Record<string, unknown>) =>
+ `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ functionCall: { name, args } }] } }] })}`;
+
+ it("text-first → no blocks", () => {
+ const body = [textPart("Hi"), "", fcPart("fn", {}), ""].join("\n");
+ const result = collapseGeminiSSE(body);
+ expect(result.content).toBe("Hi");
+ expect(result.toolCalls).toHaveLength(1);
+ expect(result.blocks).toBeUndefined();
+ });
+
+ it("tool-first → blocks in tool-first order", () => {
+ const body = [fcPart("fn", { x: 1 }), "", textPart("after"), ""].join("\n");
+ const result = collapseGeminiSSE(body);
+ expect(result.blocks).toEqual([
+ { type: "toolCall", name: "fn", arguments: '{"x":1}' },
+ { type: "text", text: "after" },
+ ]);
+ });
+
+ it("tools→text→tools interleave captured in order", () => {
+ const body = [fcPart("a", {}), "", textPart("mid"), "", fcPart("b", {}), ""].join("\n");
+ const result = collapseGeminiSSE(body);
+ expect(result.blocks).toEqual([
+ { type: "toolCall", name: "a", arguments: "{}" },
+ { type: "text", text: "mid" },
+ { type: "toolCall", name: "b", arguments: "{}" },
+ ]);
+ });
+
+ // #274 R2-N2: an interleaved Gemini AUDIO turn must NOT carry `blocks`.
+ // The audio collapse shape (AudioResponse) has no `blocks` slot and the
+ // recorder's audio branch never persists it — so any ordered blocks built
+ // on the audio path would be silently produced-then-dropped. The companion
+ // content / toolCalls / reasoning are still preserved (flat), just no blocks.
+ const audioPart = (mimeType: string, data: string) =>
+ `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ inlineData: { mimeType, data } }] } }] })}`;
+
+ it("audio turn with interleaved tool/text deltas → no blocks (audio shape)", () => {
+ const body = [
+ fcPart("fn", { x: 1 }),
+ "",
+ textPart("between"),
+ "",
+ audioPart("audio/mpeg", "QUJD"),
+ "",
+ ].join("\n");
+ const result = collapseGeminiSSE(body);
+ // Audio shape: audio bytes captured, companions preserved flat.
+ expect(result.audioB64).toBe("QUJD");
+ expect(result.audioMimeType).toBe("audio/mpeg");
+ expect(result.content).toBe("between");
+ expect(result.toolCalls).toHaveLength(1);
+ expect(result.toolCalls![0].name).toBe("fn");
+ // The audio result type cannot carry ordered blocks — must be absent.
+ expect(result.blocks).toBeUndefined();
+ });
+
+ it("non-audio interleaved turn STILL produces blocks (F4F5 regression guard)", () => {
+ const body = [fcPart("fn", { x: 1 }), "", textPart("after"), ""].join("\n");
+ const result = collapseGeminiSSE(body);
+ expect(result.audioB64).toBeUndefined();
+ expect(result.blocks).toEqual([
+ { type: "toolCall", name: "fn", arguments: '{"x":1}' },
+ { type: "text", text: "after" },
+ ]);
+ });
+ });
+
+ // ---- Ollama NDJSON -------------------------------------------------------
+ describe("collapseOllamaNDJSON blocks", () => {
+ const textLine = (content: string) =>
+ JSON.stringify({ model: "llama3", message: { role: "assistant", content }, done: false });
+ const toolLine = (name: string, args: Record<string, unknown>) =>
+ JSON.stringify({
+ model: "llama3",
+ message: {
+ role: "assistant",
+ content: "",
+ tool_calls: [{ function: { name, arguments: args } }],
+ },
+ done: false,
+ });
+
+ it("text-first → no blocks", () => {
+ const body = [textLine("Hi"), toolLine("fn", {})].join("\n");
+ const result = collapseOllamaNDJSON(body);
+ expect(result.content).toBe("Hi");
+ expect(result.toolCalls).toHaveLength(1);
+ expect(result.blocks).toBeUndefined();
+ });
+
+ it("tool-first → blocks in tool-first order", () => {
+ const body = [toolLine("fn", { x: 1 }), textLine("after")].join("\n");
+ const result = collapseOllamaNDJSON(body);
+ expect(result.blocks).toEqual([
+ { type: "toolCall", name: "fn", arguments: '{"x":1}' },
+ { type: "text", text: "after" },
+ ]);
+ });
+
+ it("tools→text→tools interleave captured in order", () => {
+ const body = [toolLine("a", {}), textLine("mid"), toolLine("b", {})].join("\n");
+ const result = collapseOllamaNDJSON(body);
+ expect(result.blocks).toEqual([
+ { type: "toolCall", name: "a", arguments: "{}" },
+ { type: "text", text: "mid" },
+ { type: "toolCall", name: "b", arguments: "{}" },
+ ]);
+ });
+ });
+});
diff --git a/src/__tests__/ws-responses.test.ts b/src/__tests__/ws-responses.test.ts
index 5f32187c..eda04698 100644
--- a/src/__tests__/ws-responses.test.ts
+++ b/src/__tests__/ws-responses.test.ts
@@ -1,6 +1,6 @@
import { describe, it, expect, afterEach } from "vitest";
import { createServer, type ServerInstance } from "../server.js";
-import type { Fixture } from "../types.js";
+import type { Fixture, FixtureBlock } from "../types.js";
import { connectWebSocket } from "./ws-test-client.js";
import { SKIPPED_BY_STATE_RE } from "./helpers/strict-matchers.js";
@@ -50,6 +50,23 @@ const toolReasoningFixture: Fixture = {
},
};
+// Combined content+toolCalls fixture carrying an ORDERED `blocks` array placing
+// the tool call BEFORE the text. On the WebSocket /v1/responses surface this
+// must yield the function_call output item at a LOWER output_index than the
+// message item — i.e. the function_call leads the output. Mirrors the HTTP
+// fixture-blocks-responses.test.ts assertions but drives the WS dispatch path.
+const wsBlocksToolFirstFixture: Fixture = {
+ match: { userMessage: "ws blocks tool-first" },
+ response: {
+ content: "Here you go.",
+ toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }],
+ blocks: [
+ { type: "toolCall", name: "get_weather", arguments: '{"city":"NYC"}' },
+ { type: "text", text: "Here you go." },
+ ] as FixtureBlock[],
+ },
+};
+
const allFixtures: Fixture[] = [
textFixture,
toolFixture,
@@ -57,6 +74,7 @@ const allFixtures: Fixture[] = [
reasoningFixture,
capabilityReasoningFixture,
toolReasoningFixture,
+ wsBlocksToolFirstFixture,
];
// --- tests ---
@@ -368,6 +386,65 @@ describe("WebSocket /v1/responses", () => {
ws.close();
});
+ // ── #274: ordered `blocks` must flow through the WS Responses dispatch so the
+ // websocket surface honors tool-first ordering exactly like the HTTP path. ──
+ it("honors fixture block order: function_call leads the message item (tool-first)", async () => {
+ instance = await createServer(allFixtures);
+ const ws = await connectWebSocket(instance.url, "/v1/responses");
+
+ ws.send(responseCreateMsg("ws blocks tool-first"));
+
+ // Collect until response.completed (chunking makes the count variable).
+ const maxEvents = 50;
+ let events: WSEvent[] = [];
+ for (let count = 1; ; count++) {
+ if (count > maxEvents) {
+ throw new Error(
+ `response.completed never arrived within ${maxEvents} events ` +
+ `(last event type: ${events[events.length - 1]?.type})`,
+ );
+ }
+ events = parseEvents(await ws.waitForMessages(count));
+ if (events[events.length - 1].type === "response.completed") break;
+ }
+
+ const fcAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" &&
+ (e.item as { type: string })?.type === "function_call",
+ );
+ const msgAdded = events.find(
+ (e) =>
+ e.type === "response.output_item.added" && (e.item as { type: string })?.type === "message",
+ );
+ expect(fcAdded).toBeDefined();
+ expect(msgAdded).toBeDefined();
+ // Tool-first: the function_call item takes output_index 0, the message 1.
+ expect((fcAdded as unknown as { output_index: number }).output_index).toBe(0);
+ expect((msgAdded as unknown as { output_index: number }).output_index).toBe(1);
+
+ // The terminal completed.output array must lead with the function_call item.
+ const completed = events.find((e) => e.type === "response.completed");
+ const output = (completed!.response as { output: Array<{ type: string }> }).output;
+ const types = output.map((o) => o.type);
+ expect(types.indexOf("function_call")).toBeLessThan(types.indexOf("message"));
+ expect(types[0]).toBe("function_call");
+
+ // Content + arguments still stream fully.
+ const textDeltas = events
+ .filter((e) => e.type === "response.output_text.delta")
+ .map((e) => (e as unknown as { delta: string }).delta)
+ .join("");
+ expect(textDeltas).toBe("Here you go.");
+ const argDeltas = events
+ .filter((e) => e.type === "response.function_call_arguments.delta")
+ .map((e) => (e as unknown as { delta: string }).delta)
+ .join("");
+ expect(argDeltas).toBe('{"city":"NYC"}');
+
+ ws.close();
+ });
+
it("rejects WebSocket upgrade on non-responses path", async () => {
instance = await createServer(allFixtures);
From 2d132df21338a753edfd96c0fe0280f96c9efd1f Mon Sep 17 00:00:00 2001
From: Jordan Ritter
Date: Fri, 26 Jun 2026 23:11:49 -0700
Subject: [PATCH 5/5] docs(fixtures): document the blocks array and
per-provider ordering observability (#274)
Document the fixture blocks array, per-provider ordering behavior, and the
observability surface, and record the change in the changelog.
---
CHANGELOG.md | 3 +
docs/chat-completions/index.html | 11 +++
docs/claude-messages/index.html | 10 +++
docs/fixtures/index.html | 122 ++++++++++++++++++++++++++++++-
docs/gemini/index.html | 10 +++
docs/ollama/index.html | 10 +++
docs/responses-api/index.html | 10 +++
7 files changed, 174 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3cc1bb3d..f964f84d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,9 @@
- Record-mode live proxying for the Veo surface (`record.providers.veo`) — submit and poll forwarded 1:1, eager fixture capture of the Files-API uri on `done:true`; captured operations replay later (#278)
- Native xAI Grok Imagine async video lifecycle mock — `POST /v1/videos/generations` submit (JSON-only; multipart rejected with 400), `GET /v1/videos/{request_id}` poll through `pending → done | failed | expired` with synthesized `progress`, `grokVideo` progression, `cost_in_usd_ticks` units, and a Sora-safe `/v1/videos/{id}` dispatch that leaves the OpenAI video surface unchanged (#278)
- Record-mode live proxying for the Grok surface (`record.providers.grok`) — submit and poll forwarded 1:1, eager fixture capture of url/duration/cost on `done`, `failed` persisted, `expired` passed through; captured jobs replay later (#278)
+- Optional `blocks` array on the combined `content` + `toolCalls` fixture shape lets a fixture express ordered text/tool-call blocks (`{type:"text",text}` | `{type:"toolCall",name,arguments,id?}`); when present it takes precedence over `{content, toolCalls}` for stream order, enabling tool-first and interleaved ordering. Legacy `{content, toolCalls}` fixtures are unchanged (#274)
+- All five providers stream combined responses in fixture block order: Anthropic, OpenAI Responses, and Gemini are fully observable; Ollama is best-effort (clients may reassemble positionally); OpenAI chat-completions emits in order but is degenerate (`delta.content`/`delta.tool_calls` are separate channels the client merges) (#274)
+- Recorder captures block order and persists `blocks` only when the recorded upstream stream was genuinely tool-first or interleaved; text-first streams keep the legacy `{content, toolCalls}` shape so golden recordings round-trip byte-identically (#274)
## [1.34.0] - 2026-06-24
diff --git a/docs/chat-completions/index.html b/docs/chat-completions/index.html
index 11572fbc..ac7f3aaa 100644
--- a/docs/chat-completions/index.html
+++ b/docs/chat-completions/index.html
@@ -227,6 +227,17 @@ Streaming (stream: true)
ChatCompletionChunk type with delta instead of
message.
+
+ Ordered blocks (tool-first)
+
+ A combined content + toolCalls fixture accepts an optional
+ blocks array to control stream order — see
+ Ordered blocks. On chat-completions this is
+ degenerate: delta.content and delta.tool_calls
+ are separate channels the client merges, so the mock emits chunks in block order (the wire
+ order is assertable) but tool-first is not positionally observable to clients. Use
+ Anthropic, the Responses API, or Gemini for fully observable tool-first ordering.
+
diff --git a/docs/claude-messages/index.html b/docs/claude-messages/index.html
index 30d961f6..d999821d 100644
--- a/docs/claude-messages/index.html
+++ b/docs/claude-messages/index.html
@@ -146,6 +146,16 @@ Request Translation
arrays (including content block arrays) to OpenAI-style messages so the same fixtures work
across all providers.
+
+ Ordered blocks (tool-first)
+
+ A combined content + toolCalls fixture accepts an optional
+ blocks array to control stream order — see
+ Ordered blocks. Claude Messages has
+ full support: typed text / tool_use content
+ blocks stream at incrementing indices in array order, so tool-first and interleaved
+ ordering are natively observable to clients.
+
diff --git a/docs/fixtures/index.html b/docs/fixtures/index.html
index 83e9b96c..75bbd646 100644
--- a/docs/fixtures/index.html
+++ b/docs/fixtures/index.html
@@ -312,8 +312,12 @@ Response Types
| Content + Tool Calls |
- content, toolCalls[], reasoning?, finishReason? |
- Text and tool calls in a single response |
+ content, toolCalls[], blocks?, reasoning?, finishReason? |
+
+ Text and tool calls in a single response. Add an optional
+ blocks array to control stream order (e.g. tool-first) — see
+ Ordered blocks below.
+ |
| Error |
@@ -362,6 +366,120 @@ Response Types
+ Ordered blocks (tool-first & interleaved streaming)
+
+ By default a Content + Tool Calls response streams its text first, then
+ its tool calls. To control that order — for example to emit a tool call
+ before any text (“tool-first”), or to interleave text and tool calls
+ — add an optional blocks array. Each entry is one of:
+
+
+ { "type": "text", "text": "..." } — a text segment
+ -
+
{ "type": "toolCall", "name": "...", "arguments": ..., "id": "..." }
+ — a tool call (id optional; arguments accepts an object
+ or string, same auto-stringify rules as elsewhere)
+
+
+
+ When blocks is present, it takes precedence over the
+ content and toolCalls fields for stream ordering: the blocks are
+ streamed in array order. (Keep content and toolCalls populated
+ as well — they remain the canonical aggregate for replay and for consumers that do
+ not read blocks.) When blocks is absent, legacy
+ { content, toolCalls } fixtures stream exactly as before — text-first,
+ byte-identical to prior releases. The field is purely additive.
+
+
+
+
{
+ "content": "Here is the weather.",
+ "toolCalls": [
+ { "name": "get_weather", "arguments": { "city": "SF" } }
+ ],
+ "blocks": [
+ { "type": "toolCall", "name": "get_weather", "arguments": { "city": "SF" }, "id": "call_1" },
+ { "type": "text", "text": "Here is the weather." }
+ ]
+}
+
+
+ The example above streams the get_weather tool call before the text.
+ For an interleaved stream, list blocks in the desired order, e.g.
+ [toolCall, text, toolCall].
+
+
+ Per-provider observability
+
+ How faithfully “tool-first” / interleaved order is observable depends on each
+ provider's wire protocol. The mock always emits chunks in block order; what a client can
+ reconstruct from those chunks varies:
+
+
+
+
+ | Provider |
+ Block-order support |
+ Notes |
+
+
+
+
+ | Anthropic (Claude Messages) |
+ Full |
+
+ Typed text / tool_use content blocks at incrementing
+ indices — tool-first and interleaved are natively observable.
+ |
+
+
+ | OpenAI Responses API |
+ Full |
+
+ Ordered output items (message vs function_call) carry
+ output_index — SDKs honor the order, so a tool call can precede
+ the message.
+ |
+
+
+ | Gemini |
+ Full |
+
+ Ordered parts/candidate chunks can carry functionCall and text in any
+ order, so tool-first and interleaved ordering are observable to clients.
+ |
+
+
+ | Ollama |
+ Partial |
+
+ A tool_calls chunk can be emitted before content on the wire, but some
+ clients reassemble positionally, so tool-first is best-effort.
+ |
+
+
+ | OpenAI chat-completions |
+ Degenerate |
+
+ delta.content and delta.tool_calls are separate channels
+ the client merges. The mock emits chunks in block order (and the wire order is
+ assertable), but the merge is not positionally interleaved, so tool-first
+ is not semantically observable to clients on this channel.
+ |
+
+
+
+
+
+ Recording: In record mode the recorder only persists a
+ blocks array when the recorded upstream stream was
+ genuinely tool-first or interleaved (a tool-call delta arrived before the first
+ content delta, or content arrived after a tool-call delta). Ordinary text-then-tools
+ streams are saved in the legacy { content, toolCalls } shape with no
+ blocks key, so existing golden recordings round-trip byte-identically.
+
+
+
JSON auto-stringify: In fixture files and programmatic API,
diff --git a/docs/gemini/index.html b/docs/gemini/index.html
index e0ca009c..032f6c79 100644
--- a/docs/gemini/index.html
+++ b/docs/gemini/index.html
@@ -183,6 +183,16 @@
Vertex AI
The same fixtures work for both Gemini AI Studio and Vertex AI endpoints. See the
Vertex AI page for configuration details.
+
+
Ordered blocks (tool-first)
+
+ A combined content + toolCalls fixture accepts an optional
+ blocks array to control stream order — see
+ Ordered blocks. Gemini has
+ full support: ordered parts/candidate chunks carry
+ functionCall and text in array order, so tool-first and interleaved ordering
+ are observable to clients.
+
diff --git a/docs/ollama/index.html b/docs/ollama/index.html
index ab675a6c..50ecc137 100644
--- a/docs/ollama/index.html
+++ b/docs/ollama/index.html
@@ -255,6 +255,16 @@ Request Translation
options.num_predict to max_tokens, so the same fixtures work
across all providers.
+
+ Ordered blocks (tool-first)
+
+ A combined content + toolCalls fixture accepts an optional
+ blocks array to control stream order — see
+ Ordered blocks. Ollama support is
+ partial: a tool_calls chunk can be emitted before content on
+ the NDJSON wire, but some clients reassemble positionally, so tool-first is best-effort on
+ this provider.
+
diff --git a/docs/responses-api/index.html b/docs/responses-api/index.html
index d2504ead..73df6704 100644
--- a/docs/responses-api/index.html
+++ b/docs/responses-api/index.html
@@ -160,6 +160,16 @@ SSE Event Sequence
WebSocket APIs page for WebSocket-specific details.
+
+ Ordered blocks (tool-first)
+
+ A combined content + toolCalls fixture accepts an optional
+ blocks array to control stream order — see
+ Ordered blocks. The Responses API has
+ full support: output items (message vs
+ function_call) are assigned output_index in array order, so a
+ tool call can precede the message and SDKs honor the ordering.
+